
    REX_B    = 0x41,
    REX_X    = 0x42,
    REX_XB   = 0x43,
    REX_R    = 0x44,
    REX_RB   = 0x45,
    REX_RX   = 0x46,
    REX_RXB  = 0x47,

    REX_W    = 0x48,

    REX_WB   = 0x49,
    REX_WX   = 0x4A,
    REX_WXB  = 0x4B,
    REX_WR   = 0x4C,
    REX_WRB  = 0x4D,
    REX_WRX  = 0x4E,
    REX_WRXB = 0x4F,

    VEX_3bytes = 0xC4,
    VEX_2bytes = 0xC5,
    EVEX_4bytes = 0x62
  };
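  // A REX prefix is 0x40 plus the W/R/X/B bits (W = 8, R = 4, X = 2, B = 1),
  // so the names above spell out exactly which bits are set;
  // e.g. REX_WRXB = 0x40 | 8 | 4 | 2 | 1 = 0x4F.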

  enum VexPrefix {
    VEX_B = 0x20,
    VEX_X = 0x40,
    VEX_R = 0x80,
    VEX_W = 0x80
  };
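  // VEX_R, VEX_X and VEX_B are masks for bits 7/6/5 of the second byte of a
  // 3-byte VEX prefix (stored inverted), while VEX_W is bit 7 of the third
  // byte -- which is why VEX_R and VEX_W can share the value 0x80.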

  enum EvexPrefix {
    EVEX_F  = 0x04,
    EVEX_V  = 0x08,
    EVEX_Rb = 0x10,
    EVEX_X  = 0x40,
    EVEX_Z  = 0x80
  };
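  // Bit masks used when building the 4-byte (0x62) EVEX prefix: EVEX_Z selects
  // zeroing rather than merging masking, and EVEX_Rb/EVEX_V extend the register
  // fields so the full zmm0-zmm31 range can be encoded.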

  enum VexSimdPrefix {
    VEX_SIMD_NONE = 0x0,
    VEX_SIMD_66   = 0x1,
    VEX_SIMD_F3   = 0x2,
    VEX_SIMD_F2   = 0x3
  };

  enum VexOpcode {
    VEX_OPCODE_NONE  = 0x0,
    VEX_OPCODE_0F    = 0x1,
    VEX_OPCODE_0F_38 = 0x2,
    VEX_OPCODE_0F_3A = 0x3
  };

  enum AvxVectorLen {
    AVX_128bit = 0x0,
    AVX_256bit = 0x1,
    AVX_512bit = 0x2,
    AVX_NoVec  = 0x4
  };
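  // One vector_len value is threaded through most AVX/EVEX emitters below,
  // so a single declaration such as vaddpd() covers the xmm (AVX_128bit),
  // ymm (AVX_256bit) and zmm (AVX_512bit) forms.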

  enum EvexTupleType {
    EVEX_FV   = 0,
    EVEX_HV   = 4,
    EVEX_FVM  = 6,
    EVEX_T1S  = 7,
    EVEX_T1F  = 11,
    EVEX_T2   = 13,
    EVEX_T4   = 15,
    EVEX_T8   = 17,
    EVEX_HVM  = 18,
    EVEX_QVM  = 19,
    EVEX_OVM  = 20,
    EVEX_M128 = 21,
    EVEX_DUP  = 22,
    EVEX_ETUP = 23
  };

  enum EvexInputSizeInBits {
    EVEX_8bit  = 0,
    EVEX_16bit = 1,
    EVEX_32bit = 2,
    EVEX_64bit = 3
  };

  enum WhichOperand {
    // input to locate_operand, and format code for relocations
    imm_operand    = 0,  // embedded 32-bit|64-bit immediate operand
    disp32_operand = 1,  // embedded 32-bit displacement or address
    call32_operand = 2,  // embedded 32-bit self-relative displacement
#ifndef _LP64
    _WhichOperand_limit = 3
#else
    narrow_oop_operand  = 3,  // embedded 32-bit immediate narrow oop
    _WhichOperand_limit = 4
#endif
  };



  // NOTE: The general philosophy of the declarations here is that 64bit versions
  // of instructions are freely declared without the need for wrapping them in an ifdef.
  // (Some dangerous instructions are ifdef'd out of inappropriate JVMs.)
  // In the .cpp file the implementations are wrapped so that they are dropped out
  // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
  // to the size it was prior to merging up the 32bit and 64bit assemblers.
  //
  // This does mean you'll get a linker/runtime error if you use a 64bit-only
  // instruction in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.

 private:

  int evex_encoding;
  int input_size_in_bits;
  int avx_vector_len;
  int tuple_type;
  bool is_evex_instruction;

  // 64bit prefixes
  int prefix_and_encode(int reg_enc, bool byteinst = false);
  int prefixq_and_encode(int reg_enc);

  int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
  int prefixq_and_encode(int dst_enc, int src_enc);

  void prefix(Register reg);
  void prefix(Address adr);
  void prefixq(Address adr);

  void prefix(Address adr, Register reg, bool byteinst = false);
  void prefix(Address adr, XMMRegister reg);
  void prefixq(Address adr, Register reg);
  void prefixq(Address adr, XMMRegister reg);

  void prefetch_prefix(Address src);

  void rex_prefix(Address adr, XMMRegister xreg,
                  VexSimdPrefix pre, VexOpcode opc, bool rex_w);
  int  rex_prefix_and_encode(int dst_enc, int src_enc,
                             VexSimdPrefix pre, VexOpcode opc, bool rex_w);

  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
                  int nds_enc, VexSimdPrefix pre, VexOpcode opc,
                  int vector_len);

  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
                   int nds_enc, VexSimdPrefix pre, VexOpcode opc,
                   bool is_extended_context, bool is_merge_context,
                   int vector_len, bool no_mask_reg);

  void vex_prefix(Address adr, int nds_enc, int xreg_enc,
                  VexSimdPrefix pre, VexOpcode opc,
                  bool vex_w, int vector_len,
                  bool legacy_mode = false, bool no_mask_reg = false);

  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
                  VexSimdPrefix pre, int vector_len = AVX_128bit,
                  bool no_mask_reg = false) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, false, no_mask_reg);
  }

  void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
                    VexSimdPrefix pre, int vector_len = AVX_128bit,
                    bool no_mask_reg = false) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
  }
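  // These two wrappers cover the common case of an 0F-map instruction with a
  // memory source; the _q variant differs only in setting VEX.W. A caller
  // emitting, say, a 128-bit F2-prefixed op could write (illustrative only):
  //   vex_prefix(dst, nds, src, VEX_SIMD_F2);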

  void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
    bool vex_w = false;
    int vector_len = AVX_128bit;
    vex_prefix(src, nds->encoding(), dst->encoding(),
               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
               vector_len, /* legacy_mode */ false, no_mask_reg);
  }

  void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
    bool vex_w = true;
    int vector_len = AVX_128bit;
    vex_prefix(src, nds->encoding(), dst->encoding(),
               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
               vector_len, /* legacy_mode */ false, no_mask_reg);
  }

  int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                             VexSimdPrefix pre, VexOpcode opc,
                             bool vex_w, int vector_len,
                             bool legacy_mode, bool no_mask_reg);

  int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
    bool vex_w = false;
    int vector_len = AVX_128bit;
    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
                                 false, no_mask_reg);
  }

  int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
    bool vex_w = true;
    int vector_len = AVX_128bit;
    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
                                 false, no_mask_reg);
  }

  int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                             VexSimdPrefix pre, int vector_len = AVX_128bit,
                             VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
                             bool no_mask_reg = false) {
    int src_enc = src->encoding();
    int dst_enc = dst->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
  }

  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
                   VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
                   bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);

  void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
                   bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
    simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
  }

  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
    simd_prefix(src, dst, pre, no_mask_reg);
  }
  void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
                     VexSimdPrefix pre, bool no_mask_reg = false) {
    bool rex_w = true;
    simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
  }

  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                             VexSimdPrefix pre, bool no_mask_reg,
                             VexOpcode opc = VEX_OPCODE_0F,
                             bool rex_w = false, int vector_len = AVX_128bit,
                             bool legacy_mode = false);

  int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
                             VexSimdPrefix pre, bool no_mask_reg,
                             VexOpcode opc = VEX_OPCODE_0F,
                             bool rex_w = false, int vector_len = AVX_128bit);

  int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
                             VexSimdPrefix pre, bool no_mask_reg,
                             VexOpcode opc = VEX_OPCODE_0F,
                             bool rex_w = false, int vector_len = AVX_128bit);

  // Move/convert 32-bit integer value.
  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
                             VexSimdPrefix pre, bool no_mask_reg) {
    // It is OK to cast from Register to XMMRegister to pass the argument here
    // since only the encoding is used in simd_prefix_and_encode() and the
    // numbers of general-purpose and XMM registers are the same.
    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
  }
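  // For example, rax and xmm0 share encoding 0, so
  // as_XMMRegister(rax->encoding()) reuses rax's encoding in the XMM operand
  // position without any semantic conversion.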
  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
    return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
  }
  int simd_prefix_and_encode(Register dst, XMMRegister src,
                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
                             bool no_mask_reg = false) {
    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
  }

  // Move/convert 64-bit integer value.
  int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
                               VexSimdPrefix pre, bool no_mask_reg = false) {
    bool rex_w = true;
    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
  }
  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
    return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
  }
  int simd_prefix_and_encode_q(Register dst, XMMRegister src,
                               VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
                               bool no_mask_reg = false) {
    bool rex_w = true;
    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
  }

  // Helper functions for groups of instructions
  void emit_arith_b(int op1, int op2, Register dst, int imm8);

  void emit_arith(int op1, int op2, Register dst, int32_t imm32);
  // Force generation of a 4 byte immediate value even if it fits into 8bit
  void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
  void emit_arith(int op1, int op2, Register dst, Register src);

  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
  void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
  void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                      Address src, VexSimdPrefix pre, int vector_len,
                      bool no_mask_reg = false);
  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
                        Address src, VexSimdPrefix pre, int vector_len,
                        bool no_mask_reg = false);
  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                      XMMRegister src, VexSimdPrefix pre, int vector_len,
                      bool no_mask_reg = false);
  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
                        XMMRegister src, VexSimdPrefix pre, int vector_len,
                        bool no_mask_reg = false);

  bool emit_compressed_disp_byte(int &disp);
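  // With EVEX, an 8-bit displacement is implicitly scaled by the memory operand
  // size N ("disp8*N", derived from the tuple type and vector length recorded
  // above); e.g. for a full 512-bit operand (N = 64) a displacement of 128 is
  // encoded as disp8 = 2 instead of requiring a 32-bit displacement.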

  void emit_operand(Register reg,
                    Register base, Register index, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec,
                    int rip_relative_correction = 0);

  void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);

  // operands that only take the original 32bit registers
  void emit_operand32(Register reg, Address adr);

  void emit_operand(XMMRegister reg,
                    Register base, Register index, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec);

  void emit_operand(XMMRegister reg, Address adr);

  void emit_operand(MMXRegister reg, Address adr);
  // Move Scalar Double-Precision Floating-Point Values
  void movsd(XMMRegister dst, Address src);
  void movsd(XMMRegister dst, XMMRegister src);
  void movsd(Address dst, XMMRegister src);
  void movlpd(XMMRegister dst, Address src);

  // Newer CPUs require use of movaps and movapd to avoid partial register stalls
  // when moving between registers.
  void movaps(XMMRegister dst, XMMRegister src);
  void movapd(XMMRegister dst, XMMRegister src);

  // End avoid using directly
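  // The reg-reg forms of movss/movsd merge into the destination and so carry a
  // false dependence on its previous contents; movaps/movapd rewrite the whole
  // register and break that dependence. For a plain copy prefer e.g.
  //   movaps(xmm0, xmm1);   // rather than movss(xmm0, xmm1)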


  // Instruction prefixes
  void prefix(Prefix p);

 public:

  // Creation
  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
    init_attributes();
  }

  // Decoding
  static address locate_operand(address inst, WhichOperand which);
  static address locate_next_instruction(address inst);

  // Utilities
  static bool is_polling_page_far() NOT_LP64({ return false;});

  // Generic instructions
  // These do 32bit or 64bit as needed for the platform. In some sense they
  // belong in the macro assembler, but there is no need for both varieties to exist.

  void init_attributes(void) {
    evex_encoding = 0;
    input_size_in_bits = 0;
    avx_vector_len = AVX_NoVec;
    tuple_type = EVEX_ETUP;
    is_evex_instruction = false;
  }

  void lea(Register dst, Address src);

  void mov(Register dst, Register src);

  void pusha();
  void popa();

  void pushf();
  void popf();

  void push(int32_t imm32);

  void push(Register src);

  void pop(Register dst);

  // These are dummies to prevent surprise implicit conversions to Register
  void push(void* v);
  void pop(void* v);
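  // Without these overloads a pointer argument could silently undergo an
  // implicit conversion and be treated as a Register; the void* versions win
  // overload resolution instead, so the mistake surfaces at build time rather
  // than encoding the wrong instruction.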

        if (offset < -128) {
          offset = -128;
        }

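        // A locked read-modify-write of a stack slot acts as a full StoreLoad
        // barrier on x86 and is typically cheaper than mfence; the offset was
        // clamped above so the displacement still fits in a signed byte.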
        lock();
        addl(Address(rsp, offset), 0); // Assert the lock# signal here
      }
    }
  }

  void mfence();

  // Moves

  void mov64(Register dst, int64_t imm64);

  void movb(Address dst, Register src);
  void movb(Address dst, int imm8);
  void movb(Register dst, Address src);

  void kmovq(KRegister dst, KRegister src);
  void kmovql(KRegister dst, Register src);
  void kmovq(Address dst, KRegister src);
  void kmovq(KRegister dst, Address src);

  void movdl(XMMRegister dst, Register src);
  void movdl(Register dst, XMMRegister src);
  void movdl(XMMRegister dst, Address src);
  void movdl(Address dst, XMMRegister src);

  // Move Double Quadword
  void movdq(XMMRegister dst, Register src);
  void movdq(Register dst, XMMRegister src);

  // Move Aligned Double Quadword
  void movdqa(XMMRegister dst, XMMRegister src);
  void movdqa(XMMRegister dst, Address src);

  // Move Unaligned Double Quadword
  void movdqu(Address dst, XMMRegister src);
  void movdqu(XMMRegister dst, Address src);
  void movdqu(XMMRegister dst, XMMRegister src);

  // Move Unaligned 256bit Vector
  void vmovdqu(Address dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, Address src);
  void vmovdqu(XMMRegister dst, XMMRegister src);

  // Move Unaligned 512bit Vector
  void evmovdqu(Address dst, XMMRegister src, int vector_len);
  void evmovdqu(XMMRegister dst, Address src, int vector_len);
  void evmovdqu(XMMRegister dst, XMMRegister src, int vector_len);
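  // The vector_len argument selects the 128/256/512-bit form; e.g.
  // evmovdqu(xmm0, Address(rsi, 0), AVX_512bit) moves a full 64-byte vector
  // (register arguments here are illustrative).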

  // Move lower 64bit to high 64bit in 128bit register
  void movlhps(XMMRegister dst, XMMRegister src);

  void movl(Register dst, int32_t imm32);
  void movl(Address dst, int32_t imm32);
  void movl(Register dst, Register src);
  void movl(Register dst, Address src);
  void movl(Address dst, Register src);

  // These dummies prevent movl from converting a zero (like NULL) into a Register
  // by giving the compiler two choices it can't resolve

  void movl(Address dst, void* junk);
  void movl(Register dst, void* junk);

#ifdef _LP64
  void movq(Register dst, Register src);
  void movq(Register dst, Address src);
  void movq(Address dst, Register src);
#endif

  void notl(Register dst);

#ifdef _LP64
  void notq(Register dst);
#endif

  void orl(Address dst, int32_t imm32);
  void orl(Register dst, int32_t imm32);
  void orl(Register dst, Address src);
  void orl(Register dst, Register src);

  void orq(Address dst, int32_t imm32);
  void orq(Register dst, int32_t imm32);
  void orq(Register dst, Address src);
  void orq(Register dst, Register src);

  // Pack with unsigned saturation
  void packuswb(XMMRegister dst, XMMRegister src);
  void packuswb(XMMRegister dst, Address src);
  void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Permutation of 64bit words
  void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);

  void pause();

  // SSE4.2 string instructions
  void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
  void pcmpestri(XMMRegister xmm1, Address src, int imm8);

  // SSE 4.1 extract
  void pextrd(Register dst, XMMRegister src, int imm8);
  void pextrq(Register dst, XMMRegister src, int imm8);

  // SSE 4.1 insert
  void pinsrd(XMMRegister dst, Register src, int imm8);
  void pinsrq(XMMRegister dst, Register src, int imm8);

  // SSE4.1 packed move
  void pmovzxbw(XMMRegister dst, XMMRegister src);
  void pmovzxbw(XMMRegister dst, Address src);

#ifndef _LP64 // no 32bit push/pop on amd64
  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vdivss(XMMRegister dst, XMMRegister nds, Address src);
  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vmulss(XMMRegister dst, XMMRegister nds, Address src);
  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vsubss(XMMRegister dst, XMMRegister nds, Address src);
  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);


  //====================VECTOR ARITHMETIC=====================================

  // Add Packed Floating-Point Values
  void addpd(XMMRegister dst, XMMRegister src);
  void addps(XMMRegister dst, XMMRegister src);
  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Subtract Packed Floating-Point Values
  void subpd(XMMRegister dst, XMMRegister src);
  void subps(XMMRegister dst, XMMRegister src);
  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Multiply Packed Floating-Point Values
  void mulpd(XMMRegister dst, XMMRegister src);
  void mulps(XMMRegister dst, XMMRegister src);
  void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Divide Packed Floating-Point Values
  void divpd(XMMRegister dst, XMMRegister src);
  void divps(XMMRegister dst, XMMRegister src);
  void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Bitwise Logical AND of Packed Floating-Point Values
  void andpd(XMMRegister dst, XMMRegister src);
  void andps(XMMRegister dst, XMMRegister src);
  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Bitwise Logical XOR of Packed Floating-Point Values
  void xorpd(XMMRegister dst, XMMRegister src);
  void xorps(XMMRegister dst, XMMRegister src);
  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Add horizontal packed integers
  void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void phaddw(XMMRegister dst, XMMRegister src);
  void phaddd(XMMRegister dst, XMMRegister src);

  // Add packed integers
  void paddb(XMMRegister dst, XMMRegister src);
  void paddw(XMMRegister dst, XMMRegister src);
  void paddd(XMMRegister dst, XMMRegister src);
  void paddq(XMMRegister dst, XMMRegister src);
  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Sub packed integers
  void psubb(XMMRegister dst, XMMRegister src);
  void psubw(XMMRegister dst, XMMRegister src);
  void psubd(XMMRegister dst, XMMRegister src);
  void psubq(XMMRegister dst, XMMRegister src);
  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Multiply packed integers (shorts and ints; longs only via vpmullq with AVX-512)
  void pmullw(XMMRegister dst, XMMRegister src);
  void pmulld(XMMRegister dst, XMMRegister src);
  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Shift left packed integers
  void psllw(XMMRegister dst, int shift);
  void pslld(XMMRegister dst, int shift);
  void psllq(XMMRegister dst, int shift);
  void psllw(XMMRegister dst, XMMRegister shift);
  void pslld(XMMRegister dst, XMMRegister shift);
  void psllq(XMMRegister dst, XMMRegister shift);
  void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

  // Logical shift right packed integers
  void psrlw(XMMRegister dst, int shift);
  void psrld(XMMRegister dst, int shift);
  void psrlq(XMMRegister dst, int shift);
  void psrlw(XMMRegister dst, XMMRegister shift);
  void psrld(XMMRegister dst, XMMRegister shift);
  void psrlq(XMMRegister dst, XMMRegister shift);
  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

  // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
  void psraw(XMMRegister dst, int shift);
  void psrad(XMMRegister dst, int shift);
  void psraw(XMMRegister dst, XMMRegister shift);
  void psrad(XMMRegister dst, XMMRegister shift);
  void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

  // And packed integers
  void pand(XMMRegister dst, XMMRegister src);
  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Or packed integers
  void por(XMMRegister dst, XMMRegister src);
  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Xor packed integers
  void pxor(XMMRegister dst, XMMRegister src);
  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Copy low 128bit into high 128bit of YMM registers.
  void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vextractf128h(XMMRegister dst, XMMRegister src);
  void vextracti128h(XMMRegister dst, XMMRegister src);

  // Load/store the high 128bit of YMM registers without destroying the other half.
  void vinsertf128h(XMMRegister dst, Address src);
  void vinserti128h(XMMRegister dst, Address src);
  void vextractf128h(Address dst, XMMRegister src);
  void vextracti128h(Address dst, XMMRegister src);

  // Copy low 256bit into high 256bit of ZMM registers.
  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vextracti64x4h(XMMRegister dst, XMMRegister src);
  void vextractf64x4h(XMMRegister dst, XMMRegister src);
  void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
  void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
  void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);

  // duplicate 4-byte integer data from src into 8 locations in dest
  void vpbroadcastd(XMMRegister dst, XMMRegister src);

  // duplicate the 4-byte integer from src into every lane of dest; the lane
  // count depends on vector_len
  void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
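  // e.g. with AVX_512bit the doubleword is replicated into 16 lanes, with
  // AVX_256bit into 8 and with AVX_128bit into 4.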

  // Carry-Less Multiplication Quadword
  void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
  void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);

  // AVX instruction used to clear the upper 128 bits of YMM registers and
  // to avoid the transition penalty between AVX and SSE states. There is no
  // penalty if legacy SSE instructions are encoded using a VEX prefix because
  // they always clear the upper 128 bits. It should be used before calling
  // runtime code and native libraries.
  void vzeroupper();
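  // Typical use is immediately before leaving AVX-using compiled code, e.g.:
  //   vzeroupper();
  //   call(...);   // into SSE-only runtime or native code (illustrative)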

 protected:
  // The next instructions require 16-byte address alignment in SSE mode.
  // They should be called only from the corresponding MacroAssembler instructions.
  void andpd(XMMRegister dst, Address src);
  void andps(XMMRegister dst, Address src);
  void xorpd(XMMRegister dst, Address src);
  void xorps(XMMRegister dst, Address src);