421
422 // x86 can do array addressing as a single operation since disp can be an absolute
423 // address; amd64 can't. We create a class that expresses the concept but does extra
424 // magic on amd64 to get the final result.
425
426 class ArrayAddress VALUE_OBJ_CLASS_SPEC {
427 private:
428
429 AddressLiteral _base;
430 Address _index;
431
432 public:
433
434 ArrayAddress() {}
435 ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}
436 AddressLiteral base() { return _base; }
437 Address index() { return _index; }
438
439 };
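
// Illustrative sketch (not part of the original source; 'table' and 'idx' are
// hypothetical): forming table[idx] where 'table' is an absolute address.
//
//   AddressLiteral base = ExternalAddress((address)table);
//   ArrayAddress   elem(base, Address(noreg, idx, Address::times_8));
//
// On x86_32 the base folds directly into the displacement of a single
// addressing mode; on amd64 the MacroAssembler first materializes 'base'
// into a scratch register and then applies the index.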
440
441 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);
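// (27 words is the 108-byte FNSAVE image used on 32-bit; 512 bytes is the
//  FXSAVE image, i.e. 512 / wordSize = 64 words on LP64.)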
442
443 // The Intel x86/AMD64 Assembler: a pure assembler doing NO optimizations on the instruction
444 // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
445 // is what you get. The Assembler generates code into a CodeBuffer.
446
447 class Assembler : public AbstractAssembler {
448 friend class AbstractAssembler; // for the non-virtual hack
449 friend class LIR_Assembler; // as_Address()
450 friend class StubGenerator;
451
452 public:
453 enum Condition { // The x86 condition codes used for conditional jumps/moves.
454 zero = 0x4,
455 notZero = 0x5,
456 equal = 0x4,
457 notEqual = 0x5,
458 less = 0xc,
459 lessEqual = 0xe,
460 greater = 0xf,
461 greaterEqual = 0xd,
486
487 REX_B = 0x41,
488 REX_X = 0x42,
489 REX_XB = 0x43,
490 REX_R = 0x44,
491 REX_RB = 0x45,
492 REX_RX = 0x46,
493 REX_RXB = 0x47,
494
495 REX_W = 0x48,
496
497 REX_WB = 0x49,
498 REX_WX = 0x4A,
499 REX_WXB = 0x4B,
500 REX_WR = 0x4C,
501 REX_WRB = 0x4D,
502 REX_WRX = 0x4E,
503 REX_WRXB = 0x4F,
504
505 VEX_3bytes = 0xC4,
506 VEX_2bytes = 0xC5
507 };
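
// Example: a REX prefix is 0100WRXB in binary, so REX_WRB = 0x4D = 0100 1101b
// sets W=1 (64-bit operand size), R=1 (extends ModRM.reg), X=0, and B=1
// (extends ModRM.rm or SIB.base).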
508
509 enum VexPrefix {
510 VEX_B = 0x20,
511 VEX_X = 0x40,
512 VEX_R = 0x80,
513 VEX_W = 0x80
514 };
515
516 enum VexSimdPrefix {
517 VEX_SIMD_NONE = 0x0,
518 VEX_SIMD_66 = 0x1,
519 VEX_SIMD_F3 = 0x2,
520 VEX_SIMD_F2 = 0x3
521 };
522
523 enum VexOpcode {
524 VEX_OPCODE_NONE = 0x0,
525 VEX_OPCODE_0F = 0x1,
526 VEX_OPCODE_0F_38 = 0x2,
527 VEX_OPCODE_0F_3A = 0x3
528 };
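
// These two enums map onto fields of the VEX prefix: VexSimdPrefix is the
// 2-bit 'pp' field (00 = none, 01 = 66, 10 = F3, 11 = F2) and VexOpcode is
// the opcode-map ('mmmmm') field; e.g. pre = VEX_SIMD_66 with
// opc = VEX_OPCODE_0F_38 selects the 66 0F 38 opcode map.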
529
530 enum WhichOperand {
531 // input to locate_operand, and format code for relocations
532 imm_operand = 0, // embedded 32-bit|64-bit immediate operand
533 disp32_operand = 1, // embedded 32-bit displacement or address
534 call32_operand = 2, // embedded 32-bit self-relative displacement
535 #ifndef _LP64
536 _WhichOperand_limit = 3
537 #else
538 narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop
539 _WhichOperand_limit = 4
540 #endif
541 };
542
543
544
545 // NOTE: The general philosophy of the declarations here is that 64bit versions
546 // of instructions are freely declared without the need to wrap them in an ifdef.
547 // (Some dangerous instructions are ifdef'd out of inappropriate jvms.)
548 // In the .cpp file the implementations are wrapped so that they are dropped out
549 // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
550 // to the size it was prior to merging up the 32bit and 64bit assemblers.
551 //
552 // This does mean you'll get a linker/runtime error if you use a 64bit-only instruction
553 // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
554
555 private:
556
557
558 // 64bit prefixes
559 int prefix_and_encode(int reg_enc, bool byteinst = false);
560 int prefixq_and_encode(int reg_enc);
561
562 int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
563 int prefixq_and_encode(int dst_enc, int src_enc);
564
565 void prefix(Register reg);
566 void prefix(Address adr);
567 void prefixq(Address adr);
568
569 void prefix(Address adr, Register reg, bool byteinst = false);
570 void prefix(Address adr, XMMRegister reg);
571 void prefixq(Address adr, Register reg);
572 void prefixq(Address adr, XMMRegister reg);
573
574 void prefetch_prefix(Address src);
575
576 void rex_prefix(Address adr, XMMRegister xreg,
577 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
578 int rex_prefix_and_encode(int dst_enc, int src_enc,
579 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
580
581 void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
582 int nds_enc, VexSimdPrefix pre, VexOpcode opc,
583 bool vector256);
584
585 void vex_prefix(Address adr, int nds_enc, int xreg_enc,
586 VexSimdPrefix pre, VexOpcode opc,
587 bool vex_w, bool vector256);
588
589 void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
590 VexSimdPrefix pre, bool vector256 = false) {
591 int dst_enc = dst->encoding();
592 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
593 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
594 }
595
596 void vex_prefix_0F38(Register dst, Register nds, Address src) {
597 bool vex_w = false;
598 bool vector256 = false;
599 vex_prefix(src, nds->encoding(), dst->encoding(),
600 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
601 }
602
603 void vex_prefix_0F38_q(Register dst, Register nds, Address src) {
604 bool vex_w = true;
605 bool vector256 = false;
606 vex_prefix(src, nds->encoding(), dst->encoding(),
607 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
608 }
609 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
610 VexSimdPrefix pre, VexOpcode opc,
611 bool vex_w, bool vector256);
612
613 int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {
614 bool vex_w = false;
615 bool vector256 = false;
616 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
617 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
618 }
619 int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {
620 bool vex_w = true;
621 bool vector256 = false;
622 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
623 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
624 }
625 int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
626 VexSimdPrefix pre, bool vector256 = false,
627 VexOpcode opc = VEX_OPCODE_0F) {
628 int src_enc = src->encoding();
629 int dst_enc = dst->encoding();
630 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
631 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
632 }
633
634 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
635 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
636 bool rex_w = false, bool vector256 = false);
637
638 void simd_prefix(XMMRegister dst, Address src,
639 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
640 simd_prefix(dst, xnoreg, src, pre, opc);
641 }
642
643 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
644 simd_prefix(src, dst, pre);
645 }
646 void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
647 VexSimdPrefix pre) {
648 bool rex_w = true;
649 simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
650 }
651
652 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
653 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
654 bool rex_w = false, bool vector256 = false);
655
656 // Move/convert 32-bit integer value.
657 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
658 VexSimdPrefix pre) {
659 // It is OK to cast from Register to XMMRegister to pass the argument here
660 // since only the encoding is used in simd_prefix_and_encode() and the number
661 // of general registers and XMM registers is the same.
662 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
663 }
664 int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
665 return simd_prefix_and_encode(dst, xnoreg, src, pre);
666 }
667 int simd_prefix_and_encode(Register dst, XMMRegister src,
668 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
669 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
670 }
671
672 // Move/convert 64-bit integer value.
673 int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
674 VexSimdPrefix pre) {
675 bool rex_w = true;
676 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
677 }
678 int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
679 return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
680 }
681 int simd_prefix_and_encode_q(Register dst, XMMRegister src,
682 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
683 bool rex_w = true;
684 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
685 }
686
687 // Helper functions for groups of instructions
688 void emit_arith_b(int op1, int op2, Register dst, int imm8);
689
690 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
691 // Force generation of a 4-byte immediate value even if it fits into 8 bits
692 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
693 void emit_arith(int op1, int op2, Register dst, Register src);
694
695 void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
696 void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
697 void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
698 void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
699 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
700 Address src, VexSimdPrefix pre, bool vector256);
701 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
702 XMMRegister src, VexSimdPrefix pre, bool vector256);
703
704 void emit_operand(Register reg,
705 Register base, Register index, Address::ScaleFactor scale,
706 int disp,
707 RelocationHolder const& rspec,
708 int rip_relative_correction = 0);
709
710 void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
711
712 // operands that only take the original 32bit registers
713 void emit_operand32(Register reg, Address adr);
714
715 void emit_operand(XMMRegister reg,
716 Register base, Register index, Address::ScaleFactor scale,
717 int disp,
718 RelocationHolder const& rspec);
719
720 void emit_operand(XMMRegister reg, Address adr);
721
722 void emit_operand(MMXRegister reg, Address adr);
808 // Move Scalar Double-Precision Floating-Point Values
809 void movsd(XMMRegister dst, Address src);
810 void movsd(XMMRegister dst, XMMRegister src);
811 void movsd(Address dst, XMMRegister src);
812 void movlpd(XMMRegister dst, Address src);
813
814 // Newer CPUs require use of movaps and movapd to avoid a partial register stall
815 // when moving between registers.
816 void movaps(XMMRegister dst, XMMRegister src);
817 void movapd(XMMRegister dst, XMMRegister src);
818
819 // End avoid using directly
820
821
822 // Instruction prefixes
823 void prefix(Prefix p);
824
825 public:
826
827 // Creation
828 Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
829
830 // Decoding
831 static address locate_operand(address inst, WhichOperand which);
832 static address locate_next_instruction(address inst);
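
// Example (sketch): for an instruction ending in a 32-bit self-relative call
// displacement, locate_operand(inst, call32_operand) returns the address of
// that embedded displacement, which is the field relocation processing patches.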
833
834 // Utilities
835 static bool is_polling_page_far() NOT_LP64({ return false;});
836
837 // Generic instructions
838 // These do 32bit or 64bit as needed for the platform. In some sense they
839 // belong in the macro assembler, but there is no need for both varieties to exist.
840
841 void lea(Register dst, Address src);
842
843 void mov(Register dst, Register src);
844
845 void pusha();
846 void popa();
847
848 void pushf();
849 void popf();
850
851 void push(int32_t imm32);
852
853 void push(Register src);
854
855 void pop(Register dst);
856
857 // These are dummies to prevent surprise implicit conversions to Register
858 void push(void* v);
859 void pop(void* v);
860
1321 if (offset < -128) {
1322 offset = -128;
1323 }
1324
1325 lock();
1326 addl(Address(rsp, offset), 0); // Assert the lock# signal here
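// A locked read-modify-write of a stack slot acts as a full memory fence
// and is typically cheaper than an explicit mfence.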
1327 }
1328 }
1329 }
1330
1331 void mfence();
1332
1333 // Moves
1334
1335 void mov64(Register dst, int64_t imm64);
1336
1337 void movb(Address dst, Register src);
1338 void movb(Address dst, int imm8);
1339 void movb(Register dst, Address src);
1340
1341 void movdl(XMMRegister dst, Register src);
1342 void movdl(Register dst, XMMRegister src);
1343 void movdl(XMMRegister dst, Address src);
1344 void movdl(Address dst, XMMRegister src);
1345
1346 // Move Double Quadword
1347 void movdq(XMMRegister dst, Register src);
1348 void movdq(Register dst, XMMRegister src);
1349
1350 // Move Aligned Double Quadword
1351 void movdqa(XMMRegister dst, XMMRegister src);
1352 void movdqa(XMMRegister dst, Address src);
1353
1354 // Move Unaligned Double Quadword
1355 void movdqu(Address dst, XMMRegister src);
1356 void movdqu(XMMRegister dst, Address src);
1357 void movdqu(XMMRegister dst, XMMRegister src);
1358
1359 // Move Unaligned 256bit Vector
1360 void vmovdqu(Address dst, XMMRegister src);
1361 void vmovdqu(XMMRegister dst, Address src);
1362 void vmovdqu(XMMRegister dst, XMMRegister src);
1363
1364 // Move lower 64bit to high 64bit in 128bit register
1365 void movlhps(XMMRegister dst, XMMRegister src);
1366
1367 void movl(Register dst, int32_t imm32);
1368 void movl(Address dst, int32_t imm32);
1369 void movl(Register dst, Register src);
1370 void movl(Register dst, Address src);
1371 void movl(Address dst, Register src);
1372
1373 // These dummies prevent movl from converting a zero (like NULL) into a Register
1374 // by giving the compiler two choices it can't resolve
1375
1376 void movl(Address dst, void* junk);
1377 void movl(Register dst, void* junk);
1378
1379 #ifdef _LP64
1380 void movq(Register dst, Register src);
1381 void movq(Register dst, Address src);
1382 void movq(Address dst, Register src);
1383 #endif
1469
1470 void notl(Register dst);
1471
1472 #ifdef _LP64
1473 void notq(Register dst);
1474 #endif
1475
1476 void orl(Address dst, int32_t imm32);
1477 void orl(Register dst, int32_t imm32);
1478 void orl(Register dst, Address src);
1479 void orl(Register dst, Register src);
1480
1481 void orq(Address dst, int32_t imm32);
1482 void orq(Register dst, int32_t imm32);
1483 void orq(Register dst, Address src);
1484 void orq(Register dst, Register src);
1485
1486 // Pack with unsigned saturation
1487 void packuswb(XMMRegister dst, XMMRegister src);
1488 void packuswb(XMMRegister dst, Address src);
1489 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1490
1491 // Permutation of 64bit words
1492 void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);
1493
1494 void pause();
1495
1496 // SSE4.2 string instructions
1497 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1498 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1499
1500 // SSE4.1 extract
1501 void pextrd(Register dst, XMMRegister src, int imm8);
1502 void pextrq(Register dst, XMMRegister src, int imm8);
1503
1504 // SSE4.1 insert
1505 void pinsrd(XMMRegister dst, Register src, int imm8);
1506 void pinsrq(XMMRegister dst, Register src, int imm8);
1507
1508 // SSE4.1 packed move
1509 void pmovzxbw(XMMRegister dst, XMMRegister src);
1510 void pmovzxbw(XMMRegister dst, Address src);
1511
1512 #ifndef _LP64 // no 32bit push/pop on amd64
1717 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1718 void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
1719 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1720 void vdivss(XMMRegister dst, XMMRegister nds, Address src);
1721 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1722 void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
1723 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1724 void vmulss(XMMRegister dst, XMMRegister nds, Address src);
1725 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1726 void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
1727 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1728 void vsubss(XMMRegister dst, XMMRegister nds, Address src);
1729 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1730
1731
1732 //====================VECTOR ARITHMETIC=====================================
1733
1734 // Add Packed Floating-Point Values
1735 void addpd(XMMRegister dst, XMMRegister src);
1736 void addps(XMMRegister dst, XMMRegister src);
1737 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1738 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1739 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1740 void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1741
1742 // Subtract Packed Floating-Point Values
1743 void subpd(XMMRegister dst, XMMRegister src);
1744 void subps(XMMRegister dst, XMMRegister src);
1745 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1746 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1747 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1748 void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1749
1750 // Multiply Packed Floating-Point Values
1751 void mulpd(XMMRegister dst, XMMRegister src);
1752 void mulps(XMMRegister dst, XMMRegister src);
1753 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1754 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1755 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1756 void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1757
1758 // Divide Packed Floating-Point Values
1759 void divpd(XMMRegister dst, XMMRegister src);
1760 void divps(XMMRegister dst, XMMRegister src);
1761 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1762 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1763 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1764 void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1765
1766 // Bitwise Logical AND of Packed Floating-Point Values
1767 void andpd(XMMRegister dst, XMMRegister src);
1768 void andps(XMMRegister dst, XMMRegister src);
1769 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1770 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1771 void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1772 void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1773
1774 // Bitwise Logical XOR of Packed Floating-Point Values
1775 void xorpd(XMMRegister dst, XMMRegister src);
1776 void xorps(XMMRegister dst, XMMRegister src);
1777 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1778 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1779 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1780 void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1781
1782 // Add horizontal packed integers
1783 void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1784 void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1785 void phaddw(XMMRegister dst, XMMRegister src);
1786 void phaddd(XMMRegister dst, XMMRegister src);
1787
1788 // Add packed integers
1789 void paddb(XMMRegister dst, XMMRegister src);
1790 void paddw(XMMRegister dst, XMMRegister src);
1791 void paddd(XMMRegister dst, XMMRegister src);
1792 void paddq(XMMRegister dst, XMMRegister src);
1793 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1794 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1795 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1796 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1797 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1798 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1799 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1800 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1801
1802 // Sub packed integers
1803 void psubb(XMMRegister dst, XMMRegister src);
1804 void psubw(XMMRegister dst, XMMRegister src);
1805 void psubd(XMMRegister dst, XMMRegister src);
1806 void psubq(XMMRegister dst, XMMRegister src);
1807 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1808 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1809 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1810 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1811 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1812 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1813 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1814 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1815
1816 // Multiply packed integers (only shorts and ints)
1817 void pmullw(XMMRegister dst, XMMRegister src);
1818 void pmulld(XMMRegister dst, XMMRegister src);
1819 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1820 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1821 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1822 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1823
1824 // Shift left packed integers
1825 void psllw(XMMRegister dst, int shift);
1826 void pslld(XMMRegister dst, int shift);
1827 void psllq(XMMRegister dst, int shift);
1828 void psllw(XMMRegister dst, XMMRegister shift);
1829 void pslld(XMMRegister dst, XMMRegister shift);
1830 void psllq(XMMRegister dst, XMMRegister shift);
1831 void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1832 void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1833 void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1834 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1835 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1836 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1837
1838 // Logical shift right packed integers
1839 void psrlw(XMMRegister dst, int shift);
1840 void psrld(XMMRegister dst, int shift);
1841 void psrlq(XMMRegister dst, int shift);
1842 void psrlw(XMMRegister dst, XMMRegister shift);
1843 void psrld(XMMRegister dst, XMMRegister shift);
1844 void psrlq(XMMRegister dst, XMMRegister shift);
1845 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1846 void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1847 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1848 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1849 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1850 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1851
1852 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
1853 void psraw(XMMRegister dst, int shift);
1854 void psrad(XMMRegister dst, int shift);
1855 void psraw(XMMRegister dst, XMMRegister shift);
1856 void psrad(XMMRegister dst, XMMRegister shift);
1857 void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1858 void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1859 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1860 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1861
1862 // And packed integers
1863 void pand(XMMRegister dst, XMMRegister src);
1864 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1865 void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1866
1867 // Or packed integers
1868 void por(XMMRegister dst, XMMRegister src);
1869 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1870 void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1871
1872 // Xor packed integers
1873 void pxor(XMMRegister dst, XMMRegister src);
1874 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1875 void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1876
1877 // Copy low 128bit into high 128bit of YMM registers.
1878 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1879 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1880 void vextractf128h(XMMRegister dst, XMMRegister src);
1881
1882 // Load/store the high 128bit of YMM registers without destroying the other half.
1883 void vinsertf128h(XMMRegister dst, Address src);
1884 void vinserti128h(XMMRegister dst, Address src);
1885 void vextractf128h(Address dst, XMMRegister src);
1886 void vextracti128h(Address dst, XMMRegister src);
1887
1888 // duplicate 4-byte integer data from src into 8 locations in dest
1889 void vpbroadcastd(XMMRegister dst, XMMRegister src);
1890
1891 // Carry-Less Multiplication Quadword
1892 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
1893 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
1894
1895 // AVX instruction which is used to clear the upper 128 bits of YMM registers and
1896 // to avoid the transition penalty between AVX and SSE states. There is no
1897 // penalty if legacy SSE instructions are encoded using the VEX prefix because
1898 // they always clear the upper 128 bits. It should be used before calling
1899 // runtime code and native libraries.
1900 void vzeroupper();
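
// Typical use (sketch): emit vzeroupper() just before transferring control to
// SSE-compiled runtime or native code, so the callee's legacy SSE instructions
// do not pay the AVX-to-SSE state transition penalty described above.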
1901
1902 protected:
1903 // The next instructions require 16-byte address alignment in SSE mode.
1904 // They should be called only from the corresponding MacroAssembler instructions.
1905 void andpd(XMMRegister dst, Address src);
1906 void andps(XMMRegister dst, Address src);
1907 void xorpd(XMMRegister dst, Address src);
1908 void xorps(XMMRegister dst, Address src);
1909
1910 };
|
421
422 // x86 can do array addressing as a single operation since disp can be an absolute
423 // address; amd64 can't. We create a class that expresses the concept but does extra
424 // magic on amd64 to get the final result.
425
426 class ArrayAddress VALUE_OBJ_CLASS_SPEC {
427 private:
428
429 AddressLiteral _base;
430 Address _index;
431
432 public:
433
434 ArrayAddress() {}
435 ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}
436 AddressLiteral base() { return _base; }
437 Address index() { return _index; }
438
439 };
440
441 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512*2 / wordSize);
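// (512 bytes is the FXSAVE image; the size is doubled here, presumably to
//  leave room for the wider AVX-512 register state.)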
442
443 // The Intel x86/AMD64 Assembler: a pure assembler doing NO optimizations on the instruction
444 // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
445 // is what you get. The Assembler generates code into a CodeBuffer.
446
447 class Assembler : public AbstractAssembler {
448 friend class AbstractAssembler; // for the non-virtual hack
449 friend class LIR_Assembler; // as_Address()
450 friend class StubGenerator;
451
452 public:
453 enum Condition { // The x86 condition codes used for conditional jumps/moves.
454 zero = 0x4,
455 notZero = 0x5,
456 equal = 0x4,
457 notEqual = 0x5,
458 less = 0xc,
459 lessEqual = 0xe,
460 greater = 0xf,
461 greaterEqual = 0xd,
486
487 REX_B = 0x41,
488 REX_X = 0x42,
489 REX_XB = 0x43,
490 REX_R = 0x44,
491 REX_RB = 0x45,
492 REX_RX = 0x46,
493 REX_RXB = 0x47,
494
495 REX_W = 0x48,
496
497 REX_WB = 0x49,
498 REX_WX = 0x4A,
499 REX_WXB = 0x4B,
500 REX_WR = 0x4C,
501 REX_WRB = 0x4D,
502 REX_WRX = 0x4E,
503 REX_WRXB = 0x4F,
504
505 VEX_3bytes = 0xC4,
506 VEX_2bytes = 0xC5,
507 EVEX_4bytes = 0x62
508 };
509
510 enum VexPrefix {
511 VEX_B = 0x20,
512 VEX_X = 0x40,
513 VEX_R = 0x80,
514 VEX_W = 0x80
515 };
516
517 enum EvexPrefix {
518 EVEX_F = 0x04,
519 EVEX_V = 0x08,
520 EVEX_Rb = 0x10,
521 EVEX_X = 0x40,
522 EVEX_Z = 0x80
523 };
524
525 enum VexSimdPrefix {
526 VEX_SIMD_NONE = 0x0,
527 VEX_SIMD_66 = 0x1,
528 VEX_SIMD_F3 = 0x2,
529 VEX_SIMD_F2 = 0x3
530 };
531
532 enum VexOpcode {
533 VEX_OPCODE_NONE = 0x0,
534 VEX_OPCODE_0F = 0x1,
535 VEX_OPCODE_0F_38 = 0x2,
536 VEX_OPCODE_0F_3A = 0x3
537 };
538
539 enum AvxVectorLen {
540 AVX_128bit = 0x0,
541 AVX_256bit = 0x1,
542 AVX_512bit = 0x2,
543 AVX_NoVec = 0x4
544 };
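
// These values feed the vector-length fields of the prefixes: VEX.L
// (0 = 128-bit, 1 = 256-bit) and EVEX.L'L (2 = 512-bit); e.g.
// vector_len = AVX_256bit selects VEX.L = 1.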
545
546 enum EvexTupleType {
547 EVEX_FV = 0,
548 EVEX_HV = 4,
549 EVEX_FVM = 6,
550 EVEX_T1S = 7,
551 EVEX_T1F = 11,
552 EVEX_T2 = 13,
553 EVEX_T4 = 15,
554 EVEX_T8 = 17,
555 EVEX_HVM = 18,
556 EVEX_QVM = 19,
557 EVEX_OVM = 20,
558 EVEX_M128 = 21,
559 EVEX_DUP = 22,
560 EVEX_ETUP = 23
561 };
562
563 enum EvexInputSizeInBits {
564 EVEX_8bit = 0,
565 EVEX_16bit = 1,
566 EVEX_32bit = 2,
567 EVEX_64bit = 3
568 };
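
// Together the tuple type and input size drive the EVEX disp8*N compressed
// displacement: an 8-bit displacement is scaled by the memory chunk size N.
// Example: a full-vector (EVEX_FV) 512-bit access with EVEX_32bit inputs and
// no broadcast has N = 64, so disp8 = 2 encodes a byte offset of 128.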
569
570 enum WhichOperand {
571 // input to locate_operand, and format code for relocations
572 imm_operand = 0, // embedded 32-bit|64-bit immediate operand
573 disp32_operand = 1, // embedded 32-bit displacement or address
574 call32_operand = 2, // embedded 32-bit self-relative displacement
575 #ifndef _LP64
576 _WhichOperand_limit = 3
577 #else
578 narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop
579 _WhichOperand_limit = 4
580 #endif
581 };
582
583
584
585 // NOTE: The general philosophy of the declarations here is that 64bit versions
586 // of instructions are freely declared without the need to wrap them in an ifdef.
587 // (Some dangerous instructions are ifdef'd out of inappropriate jvms.)
588 // In the .cpp file the implementations are wrapped so that they are dropped out
589 // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
590 // to the size it was prior to merging up the 32bit and 64bit assemblers.
591 //
592 // This does mean you'll get a linker/runtime error if you use a 64bit-only instruction
593 // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
594
595 private:
596
597 int evex_encoding;
598 int input_size_in_bits;
599 int avx_vector_len;
600 int tuple_type;
601 bool is_evex_instruction;
602
603 // 64bit prefixes
604 int prefix_and_encode(int reg_enc, bool byteinst = false);
605 int prefixq_and_encode(int reg_enc);
606
607 int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
608 int prefixq_and_encode(int dst_enc, int src_enc);
609
610 void prefix(Register reg);
611 void prefix(Address adr);
612 void prefixq(Address adr);
613
614 void prefix(Address adr, Register reg, bool byteinst = false);
615 void prefix(Address adr, XMMRegister reg);
616 void prefixq(Address adr, Register reg);
617 void prefixq(Address adr, XMMRegister reg);
618
619 void prefetch_prefix(Address src);
620
621 void rex_prefix(Address adr, XMMRegister xreg,
622 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
623 int rex_prefix_and_encode(int dst_enc, int src_enc,
624 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
625
626 void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
627 int nds_enc, VexSimdPrefix pre, VexOpcode opc,
628 int vector_len);
629
630 void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
631 int nds_enc, VexSimdPrefix pre, VexOpcode opc,
632 bool is_extended_context, bool is_merge_context,
633 int vector_len, bool no_mask_reg);
634
635 void vex_prefix(Address adr, int nds_enc, int xreg_enc,
636 VexSimdPrefix pre, VexOpcode opc,
637 bool vex_w, int vector_len,
638 bool legacy_mode = false, bool no_mask_reg = false);
639
640 void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
641 VexSimdPrefix pre, int vector_len = AVX_128bit,
642 bool no_mask_reg = false, bool legacy_mode = false) {
643 int dst_enc = dst->encoding();
644 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
645 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
646 }
647
648 void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
649 VexSimdPrefix pre, int vector_len = AVX_128bit,
650 bool no_mask_reg = false) {
651 int dst_enc = dst->encoding();
652 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
653 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
654 }
655
656 void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
657 bool vex_w = false;
658 int vector_len = AVX_128bit;
659 vex_prefix(src, nds->encoding(), dst->encoding(),
660 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
661 vector_len, no_mask_reg);
662 }
663
664 void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
665 bool vex_w = true;
666 int vector_len = AVX_128bit;
667 vex_prefix(src, nds->encoding(), dst->encoding(),
668 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
669 vector_len, no_mask_reg);
670 }
671 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
672 VexSimdPrefix pre, VexOpcode opc,
673 bool vex_w, int vector_len,
674 bool legacy_mode, bool no_mask_reg);
675
676 int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
677 bool vex_w = false;
678 int vector_len = AVX_128bit;
679 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
680 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
681 false, no_mask_reg);
682 }
683 int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
684 bool vex_w = true;
685 int vector_len = AVX_128bit;
686 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
687 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
688 false, no_mask_reg);
689 }
690 int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
691 VexSimdPrefix pre, int vector_len = AVX_128bit,
692 VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
693 bool no_mask_reg = false) {
694 int src_enc = src->encoding();
695 int dst_enc = dst->encoding();
696 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
697 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
698 }
699
700 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
701 VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
702 bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
703
704 void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
705 bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
706 simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
707 }
708
709 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
710 simd_prefix(src, dst, pre, no_mask_reg);
711 }
712 void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
713 VexSimdPrefix pre, bool no_mask_reg = false) {
714 bool rex_w = true;
715 simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
716 }
717
718 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
719 VexSimdPrefix pre, bool no_mask_reg,
720 VexOpcode opc = VEX_OPCODE_0F,
721 bool rex_w = false, int vector_len = AVX_128bit,
722 bool legacy_mode = false);
723
724 int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
725 VexSimdPrefix pre, bool no_mask_reg,
726 VexOpcode opc = VEX_OPCODE_0F,
727 bool rex_w = false, int vector_len = AVX_128bit);
728
729 int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
730 VexSimdPrefix pre, bool no_mask_reg,
731 VexOpcode opc = VEX_OPCODE_0F,
732 bool rex_w = false, int vector_len = AVX_128bit);
733
734 // Move/convert 32-bit integer value.
735 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
736 VexSimdPrefix pre, bool no_mask_reg) {
737 // It is OK to cast from Register to XMMRegister to pass the argument here
738 // since only the encoding is used in simd_prefix_and_encode() and the number
739 // of general registers and XMM registers is the same.
740 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
741 }
742 int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
743 return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
744 }
745 int simd_prefix_and_encode(Register dst, XMMRegister src,
746 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
747 bool no_mask_reg = false) {
748 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
749 }
750
751 // Move/convert 64-bit integer value.
752 int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
753 VexSimdPrefix pre, bool no_mask_reg = false) {
754 bool rex_w = true;
755 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
756 }
757 int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
758 return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
759 }
760 int simd_prefix_and_encode_q(Register dst, XMMRegister src,
761 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
762 bool no_mask_reg = false) {
763 bool rex_w = true;
764 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
765 }
766
767 // Helper functions for groups of instructions
768 void emit_arith_b(int op1, int op2, Register dst, int imm8);
769
770 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
771 // Force generation of a 4-byte immediate value even if it fits into 8 bits
772 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
773 void emit_arith(int op1, int op2, Register dst, Register src);
774
775 void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
776 void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
777 void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
778 void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
779 void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
780 void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
781 void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
782 void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
783 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
784 Address src, VexSimdPrefix pre, int vector_len,
785 bool no_mask_reg = false, bool legacy_mode = false);
786 void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
787 Address src, VexSimdPrefix pre, int vector_len,
788 bool no_mask_reg = false);
789 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
790 XMMRegister src, VexSimdPrefix pre, int vector_len,
791 bool no_mask_reg = false, bool legacy_mode = false);
792 void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
793 XMMRegister src, VexSimdPrefix pre, int vector_len,
794 bool no_mask_reg = false);
795
796 bool emit_compressed_disp_byte(int &disp);
797
798 void emit_operand(Register reg,
799 Register base, Register index, Address::ScaleFactor scale,
800 int disp,
801 RelocationHolder const& rspec,
802 int rip_relative_correction = 0);
803
804 void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
805
806 // operands that only take the original 32bit registers
807 void emit_operand32(Register reg, Address adr);
808
809 void emit_operand(XMMRegister reg,
810 Register base, Register index, Address::ScaleFactor scale,
811 int disp,
812 RelocationHolder const& rspec);
813
814 void emit_operand(XMMRegister reg, Address adr);
815
816 void emit_operand(MMXRegister reg, Address adr);
902 // Move Scalar Double-Precision Floating-Point Values
903 void movsd(XMMRegister dst, Address src);
904 void movsd(XMMRegister dst, XMMRegister src);
905 void movsd(Address dst, XMMRegister src);
906 void movlpd(XMMRegister dst, Address src);
907
908 // Newer CPUs require use of movaps and movapd to avoid a partial register stall
909 // when moving between registers.
910 void movaps(XMMRegister dst, XMMRegister src);
911 void movapd(XMMRegister dst, XMMRegister src);
912
913 // End avoid using directly
914
915
916 // Instruction prefixes
917 void prefix(Prefix p);
918
919 public:
920
921 // Creation
922 Assembler(CodeBuffer* code) : AbstractAssembler(code) {
923 init_attributes();
924 }
925
926 // Decoding
927 static address locate_operand(address inst, WhichOperand which);
928 static address locate_next_instruction(address inst);
929
930 // Utilities
931 static bool is_polling_page_far() NOT_LP64({ return false;});
932 static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
933 int cur_tuple_type, int in_size_in_bits, int cur_encoding);
934
935 // Generic instructions
936 // These do 32bit or 64bit as needed for the platform. In some sense they
937 // belong in the macro assembler, but there is no need for both varieties to exist.
938
939 void init_attributes(void) {
940 evex_encoding = 0;
941 input_size_in_bits = 0;
942 avx_vector_len = AVX_NoVec;
943 tuple_type = EVEX_ETUP;
944 is_evex_instruction = false;
945 }
946
947 void lea(Register dst, Address src);
948
949 void mov(Register dst, Register src);
950
951 void pusha();
952 void popa();
953
954 void pushf();
955 void popf();
956
957 void push(int32_t imm32);
958
959 void push(Register src);
960
961 void pop(Register dst);
962
963 // These are dummies to prevent surprise implicit conversions to Register
964 void push(void* v);
965 void pop(void* v);
966
1427 if (offset < -128) {
1428 offset = -128;
1429 }
1430
1431 lock();
1432 addl(Address(rsp, offset), 0); // Assert the lock# signal here
1433 }
1434 }
1435 }
1436
1437 void mfence();
1438
1439 // Moves
1440
1441 void mov64(Register dst, int64_t imm64);
1442
1443 void movb(Address dst, Register src);
1444 void movb(Address dst, int imm8);
1445 void movb(Register dst, Address src);
1446
1447 void kmovq(KRegister dst, KRegister src);
1448 void kmovql(KRegister dst, Register src);
1449 void kmovdl(KRegister dst, Register src);
1450 void kmovq(Address dst, KRegister src);
1451 void kmovq(KRegister dst, Address src);
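
// Sketch (illustrative): kmovql(k1, rax) copies a mask from a general
// register into opmask register k1, and kmovq(Address(rsp, 0), k1) spills
// it to memory, using only the entry points declared above.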
1452
1453 void movdl(XMMRegister dst, Register src);
1454 void movdl(Register dst, XMMRegister src);
1455 void movdl(XMMRegister dst, Address src);
1456 void movdl(Address dst, XMMRegister src);
1457
1458 // Move Double Quadword
1459 void movdq(XMMRegister dst, Register src);
1460 void movdq(Register dst, XMMRegister src);
1461
1462 // Move Aligned Double Quadword
1463 void movdqa(XMMRegister dst, XMMRegister src);
1464 void movdqa(XMMRegister dst, Address src);
1465
1466 // Move Unaligned Double Quadword
1467 void movdqu(Address dst, XMMRegister src);
1468 void movdqu(XMMRegister dst, Address src);
1469 void movdqu(XMMRegister dst, XMMRegister src);
1470
1471 // Move Unaligned 256bit Vector
1472 void vmovdqu(Address dst, XMMRegister src);
1473 void vmovdqu(XMMRegister dst, Address src);
1474 void vmovdqu(XMMRegister dst, XMMRegister src);
1475
1476 // Move Unaligned 512bit Vector
1477 void evmovdqu(Address dst, XMMRegister src, int vector_len);
1478 void evmovdqu(XMMRegister dst, Address src, int vector_len);
1479 void evmovdqu(XMMRegister dst, XMMRegister src, int vector_len);
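
// Sketch (illustrative, assuming vector_len takes the AvxVectorLen values):
// evmovdqu(xmm0, Address(rsi, 0), AVX_512bit) loads a full 512-bit vector,
// while AVX_128bit/AVX_256bit select the narrower EVEX-encoded forms.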
1480
1481 // Move lower 64bit to high 64bit in 128bit register
1482 void movlhps(XMMRegister dst, XMMRegister src);
1483
1484 void movl(Register dst, int32_t imm32);
1485 void movl(Address dst, int32_t imm32);
1486 void movl(Register dst, Register src);
1487 void movl(Register dst, Address src);
1488 void movl(Address dst, Register src);
1489
1490 // These dummies prevent movl from converting a zero (like NULL) into a Register
1491 // by giving the compiler two choices it can't resolve
1492
1493 void movl(Address dst, void* junk);
1494 void movl(Register dst, void* junk);
1495
1496 #ifdef _LP64
1497 void movq(Register dst, Register src);
1498 void movq(Register dst, Address src);
1499 void movq(Address dst, Register src);
1500 #endif
1586
1587 void notl(Register dst);
1588
1589 #ifdef _LP64
1590 void notq(Register dst);
1591 #endif
1592
1593 void orl(Address dst, int32_t imm32);
1594 void orl(Register dst, int32_t imm32);
1595 void orl(Register dst, Address src);
1596 void orl(Register dst, Register src);
1597
1598 void orq(Address dst, int32_t imm32);
1599 void orq(Register dst, int32_t imm32);
1600 void orq(Register dst, Address src);
1601 void orq(Register dst, Register src);
1602
1603 // Pack with unsigned saturation
1604 void packuswb(XMMRegister dst, XMMRegister src);
1605 void packuswb(XMMRegister dst, Address src);
1606 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1607
1608 // Permutation of 64bit words
1609 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1610
1611 void pause();
1612
1613 // SSE4.2 string instructions
1614 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1615 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1616
1617 // SSE4.1 extract
1618 void pextrd(Register dst, XMMRegister src, int imm8);
1619 void pextrq(Register dst, XMMRegister src, int imm8);
1620
1621 // SSE4.1 insert
1622 void pinsrd(XMMRegister dst, Register src, int imm8);
1623 void pinsrq(XMMRegister dst, Register src, int imm8);
1624
1625 // SSE4.1 packed move
1626 void pmovzxbw(XMMRegister dst, XMMRegister src);
1627 void pmovzxbw(XMMRegister dst, Address src);
1628
1629 #ifndef _LP64 // no 32bit push/pop on amd64
1834 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1835 void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
1836 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1837 void vdivss(XMMRegister dst, XMMRegister nds, Address src);
1838 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1839 void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
1840 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1841 void vmulss(XMMRegister dst, XMMRegister nds, Address src);
1842 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1843 void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
1844 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1845 void vsubss(XMMRegister dst, XMMRegister nds, Address src);
1846 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1847
1848
1849 //====================VECTOR ARITHMETIC=====================================
1850
1851 // Add Packed Floating-Point Values
1852 void addpd(XMMRegister dst, XMMRegister src);
1853 void addps(XMMRegister dst, XMMRegister src);
1854 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1855 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1856 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1857 void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1858
1859 // Subtract Packed Floating-Point Values
1860 void subpd(XMMRegister dst, XMMRegister src);
1861 void subps(XMMRegister dst, XMMRegister src);
1862 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1863 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1864 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1865 void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1866
1867 // Multiply Packed Floating-Point Values
1868 void mulpd(XMMRegister dst, XMMRegister src);
1869 void mulps(XMMRegister dst, XMMRegister src);
1870 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1871 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1872 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1873 void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1874
1875 // Divide Packed Floating-Point Values
1876 void divpd(XMMRegister dst, XMMRegister src);
1877 void divps(XMMRegister dst, XMMRegister src);
1878 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1879 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1880 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1881 void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1882
1883 // Bitwise Logical AND of Packed Floating-Point Values
1884 void andpd(XMMRegister dst, XMMRegister src);
1885 void andps(XMMRegister dst, XMMRegister src);
1886 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1887 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1888 void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1889 void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1890
1891 // Bitwise Logical XOR of Packed Floating-Point Values
1892 void xorpd(XMMRegister dst, XMMRegister src);
1893 void xorps(XMMRegister dst, XMMRegister src);
1894 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1895 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1896 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1897 void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1898
1899 // Add horizontal packed integers
1900 void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1901 void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1902 void phaddw(XMMRegister dst, XMMRegister src);
1903 void phaddd(XMMRegister dst, XMMRegister src);
1904
1905 // Add packed integers
1906 void paddb(XMMRegister dst, XMMRegister src);
1907 void paddw(XMMRegister dst, XMMRegister src);
1908 void paddd(XMMRegister dst, XMMRegister src);
1909 void paddq(XMMRegister dst, XMMRegister src);
1910 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1911 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1912 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1913 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1914 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1915 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1916 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1917 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1918
1919 // Sub packed integers
1920 void psubb(XMMRegister dst, XMMRegister src);
1921 void psubw(XMMRegister dst, XMMRegister src);
1922 void psubd(XMMRegister dst, XMMRegister src);
1923 void psubq(XMMRegister dst, XMMRegister src);
1924 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1925 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1926 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1927 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1928 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1929 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1930 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1931 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1932
1933 // Multiply packed integers (only shorts and ints)
1934 void pmullw(XMMRegister dst, XMMRegister src);
1935 void pmulld(XMMRegister dst, XMMRegister src);
1936 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1937 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1938 void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1939 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1940 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1941 void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1942
1943 // Shift left packed integers
1944 void psllw(XMMRegister dst, int shift);
1945 void pslld(XMMRegister dst, int shift);
1946 void psllq(XMMRegister dst, int shift);
1947 void psllw(XMMRegister dst, XMMRegister shift);
1948 void pslld(XMMRegister dst, XMMRegister shift);
1949 void psllq(XMMRegister dst, XMMRegister shift);
1950 void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
1951 void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
1952 void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
1953 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
1954 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
1955 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
1956
1957 // Logical shift right packed integers
1958 void psrlw(XMMRegister dst, int shift);
1959 void psrld(XMMRegister dst, int shift);
1960 void psrlq(XMMRegister dst, int shift);
1961 void psrlw(XMMRegister dst, XMMRegister shift);
1962 void psrld(XMMRegister dst, XMMRegister shift);
1963 void psrlq(XMMRegister dst, XMMRegister shift);
1964 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
1965 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
1966 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
1967 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
1968 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
1969 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
1970
1971 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
1972 void psraw(XMMRegister dst, int shift);
1973 void psrad(XMMRegister dst, int shift);
1974 void psraw(XMMRegister dst, XMMRegister shift);
1975 void psrad(XMMRegister dst, XMMRegister shift);
1976 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
1977 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
1978 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
1979 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
1980
1981 // And packed integers
1982 void pand(XMMRegister dst, XMMRegister src);
1983 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1984 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1985
1986 // Or packed integers
1987 void por(XMMRegister dst, XMMRegister src);
1988 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1989 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1990
1991 // Xor packed integers
1992 void pxor(XMMRegister dst, XMMRegister src);
1993 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1994 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1995
1996 // Copy low 128bit into high 128bit of YMM registers.
1997 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1998 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1999 void vextractf128h(XMMRegister dst, XMMRegister src);
2000 void vextracti128h(XMMRegister dst, XMMRegister src);
2001
2002 // Load/store the high 128bit of YMM registers without destroying the other half.
2003 void vinsertf128h(XMMRegister dst, Address src);
2004 void vinserti128h(XMMRegister dst, Address src);
2005 void vextractf128h(Address dst, XMMRegister src);
2006 void vextracti128h(Address dst, XMMRegister src);
2007
2008 // Copy low 256bit into high 256bit of ZMM registers.
2009 void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2010 void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2011 void vextracti64x4h(XMMRegister dst, XMMRegister src);
2012 void vextractf64x4h(XMMRegister dst, XMMRegister src);
2013 void vextractf64x4h(Address dst, XMMRegister src);
2014 void vinsertf64x4h(XMMRegister dst, Address src);
2015
2016 // Copy targeted 128bit segments of the ZMM registers
2017 void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
2018 void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
2019 void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
2020
2021 // duplicate 4-byte integer data from src into 8 locations in dest
2022 void vpbroadcastd(XMMRegister dst, XMMRegister src);
2023
2024 // duplicate 4-byte integer data from src into vector_len locations in dest
2025 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2026
2027 // Carry-Less Multiplication Quadword
2028 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2029 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2030
2031 // AVX instruction which is used to clear the upper 128 bits of YMM registers and
2032 // to avoid the transition penalty between AVX and SSE states. There is no
2033 // penalty if legacy SSE instructions are encoded using the VEX prefix because
2034 // they always clear the upper 128 bits. It should be used before calling
2035 // runtime code and native libraries.
2036 void vzeroupper();
2037
2038 protected:
2039 // The next instructions require 16-byte address alignment in SSE mode.
2040 // They should be called only from the corresponding MacroAssembler instructions.
2041 void andpd(XMMRegister dst, Address src);
2042 void andps(XMMRegister dst, Address src);
2043 void xorpd(XMMRegister dst, Address src);
2044 void xorps(XMMRegister dst, Address src);
2045
2046 };
|