603 EVEX_8bit = 0,
604 EVEX_16bit = 1,
605 EVEX_32bit = 2,
606 EVEX_64bit = 3,
607 EVEX_NObit = 4
608 };
609
// Operand classification: used both as input to locate_operand() and as the
// format code recorded with relocations for instructions that carry an
// embedded operand.
enum WhichOperand {
  // input to locate_operand, and format code for relocations
  imm_operand = 0,    // embedded 32-bit|64-bit immediate operand
  disp32_operand = 1, // embedded 32-bit displacement or address
  call32_operand = 2, // embedded 32-bit self-relative displacement
#ifndef _LP64
  _WhichOperand_limit = 3
#else
  narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop
  _WhichOperand_limit = 4
#endif
};
622
// Comparison predicates for EVEX packed-compare instructions (e.g. evpcmpuw).
// The numeric values follow the hardware imm8 predicate encoding
// (0=eq .. 7=true), so the order must not be changed.
enum ComparisonPredicate {
  eq = 0,     // equal
  lt = 1,     // less-than (unsigned for the *u* compares)
  le = 2,     // less-than-or-equal
  _false = 3, // always false
  neq = 4,    // not-equal
  nlt = 5,    // not-less-than
  nle = 6,    // not-less-than-or-equal
  _true = 7   // always true
};
633
//---< calculate length of instruction >---
// As instruction size can't be found out easily on x86/x64,
// we just use '4' for len and maxlen.
// instruction must start at passed address
// The address is deliberately unused: decoding a real x86 instruction length
// is impractical here, so a fixed guess is returned. The (void) cast keeps
// -Wunused-parameter quiet without changing the signature.
static unsigned int instr_len(unsigned char *instr) { (void)instr; return 4; }
639
//---< longest instructions >---
// The architecture documentation specifies no maximum instruction length.
// A "safe enough" estimate would be 15, but we simply report the same
// fixed length guess that instr_len() uses above.
static unsigned int instr_maxlen() {
  return 4;
}
645
// NOTE: The general philosophy of the declarations here is that 64bit versions
// of instructions are freely declared without the need for wrapping them in an ifdef.
648 // (Some dangerous instructions are ifdef's out of inappropriate jvm's.)
649 // In the .cpp file the implementations are wrapped so that they are dropped out
650 // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
651 // to the size it was prior to merging up the 32bit and 64bit assemblers.
652 //
653 // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
// Repeat-prefixed string instructions.
void rep_stos();
void rep_stosb();
void repne_scan();
#ifdef _LP64
void repne_scanl();
#endif

// Vanilla instructions in lexical order

// Add with carry (32-bit operands).
void adcl(Address dst, int32_t imm32);
void adcl(Address dst, Register src);
void adcl(Register dst, int32_t imm32);
void adcl(Register dst, Address src);
void adcl(Register dst, Register src);

// Add with carry (64-bit operands).
void adcq(Register dst, int32_t imm32);
void adcq(Register dst, Address src);
void adcq(Register dst, Register src);

// Byte / word sized add to memory.
void addb(Address dst, int imm8);
void addw(Address dst, int imm16);

// Add (32-bit operands).
void addl(Address dst, int32_t imm32);
void addl(Address dst, Register src);
void addl(Register dst, int32_t imm32);
void addl(Register dst, Address src);
void addl(Register dst, Register src);

// Add (64-bit operands).
void addq(Address dst, int32_t imm32);
void addq(Address dst, Register src);
void addq(Register dst, int32_t imm32);
void addq(Register dst, Address src);
void addq(Register dst, Register src);

#ifdef _LP64
// Add Unsigned Integers with Carry Flag (ADCX)
void adcxq(Register dst, Register src);

// Add Unsigned Integers with Overflow Flag (ADOX)
void adoxq(Register dst, Register src);

// Add Scalar Single-Precision Floating-Point Values
void addss(XMMRegister dst, Address src);
void addss(XMMRegister dst, XMMRegister src);

// AES instructions
void aesdec(XMMRegister dst, Address src);
void aesdec(XMMRegister dst, XMMRegister src);
void aesdeclast(XMMRegister dst, Address src);
void aesdeclast(XMMRegister dst, XMMRegister src);
void aesenc(XMMRegister dst, Address src);
void aesenc(XMMRegister dst, XMMRegister src);
void aesenclast(XMMRegister dst, Address src);
void aesenclast(XMMRegister dst, XMMRegister src);
// Vector AES instructions (vector_len selects operand width)
void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Bitwise AND (32-bit operands).
void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src);
void andl(Register dst, Register src);

// Bitwise AND (64-bit operands).
void andq(Address dst, int32_t imm32);
void andq(Register dst, int32_t imm32);
void andq(Register dst, Address src);
void andq(Register dst, Register src);

// BMI instructions
// andn: dst = ~src1 & src2
void andnl(Register dst, Register src1, Register src2);
void andnl(Register dst, Register src1, Address src2);
void andnq(Register dst, Register src1, Register src2);
void andnq(Register dst, Register src1, Address src2);

// blsi: isolate lowest set bit, dst = src & -src
void blsil(Register dst, Register src);
void blsil(Register dst, Address src);
void blsiq(Register dst, Register src);
void blsiq(Register dst, Address src);
// Accumulate CRC32 of sizeInBytes bytes at adr into crc.
void crc32(Register crc, Address adr, int8_t sizeInBytes);

// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
void cvtsd2ss(XMMRegister dst, XMMRegister src);
void cvtsd2ss(XMMRegister dst, Address src);

// Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
void cvtsi2sdl(XMMRegister dst, Register src);
void cvtsi2sdl(XMMRegister dst, Address src);
void cvtsi2sdq(XMMRegister dst, Register src);
void cvtsi2sdq(XMMRegister dst, Address src);

// Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
void cvtsi2ssl(XMMRegister dst, Register src);
void cvtsi2ssl(XMMRegister dst, Address src);
void cvtsi2ssq(XMMRegister dst, Register src);
void cvtsi2ssq(XMMRegister dst, Address src);

// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
void cvtdq2pd(XMMRegister dst, XMMRegister src);

// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
void cvtdq2ps(XMMRegister dst, XMMRegister src);

// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
void cvtss2sd(XMMRegister dst, XMMRegister src);
void cvtss2sd(XMMRegister dst, Address src);

// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
void cvttsd2sil(Register dst, Address src);
void cvttsd2sil(Register dst, XMMRegister src);
void cvttsd2siq(Register dst, Address src);
void cvttsd2siq(Register dst, XMMRegister src);

// Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);

// Convert with Truncation Packed Double-Precision Values to Packed Doubleword Integers
void cvttpd2dq(XMMRegister dst, XMMRegister src);

// Abs of packed Integer values
void pabsb(XMMRegister dst, XMMRegister src);
void pabsw(XMMRegister dst, XMMRegister src);
void pabsd(XMMRegister dst, XMMRegister src);
void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);

// Divide Scalar Double-Precision Floating-Point Values
void divsd(XMMRegister dst, Address src);
void divsd(XMMRegister dst, XMMRegister src);

// Divide Scalar Single-Precision Floating-Point Values
void divss(XMMRegister dst, Address src);
void divss(XMMRegister dst, XMMRegister src);


// NOTE(review): the matching #endif for this block lies outside this excerpt.
#ifndef _LP64
private:

// Move Double Quadword
void movdq(XMMRegister dst, Register src);
void movdq(Register dst, XMMRegister src);

// Move Aligned Double Quadword
void movdqa(XMMRegister dst, XMMRegister src);
void movdqa(XMMRegister dst, Address src);

// Move Unaligned Double Quadword
void movdqu(Address dst, XMMRegister src);
void movdqu(XMMRegister dst, Address src);
void movdqu(XMMRegister dst, XMMRegister src);

// Move Unaligned 256bit Vector
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);

// Move Unaligned 512bit Vector
// (byte/word/dword/qword element variants; forms taking a KRegister
// perform a masked move)
void evmovdqub(Address dst, XMMRegister src, int vector_len);
void evmovdqub(XMMRegister dst, Address src, int vector_len);
void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evmovdquw(Address dst, XMMRegister src, int vector_len);
void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evmovdquw(XMMRegister dst, Address src, int vector_len);
void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evmovdqul(Address dst, XMMRegister src, int vector_len);
void evmovdqul(XMMRegister dst, Address src, int vector_len);
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdquq(Address dst, XMMRegister src, int vector_len);
void evmovdquq(XMMRegister dst, Address src, int vector_len);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);

// Move lower 64bit to high 64bit in 128bit register
void movlhps(XMMRegister dst, XMMRegister src);

// Move (32-bit operands).
void movl(Register dst, int32_t imm32);
void movl(Address dst, int32_t imm32);
void movl(Register dst, Register src);
void movl(Register dst, Address src);
void movl(Address dst, Register src);

// These dummies prevent using movl from converting a zero (like NULL) into Register
// by giving the compiler two choices it can't resolve

void movl(Address dst, void* junk);
void movl(Register dst, void* junk);

#ifdef _LP64
void movq(Register dst, Register src);
void movq(Register dst, Address src);
void movq(Address dst, Register src);

// These dummies prevent using movq from converting a zero (like NULL) into Register
// by giving the compiler two choices it can't resolve

void movq(Address dst, void* dummy);
void movq(Register dst, void* dummy);
#endif

// Move Quadword (XMM <-> memory)
void movq(Address dst, XMMRegister src);
void movq(XMMRegister dst, Address src);

// Move byte with sign extension.
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);

#ifdef _LP64
void movsbq(Register dst, Address src);
void movsbq(Register dst, Register src);

// Move signed 32bit immediate to 64bit extending sign
void movslq(Address dst, int32_t imm64);
void movslq(Register dst, int32_t imm64);

void movslq(Register dst, Address src);
void movslq(Register dst, Register src);
void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
#endif

// Move word with sign extension.
void movswl(Register dst, Address src);
void movswl(Register dst, Register src);

// Multiply Scalar Single-Precision Floating-Point Values
void mulss(XMMRegister dst, Address src);
void mulss(XMMRegister dst, XMMRegister src);

// Two's-complement negate.
void negl(Register dst);

#ifdef _LP64
void negq(Register dst);
#endif

// Emit nop padding; 'i' gives the number of nop bytes requested.
void nop(int i = 1);

// One's-complement (bitwise not).
void notl(Register dst);

#ifdef _LP64
void notq(Register dst);

// Bit test-and-set / test-and-reset with immediate bit index.
void btsq(Address dst, int imm8);
void btrq(Address dst, int imm8);
#endif

// Bitwise OR (32-bit operands).
void orl(Address dst, int32_t imm32);
void orl(Register dst, int32_t imm32);
void orl(Register dst, Address src);
void orl(Register dst, Register src);
void orl(Address dst, Register src);

void orb(Address dst, int imm8);

// Bitwise OR (64-bit operands).
void orq(Address dst, int32_t imm32);
void orq(Register dst, int32_t imm32);
void orq(Register dst, Address src);
void orq(Register dst, Register src);

// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Permutation of 64bit words
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Spin-wait hint for busy-wait loops.
void pause();

// Undefined Instruction (raises #UD)
void ud2();

// SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8);

// Compare packed bytes for equality; ev* forms write an opmask register.
void pcmpeqb(XMMRegister dst, XMMRegister src);
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

// Compare packed signed bytes for greater-than.
void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

// Compare packed unsigned words using the given ComparisonPredicate.
void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
void evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate of, int vector_len);
void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);

// Compare packed words for equality.
void pcmpeqw(XMMRegister dst, XMMRegister src);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);

// Compare packed doublewords for equality.
void pcmpeqd(XMMRegister dst, XMMRegister src);
void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len);

// Compare packed quadwords for equality.
void pcmpeqq(XMMRegister dst, XMMRegister src);
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);

// Extract byte mask from packed operand.
void pmovmskb(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src);

// SSE 4.1 extract
void pextrd(Register dst, XMMRegister src, int imm8);
void pextrq(Register dst, XMMRegister src, int imm8);
void pextrd(Address dst, XMMRegister src, int imm8);
void pextrq(Address dst, XMMRegister src, int imm8);
void pextrb(Address dst, XMMRegister src, int imm8);
// SSE 2 extract
void pextrw(Register dst, XMMRegister src, int imm8);
void pextrw(Address dst, XMMRegister src, int imm8);

// SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8);
void pinsrd(XMMRegister dst, Address src, int imm8);
void pinsrq(XMMRegister dst, Address src, int imm8);
void pinsrb(XMMRegister dst, Address src, int imm8);
// SSE 2 insert
void pinsrw(XMMRegister dst, Register src, int imm8);
void pinsrw(XMMRegister dst, Address src, int imm8);

// SSE4.1 packed move (zero extend)
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);

void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);

// Down-convert packed words to bytes; masked form available.
void evpmovwb(Address dst, XMMRegister src, int vector_len);
void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);

void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);

// Down-convert packed doublewords to bytes.
void evpmovdb(Address dst, XMMRegister src, int vector_len);

// Sign extend moves
void pmovsxbw(XMMRegister dst, XMMRegister src);
void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);

// Multiply add
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Multiply add accumulate
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

#ifndef _LP64 // no 32bit push/pop on amd64
void popl(Address dst);
#endif

#ifdef _LP64
void popq(Address dst);
#endif

// Population count.
void popcntl(Register dst, Address src);
void popcntl(Register dst, Register src);

void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);

#ifdef _LP64

// Prefetches (SSE, SSE2, 3DNOW only)

void prefetchnta(Address src);
void prefetchr(Address src);
void prefetcht0(Address src);
void prefetcht1(Address src);
void prefetcht2(Address src);
void prefetchw(Address src);

// Shuffle Bytes
void pshufb(XMMRegister dst, XMMRegister src);
void pshufb(XMMRegister dst, Address src);
void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode);
void pshufd(XMMRegister dst, Address src, int mode);
void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);

// Shuffle Packed Low Words
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode);

// Shuffle packed values at 128 bit granularity
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);

// Shift Right by bytes Logical DoubleQuadword Immediate
void psrldq(XMMRegister dst, int shift);
// Shift Left by bytes Logical DoubleQuadword Immediate
void pslldq(XMMRegister dst, int shift);

// Logical Compare 128bit
void ptest(XMMRegister dst, XMMRegister src);
void ptest(XMMRegister dst, Address src);
// Logical Compare 256bit
void vptest(XMMRegister dst, XMMRegister src);
void vptest(XMMRegister dst, Address src);

// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src);

// Interleave Low Doublewords
void punpckldq(XMMRegister dst, XMMRegister src);
void punpckldq(XMMRegister dst, Address src);

// Interleave Low Quadwords
void punpcklqdq(XMMRegister dst, XMMRegister src);

#ifndef _LP64 // no 32bit push/pop on amd64
void pushl(Address src);
#endif

void pushq(Address src);

// Rotate left through carry.
void rcll(Register dst, int imm8);

void rclq(Register dst, int imm8);
// Arithmetic shift right (64-bit); the no-immediate form shifts by cl.
void sarq(Register dst, int imm8);
void sarq(Register dst);

// Subtract with borrow (32-bit operands).
void sbbl(Address dst, int32_t imm32);
void sbbl(Register dst, int32_t imm32);
void sbbl(Register dst, Address src);
void sbbl(Register dst, Register src);

// Subtract with borrow (64-bit operands).
void sbbq(Address dst, int32_t imm32);
void sbbq(Register dst, int32_t imm32);
void sbbq(Register dst, Address src);
void sbbq(Register dst, Register src);

// Set byte on condition.
void setb(Condition cc, Register dst);

// Byte-alignment (concatenate-and-shift) family.
void palignr(XMMRegister dst, XMMRegister src, int imm8);
void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);

// Blend packed words under immediate mask.
void pblendw(XMMRegister dst, XMMRegister src, int imm8);

// SHA extensions.
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
void sha1nexte(XMMRegister dst, XMMRegister src);
void sha1msg1(XMMRegister dst, XMMRegister src);
void sha1msg2(XMMRegister dst, XMMRegister src);
// xmm0 is implicit additional source to the following instruction.
void sha256rnds2(XMMRegister dst, XMMRegister src);
void sha256msg1(XMMRegister dst, XMMRegister src);
void sha256msg2(XMMRegister dst, XMMRegister src);

// Double-precision shifts; no-immediate forms shift by cl.
void shldl(Register dst, Register src);
void shldl(Register dst, Register src, int8_t imm8);
void shrdl(Register dst, Register src);
void shrdl(Register dst, Register src, int8_t imm8);

// Shift left logical; no-immediate forms shift by cl.
void shll(Register dst, int imm8);
void shll(Register dst);

void shlq(Register dst, int imm8);
void shlq(Register dst);
// RTM: begin transactional region; 'abort' labels the abort handler.
void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);

// Exchange register with memory / register.
void xchgb(Register reg, Address adr);
void xchgw(Register reg, Address adr);
void xchgl(Register reg, Address adr);
void xchgl(Register dst, Register src);

void xchgq(Register reg, Address adr);
void xchgq(Register dst, Register src);

// RTM: end transactional region.
void xend();

// Get Value of Extended Control Register
void xgetbv();

// Bitwise XOR (32-bit operands).
void xorl(Register dst, int32_t imm32);
void xorl(Register dst, Address src);
void xorl(Register dst, Register src);

void xorb(Register dst, Address src);

// Bitwise XOR (64-bit operands).
void xorq(Register dst, Address src);
void xorq(Register dst, Register src);

void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0

// AVX 3-operands scalar instructions (encoded with VEX prefix)

void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vaddss(XMMRegister dst, XMMRegister nds, Address src);
void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vdivss(XMMRegister dst, XMMRegister nds, Address src);
void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmulss(XMMRegister dst, XMMRegister nds, Address src);
void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vsubss(XMMRegister dst, XMMRegister nds, Address src);
void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);

// Scalar min/max.
void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);

// BMI2 shifts (shift count in src2).
void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);

//====================VECTOR ARITHMETIC=====================================

// Add Packed Floating-Point Values
void addpd(XMMRegister dst, XMMRegister src);
void addpd(XMMRegister dst, Address src);
void addps(XMMRegister dst, XMMRegister src);
void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Subtract Packed Floating-Point Values
void subpd(XMMRegister dst, XMMRegister src);
void subps(XMMRegister dst, XMMRegister src);
void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Multiply Packed Floating-Point Values
void mulpd(XMMRegister dst, XMMRegister src);
// Memory-operand forms of packed integer add.
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Sub packed integers
void psubb(XMMRegister dst, XMMRegister src);
void psubw(XMMRegister dst, XMMRegister src);
void psubd(XMMRegister dst, XMMRegister src);
void psubq(XMMRegister dst, XMMRegister src);
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Multiply packed integers (only shorts and ints)
void pmullw(XMMRegister dst, XMMRegister src);
void pmulld(XMMRegister dst, XMMRegister src);
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Shift left packed integers
// (immediate-count forms, then forms taking the count in an XMM register)
void psllw(XMMRegister dst, int shift);
void pslld(XMMRegister dst, int shift);
void psllq(XMMRegister dst, int shift);
void psllw(XMMRegister dst, XMMRegister shift);
void pslld(XMMRegister dst, XMMRegister shift);
void psllq(XMMRegister dst, XMMRegister shift);
void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);

// Logical shift right packed integers
void psrlw(XMMRegister dst, int shift);
void psrld(XMMRegister dst, int shift);
void psrlq(XMMRegister dst, int shift);
void psrlw(XMMRegister dst, XMMRegister shift);
void psrlq(XMMRegister dst, XMMRegister shift);
void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
// Variable (per-element) word shifts.
void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
void psraw(XMMRegister dst, int shift);
void psrad(XMMRegister dst, int shift);
void psraw(XMMRegister dst, XMMRegister shift);
void psrad(XMMRegister dst, XMMRegister shift);
void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
// AVX-512 adds an arithmetic right shift for quadwords.
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Concatenate-and-shift (funnel shift) of packed doublewords.
void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// And packed integers
void pand(XMMRegister dst, XMMRegister src);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Andn packed integers
void pandn(XMMRegister dst, XMMRegister src);
void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Or packed integers
void por(XMMRegister dst, XMMRegister src);
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);


// vinserti forms
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);

// vinsertf forms
void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);

// vextracti forms
void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
void vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);
2240 void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2241 void vpbroadcastd(XMMRegister dst, Address src, int vector_len);
2242 void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2243 void vpbroadcastq(XMMRegister dst, Address src, int vector_len);
2244
2245 void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
2246 void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
2247
2248 // scalar single/double precision replicate
2249 void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2250 void vbroadcastss(XMMRegister dst, Address src, int vector_len);
2251 void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2252 void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
2253
2254 // gpr sourced byte/word/dword/qword replicate
2255 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2256 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2257 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2258 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2259
2260 void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len);
2261
2262 // Carry-Less Multiplication Quadword
2263 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2264 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2265 void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
2266 // AVX instruction which is used to clear upper 128 bits of YMM registers and
2267 // to avoid transaction penalty between AVX and SSE states. There is no
2268 // penalty if legacy SSE instructions are encoded using VEX prefix because
2269 // they always clear upper 128 bits. It should be used before calling
2270 // runtime code and native libraries.
2271 void vzeroupper();
2272
  // AVX support for vectorized conditional move (float/double). The following two instructions are used only as a coupled pair.
2274 void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2275 void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2276 void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2277 void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2278 void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2279
2280 protected:
2281 // Next instructions require address alignment 16 bytes SSE mode.
2282 // They should be called only from corresponding MacroAssembler instructions.
2283 void andpd(XMMRegister dst, Address src);
2284 void andps(XMMRegister dst, Address src);
2285 void xorpd(XMMRegister dst, Address src);
2286 void xorps(XMMRegister dst, Address src);
2287
2288 };
2289
2290 // The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
2291 // Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
2292 // are applied.
2293 class InstructionAttr {
2294 public:
2295 InstructionAttr(
2296 int vector_len, // The length of vector to be applied in encoding - for both AVX and EVEX
2297 bool rex_vex_w, // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true
2298 bool legacy_mode, // Details if either this instruction is conditionally encoded to AVX or earlier if true else possibly EVEX
2299 bool no_reg_mask, // when true, k0 is used when EVEX encoding is chosen, else embedded_opmask_register_specifier is used
  // Query the 3-bit opmask (k) register encoding embedded in the EVEX prefix.
  int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }
2356
  // Manually override the vector length used for AVX/EVEX encoding.
  void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
2359
  // Mark that the REX/VEX.W bit has been reverted for AVX encoding.
  void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
2362
  // Set the REX/VEX.W (operand width) bit to the given state.
  void set_rex_vex_w(bool state) { _rex_vex_w = state; }
2365
  // Force legacy-mode (AVX or earlier, never EVEX) encoding for this instruction.
  void set_is_legacy_mode(void) { _legacy_mode = true; }
2368
  // Mark the current instruction to be encoded as an EVEX instruction.
  void set_is_evex_instruction(void) { _is_evex_instruction = true; }
2371
  // Internal encoding data used in compressed immediate offset programming.
  void set_evex_encoding(int value) { _evex_encoding = value; }
2374
  // Reset the EVEX.Z (clear-context) flag so non-directed XMM/YMM/ZMM
  // components are not zeroed.
  void reset_is_clear_context(void) { _is_clear_context = false; }
2377
  // Map back to the current assembler so that we can manage object-level association.
  void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
2380
2381 // Address modifiers used for compressed displacement calculation
2382 void set_address_attributes(int tuple_type, int input_size_in_bits) {
2383 if (VM_Version::supports_evex()) {
2384 _tuple_type = tuple_type;
2385 _input_size_in_bits = input_size_in_bits;
2386 }
2387 }
2388
2389 // Set embedded opmask register specifier.
2390 void set_embedded_opmask_register_specifier(KRegister mask) {
2391 _embedded_opmask_register_specifier = (*mask).encoding() & 0x7;
2392 }
2393
2394 };
2395
|
603 EVEX_8bit = 0,
604 EVEX_16bit = 1,
605 EVEX_32bit = 2,
606 EVEX_64bit = 3,
607 EVEX_NObit = 4
608 };
609
  // Identifies which embedded operand of an instruction is of interest;
  // used both as input to locate_operand and as a format code for relocations.
  enum WhichOperand {
    // input to locate_operand, and format code for relocations
    imm_operand = 0,            // embedded 32-bit|64-bit immediate operand
    disp32_operand = 1,         // embedded 32-bit displacement or address
    call32_operand = 2,         // embedded 32-bit self-relative displacement
#ifndef _LP64
    _WhichOperand_limit = 3
#else
    narrow_oop_operand = 3,     // embedded 32-bit immediate narrow oop
    _WhichOperand_limit = 4
#endif
  };
622
  // Comparison predicates for integral types & FP types when using SSE
  enum ComparisonPredicate {
    eq = 0,       // equal
    lt = 1,       // less-than
    le = 2,       // less-than-or-equal
    _false = 3,   // always false
    neq = 4,      // not-equal
    nlt = 5,      // not-less-than
    nle = 6,      // not-(less-than-or-equal)
    _true = 7     // always true
  };
634
635 // Comparison predicates for FP types when using AVX
636 // O means ordered. U is unordered. When using ordered, any NaN comparison is false. Otherwise, it is true.
637 // S means signaling. Q means non-signaling. When signaling is true, instruction signals #IA on NaN.
638 enum ComparisonPredicateFP {
639 EQ_OQ = 0,
640 LT_OS = 1,
641 LE_OS = 2,
642 UNORD_Q = 3,
643 NEQ_UQ = 4,
644 NLT_US = 5,
645 NLE_US = 6,
646 ORD_Q = 7,
647 EQ_UQ = 8,
648 NGE_US = 9,
649 NGT_US = 0xA,
650 FALSE_OQ = 0XB,
651 NEQ_OQ = 0xC,
652 GE_OS = 0xD,
653 GT_OS = 0xE,
654 TRUE_UQ = 0xF,
655 EQ_OS = 0x10,
656 LT_OQ = 0x11,
657 LE_OQ = 0x12,
658 UNORD_S = 0x13,
659 NEQ_US = 0x14,
660 NLT_UQ = 0x15,
661 NLE_UQ = 0x16,
662 ORD_S = 0x17,
663 EQ_US = 0x18,
664 NGE_UQ = 0x19,
665 NGT_UQ = 0x1A,
666 FALSE_OS = 0x1B,
667 NEQ_OS = 0x1C,
668 GE_OQ = 0x1D,
669 GT_OQ = 0x1E,
670 TRUE_US =0x1F
671 };
672
  // Operand element width selectors.
  enum Width {
    B = 0,   // byte       (8 bits)
    W = 1,   // word       (16 bits)
    D = 2,   // doubleword (32 bits)
    Q = 3    // quadword   (64 bits)
  };
679
680 //---< calculate length of instruction >---
681 // As instruction size can't be found out easily on x86/x64,
682 // we just use '4' for len and maxlen.
683 // instruction must start at passed address
684 static unsigned int instr_len(unsigned char *instr) { return 4; }
685
686 //---< longest instructions >---
687 // Max instruction length is not specified in architecture documentation.
688 // We could use a "safe enough" estimate (15), but just default to
689 // instruction length guess from above.
690 static unsigned int instr_maxlen() { return 4; }
691
  // NOTE: The general philosophy of the declarations here is that 64bit versions
  // of instructions are freely declared without the need for wrapping them in an ifdef.
694 // (Some dangerous instructions are ifdef's out of inappropriate jvm's.)
695 // In the .cpp file the implementations are wrapped so that they are dropped out
696 // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
697 // to the size it was prior to merging up the 32bit and 64bit assemblers.
698 //
699 // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
979 void rep_stos();
980 void rep_stosb();
981 void repne_scan();
982 #ifdef _LP64
983 void repne_scanl();
984 #endif
985
986 // Vanilla instructions in lexical order
987
988 void adcl(Address dst, int32_t imm32);
989 void adcl(Address dst, Register src);
990 void adcl(Register dst, int32_t imm32);
991 void adcl(Register dst, Address src);
992 void adcl(Register dst, Register src);
993
994 void adcq(Register dst, int32_t imm32);
995 void adcq(Register dst, Address src);
996 void adcq(Register dst, Register src);
997
998 void addb(Address dst, int imm8);
999 void addw(Register dst, Register src);
1000 void addw(Address dst, int imm16);
1001
1002 void addl(Address dst, int32_t imm32);
1003 void addl(Address dst, Register src);
1004 void addl(Register dst, int32_t imm32);
1005 void addl(Register dst, Address src);
1006 void addl(Register dst, Register src);
1007
1008 void addq(Address dst, int32_t imm32);
1009 void addq(Address dst, Register src);
1010 void addq(Register dst, int32_t imm32);
1011 void addq(Register dst, Address src);
1012 void addq(Register dst, Register src);
1013
1014 #ifdef _LP64
1015 //Add Unsigned Integers with Carry Flag
1016 void adcxq(Register dst, Register src);
1017
1018 //Add Unsigned Integers with Overflow Flag
1019 void adoxq(Register dst, Register src);
1030
1031 // Add Scalar Single-Precision Floating-Point Values
1032 void addss(XMMRegister dst, Address src);
1033 void addss(XMMRegister dst, XMMRegister src);
1034
1035 // AES instructions
1036 void aesdec(XMMRegister dst, Address src);
1037 void aesdec(XMMRegister dst, XMMRegister src);
1038 void aesdeclast(XMMRegister dst, Address src);
1039 void aesdeclast(XMMRegister dst, XMMRegister src);
1040 void aesenc(XMMRegister dst, Address src);
1041 void aesenc(XMMRegister dst, XMMRegister src);
1042 void aesenclast(XMMRegister dst, Address src);
1043 void aesenclast(XMMRegister dst, XMMRegister src);
1044 // Vector AES instructions
1045 void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1046 void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1047 void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1048 void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1049
1050 void andw(Register dst, Register src);
1051
1052 void andl(Address dst, int32_t imm32);
1053 void andl(Register dst, int32_t imm32);
1054 void andl(Register dst, Address src);
1055 void andl(Register dst, Register src);
1056
1057 void andq(Address dst, int32_t imm32);
1058 void andq(Register dst, int32_t imm32);
1059 void andq(Register dst, Address src);
1060 void andq(Register dst, Register src);
1061
1062 // BMI instructions
1063 void andnl(Register dst, Register src1, Register src2);
1064 void andnl(Register dst, Register src1, Address src2);
1065 void andnq(Register dst, Register src1, Register src2);
1066 void andnq(Register dst, Register src1, Address src2);
1067
1068 void blsil(Register dst, Register src);
1069 void blsil(Register dst, Address src);
1070 void blsiq(Register dst, Register src);
1071 void blsiq(Register dst, Address src);
1157 void crc32(Register crc, Address adr, int8_t sizeInBytes);
1158
1159 // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
1160 void cvtsd2ss(XMMRegister dst, XMMRegister src);
1161 void cvtsd2ss(XMMRegister dst, Address src);
1162
1163 // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
1164 void cvtsi2sdl(XMMRegister dst, Register src);
1165 void cvtsi2sdl(XMMRegister dst, Address src);
1166 void cvtsi2sdq(XMMRegister dst, Register src);
1167 void cvtsi2sdq(XMMRegister dst, Address src);
1168
1169 // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
1170 void cvtsi2ssl(XMMRegister dst, Register src);
1171 void cvtsi2ssl(XMMRegister dst, Address src);
1172 void cvtsi2ssq(XMMRegister dst, Register src);
1173 void cvtsi2ssq(XMMRegister dst, Address src);
1174
1175 // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
1176 void cvtdq2pd(XMMRegister dst, XMMRegister src);
1177 void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1178
1179 // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
1180 void cvtdq2ps(XMMRegister dst, XMMRegister src);
1181 void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1182
1183 // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
1184 void cvtss2sd(XMMRegister dst, XMMRegister src);
1185 void cvtss2sd(XMMRegister dst, Address src);
1186
1187 // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
1188 void cvttsd2sil(Register dst, Address src);
1189 void cvttsd2sil(Register dst, XMMRegister src);
1190 void cvttsd2siq(Register dst, Address src);
1191 void cvttsd2siq(Register dst, XMMRegister src);
1192
1193 // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
1194 void cvttss2sil(Register dst, XMMRegister src);
1195 void cvttss2siq(Register dst, XMMRegister src);
1196
1197 // Convert vector double to int
1198 void cvttpd2dq(XMMRegister dst, XMMRegister src);
1199
1200 // Convert vector float and double
1201 void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
1202 void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
1203
1204 // Convert vector long to vector FP
1205 void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1206 void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1207
1208 // Evex casts with truncation
1209 void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
1210 void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
1211 void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
1212 void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
1213 void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
1214 void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
1215
  // Absolute value of packed integer values
1217 void pabsb(XMMRegister dst, XMMRegister src);
1218 void pabsw(XMMRegister dst, XMMRegister src);
1219 void pabsd(XMMRegister dst, XMMRegister src);
1220 void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
1221 void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
1222 void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
1223 void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);
1224
1225 // Divide Scalar Double-Precision Floating-Point Values
1226 void divsd(XMMRegister dst, Address src);
1227 void divsd(XMMRegister dst, XMMRegister src);
1228
1229 // Divide Scalar Single-Precision Floating-Point Values
1230 void divss(XMMRegister dst, Address src);
1231 void divss(XMMRegister dst, XMMRegister src);
1232
1233
1234 #ifndef _LP64
1235 private:
1555
1556 // Move Double Quadword
1557 void movdq(XMMRegister dst, Register src);
1558 void movdq(Register dst, XMMRegister src);
1559
1560 // Move Aligned Double Quadword
1561 void movdqa(XMMRegister dst, XMMRegister src);
1562 void movdqa(XMMRegister dst, Address src);
1563
1564 // Move Unaligned Double Quadword
1565 void movdqu(Address dst, XMMRegister src);
1566 void movdqu(XMMRegister dst, Address src);
1567 void movdqu(XMMRegister dst, XMMRegister src);
1568
1569 // Move Unaligned 256bit Vector
1570 void vmovdqu(Address dst, XMMRegister src);
1571 void vmovdqu(XMMRegister dst, Address src);
1572 void vmovdqu(XMMRegister dst, XMMRegister src);
1573
1574 // Move Unaligned 512bit Vector
1575 void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
1576 void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
1577 void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
1578 void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1579 void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
1580 void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1581 void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
1582 void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1583 void evmovdqul(Address dst, XMMRegister src, int vector_len);
1584 void evmovdqul(XMMRegister dst, Address src, int vector_len);
1585 void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
1586 void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1587 void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1588 void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1589 void evmovdquq(Address dst, XMMRegister src, int vector_len);
1590 void evmovdquq(XMMRegister dst, Address src, int vector_len);
1591 void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
1592 void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1593 void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1594 void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1595
1596 // Move lower 64bit to high 64bit in 128bit register
1597 void movlhps(XMMRegister dst, XMMRegister src);
1598
1599 void movl(Register dst, int32_t imm32);
1600 void movl(Address dst, int32_t imm32);
1601 void movl(Register dst, Register src);
1602 void movl(Register dst, Address src);
1603 void movl(Address dst, Register src);
1604
1605 // These dummies prevent using movl from converting a zero (like NULL) into Register
1606 // by giving the compiler two choices it can't resolve
1607
1608 void movl(Address dst, void* junk);
1609 void movl(Register dst, void* junk);
1610
1611 #ifdef _LP64
1612 void movq(Register dst, Register src);
1613 void movq(Register dst, Address src);
1614 void movq(Address dst, Register src);
1615
1616 // These dummies prevent using movq from converting a zero (like NULL) into Register
1617 // by giving the compiler two choices it can't resolve
1618
1619 void movq(Address dst, void* dummy);
1620 void movq(Register dst, void* dummy);
1621 #endif
1622
1623 // Move Quadword
1624 void movq(Address dst, XMMRegister src);
1625 void movq(XMMRegister dst, Address src);
1626 void movq(XMMRegister dst, XMMRegister src);
1627 void movq(Register dst, XMMRegister src);
1628 void movq(XMMRegister dst, Register src);
1629
1630 void movsbl(Register dst, Address src);
1631 void movsbl(Register dst, Register src);
1632
1633 #ifdef _LP64
1634 void movsbq(Register dst, Address src);
1635 void movsbq(Register dst, Register src);
1636
1637 // Move signed 32bit immediate to 64bit extending sign
1638 void movslq(Address dst, int32_t imm64);
1639 void movslq(Register dst, int32_t imm64);
1640
1641 void movslq(Register dst, Address src);
1642 void movslq(Register dst, Register src);
1643 void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
1644 #endif
1645
1646 void movswl(Register dst, Address src);
1647 void movswl(Register dst, Register src);
1648
1689 void mulss(XMMRegister dst, Address src);
1690 void mulss(XMMRegister dst, XMMRegister src);
1691
1692 void negl(Register dst);
1693
1694 #ifdef _LP64
1695 void negq(Register dst);
1696 #endif
1697
1698 void nop(int i = 1);
1699
1700 void notl(Register dst);
1701
1702 #ifdef _LP64
1703 void notq(Register dst);
1704
1705 void btsq(Address dst, int imm8);
1706 void btrq(Address dst, int imm8);
1707 #endif
1708
1709 void orw(Register dst, Register src);
1710
1711 void orl(Address dst, int32_t imm32);
1712 void orl(Register dst, int32_t imm32);
1713 void orl(Register dst, Address src);
1714 void orl(Register dst, Register src);
1715 void orl(Address dst, Register src);
1716
1717 void orb(Address dst, int imm8);
1718
1719 void orq(Address dst, int32_t imm32);
1720 void orq(Register dst, int32_t imm32);
1721 void orq(Register dst, Address src);
1722 void orq(Register dst, Register src);
1723
1724 // Pack with signed saturation
1725 void packsswb(XMMRegister dst, XMMRegister src);
1726 void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1727 void packssdw(XMMRegister dst, XMMRegister src);
1728 void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1729
1730 // Pack with unsigned saturation
1731 void packuswb(XMMRegister dst, XMMRegister src);
1732 void packuswb(XMMRegister dst, Address src);
1733 void packusdw(XMMRegister dst, XMMRegister src);
1734 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1735 void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1736
1737 // Permutations
1738 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1739 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1740 void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1741 void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1742 void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1743 void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1744 void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1745 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1746 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1747 void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1748 void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1749 void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1750 void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1751
1752 void pause();
1753
1754 // Undefined Instruction
1755 void ud2();
1756
1757 // SSE4.2 string instructions
1758 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1759 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1760
1761 void pcmpeqb(XMMRegister dst, XMMRegister src);
1762 void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1763
1764 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1765 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1766 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1767 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1768
1769 void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1770 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1771 void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1772
1773 void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1774 void evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate of, int vector_len);
1775 void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);
1776
1777 void pcmpeqw(XMMRegister dst, XMMRegister src);
1778 void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1779 void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1780 void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1781
1782 void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1783
1784 void pcmpeqd(XMMRegister dst, XMMRegister src);
1785 void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1786 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
1787 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1788
1789 void pcmpeqq(XMMRegister dst, XMMRegister src);
1790 void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1791 void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1792 void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1793 void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1794
1795 void pcmpgtq(XMMRegister dst, XMMRegister src);
1796 void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1797
1798 void pmovmskb(Register dst, XMMRegister src);
1799 void vpmovmskb(Register dst, XMMRegister src);
1800
1801 // SSE 4.1 extract
1802 void pextrd(Register dst, XMMRegister src, int imm8);
1803 void pextrq(Register dst, XMMRegister src, int imm8);
1804 void pextrd(Address dst, XMMRegister src, int imm8);
1805 void pextrq(Address dst, XMMRegister src, int imm8);
1806 void pextrb(Register dst, XMMRegister src, int imm8);
1807 void pextrb(Address dst, XMMRegister src, int imm8);
1808 // SSE 2 extract
1809 void pextrw(Register dst, XMMRegister src, int imm8);
1810 void pextrw(Address dst, XMMRegister src, int imm8);
1811
1812 // SSE 4.1 insert
1813 void pinsrd(XMMRegister dst, Register src, int imm8);
1814 void pinsrq(XMMRegister dst, Register src, int imm8);
1815 void pinsrb(XMMRegister dst, Register src, int imm8);
1816 void pinsrd(XMMRegister dst, Address src, int imm8);
1817 void pinsrq(XMMRegister dst, Address src, int imm8);
1818 void pinsrb(XMMRegister dst, Address src, int imm8);
1819 void insertps(XMMRegister dst, XMMRegister src, int imm8);
1820 // SSE 2 insert
1821 void pinsrw(XMMRegister dst, Register src, int imm8);
1822 void pinsrw(XMMRegister dst, Address src, int imm8);
1823
1824 // AVX insert
1825 void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1826 void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1827 void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1828 void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1829 void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1830
1831 // Zero extend moves
1832 void pmovzxbw(XMMRegister dst, XMMRegister src);
1833 void pmovzxbw(XMMRegister dst, Address src);
1834 void pmovzxbd(XMMRegister dst, XMMRegister src);
1835 void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1836 void pmovzxdq(XMMRegister dst, XMMRegister src);
1837 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1838 void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
1839 void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
1840 void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
1841 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1842
1843 // Sign extend moves
1844 void pmovsxbd(XMMRegister dst, XMMRegister src);
1845 void pmovsxbq(XMMRegister dst, XMMRegister src);
1846 void pmovsxbw(XMMRegister dst, XMMRegister src);
1847 void pmovsxwd(XMMRegister dst, XMMRegister src);
1848 void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
1849 void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
1850 void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
1851 void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
1852 void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
1853 void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);
1854
1855 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1856 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1857
1858 void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1859
1860 void evpmovdb(Address dst, XMMRegister src, int vector_len);
1861
1862 // Multiply add
1863 void pmaddwd(XMMRegister dst, XMMRegister src);
1864 void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1865 // Multiply add accumulate
1866 void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1867
1868 #ifndef _LP64 // no 32bit push/pop on amd64
1869 void popl(Address dst);
1870 #endif
1871
1872 #ifdef _LP64
1873 void popq(Address dst);
1874 #endif
1875
1876 void popcntl(Register dst, Address src);
1877 void popcntl(Register dst, Register src);
1878
1879 void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1880
1881 #ifdef _LP64
1885
1886 // Prefetches (SSE, SSE2, 3DNOW only)
1887
1888 void prefetchnta(Address src);
1889 void prefetchr(Address src);
1890 void prefetcht0(Address src);
1891 void prefetcht1(Address src);
1892 void prefetcht2(Address src);
1893 void prefetchw(Address src);
1894
1895 // Shuffle Bytes
1896 void pshufb(XMMRegister dst, XMMRegister src);
1897 void pshufb(XMMRegister dst, Address src);
1898 void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1899
1900 // Shuffle Packed Doublewords
1901 void pshufd(XMMRegister dst, XMMRegister src, int mode);
1902 void pshufd(XMMRegister dst, Address src, int mode);
1903 void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
1904
1905 // Shuffle Packed High/Low Words
1906 void pshufhw(XMMRegister dst, XMMRegister src, int mode);
1907 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1908 void pshuflw(XMMRegister dst, Address src, int mode);
1909
1910 //shuffle floats and doubles
1911 void pshufps(XMMRegister, XMMRegister, int);
1912 void pshufpd(XMMRegister, XMMRegister, int);
1913 void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
1914 void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
1915
1916 // Shuffle packed values at 128 bit granularity
1917 void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
1918
1919 // Shift Right by bytes Logical DoubleQuadword Immediate
1920 void psrldq(XMMRegister dst, int shift);
1921 // Shift Left by bytes Logical DoubleQuadword Immediate
1922 void pslldq(XMMRegister dst, int shift);
1923
1924 // Logical Compare 128bit
1925 void ptest(XMMRegister dst, XMMRegister src);
1926 void ptest(XMMRegister dst, Address src);
1927 // Logical Compare 256bit
1928 void vptest(XMMRegister dst, XMMRegister src);
1929 void vptest(XMMRegister dst, Address src);
1930
1931 // Vector compare
1932 void vptest(XMMRegister dst, XMMRegister src, int vector_len);
1933
1934 // Interleave Low Bytes
1935 void punpcklbw(XMMRegister dst, XMMRegister src);
1936 void punpcklbw(XMMRegister dst, Address src);
1937
1938 // Interleave Low Doublewords
1939 void punpckldq(XMMRegister dst, XMMRegister src);
1940 void punpckldq(XMMRegister dst, Address src);
1941
1942 // Interleave Low Quadwords
1943 void punpcklqdq(XMMRegister dst, XMMRegister src);
1944
1945 #ifndef _LP64 // no 32bit push/pop on amd64
1946 void pushl(Address src);
1947 #endif
1948
1949 void pushq(Address src);
1950
1951 void rcll(Register dst, int imm8);
1952
1953 void rclq(Register dst, int imm8);
// Arithmetic shift right (64-bit); single-operand form shifts by CL
void sarq(Register dst, int imm8);
void sarq(Register dst);

// Subtract with borrow (32-bit)
void sbbl(Address dst, int32_t imm32);
void sbbl(Register dst, int32_t imm32);
void sbbl(Register dst, Address src);
void sbbl(Register dst, Register src);

// Subtract with borrow (64-bit)
void sbbq(Address dst, int32_t imm32);
void sbbq(Register dst, int32_t imm32);
void sbbq(Register dst, Address src);
void sbbq(Register dst, Register src);

// Set byte on condition
void setb(Condition cc, Register dst);

// Packed align right (concatenate and byte-shift)
void palignr(XMMRegister dst, XMMRegister src, int imm8);
void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);

// Blend packed elements under immediate control mask
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);

// SHA extension instructions
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
void sha1nexte(XMMRegister dst, XMMRegister src);
void sha1msg1(XMMRegister dst, XMMRegister src);
void sha1msg2(XMMRegister dst, XMMRegister src);
// xmm0 is implicit additional source to the following instruction.
void sha256rnds2(XMMRegister dst, XMMRegister src);
void sha256msg1(XMMRegister dst, XMMRegister src);
void sha256msg2(XMMRegister dst, XMMRegister src);

// Double-precision (two-register) shifts; forms without imm8 shift by CL
void shldl(Register dst, Register src);
void shldl(Register dst, Register src, int8_t imm8);
void shrdl(Register dst, Register src);
void shrdl(Register dst, Register src, int8_t imm8);

// Shift left logical (32-bit); single-operand form shifts by CL
void shll(Register dst, int imm8);
void shll(Register dst);

// Shift left logical (64-bit); single-operand form shifts by CL
void shlq(Register dst, int imm8);
void shlq(Register dst);
// Transactional execution (RTM): begin region, branching to 'abort' on abort
void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);

// Exchange register with memory/register
void xchgb(Register reg, Address adr);
void xchgw(Register reg, Address adr);
void xchgl(Register reg, Address adr);
void xchgl(Register dst, Register src);

void xchgq(Register reg, Address adr);
void xchgq(Register dst, Register src);

// Transactional execution (RTM): end region
void xend();

// Get Value of Extended Control Register
void xgetbv();

// Logical exclusive-or
void xorl(Register dst, int32_t imm32);
void xorl(Register dst, Address src);
void xorl(Register dst, Register src);

void xorb(Register dst, Address src);
void xorw(Register dst, Register src);

void xorq(Register dst, Address src);
void xorq(Register dst, Register src);

void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0

// AVX 3-operands scalar instructions (encoded with VEX prefix)

void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vaddss(XMMRegister dst, XMMRegister nds, Address src);
void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vdivss(XMMRegister dst, XMMRegister nds, Address src);
void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmulss(XMMRegister dst, XMMRegister nds, Address src);
void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vsubss(XMMRegister dst, XMMRegister nds, Address src);
void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);

// Scalar min/max
void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);

// BMI2: shift left logical without affecting flags
void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);
2150
//====================VECTOR ARITHMETIC=====================================
// Convert packed elements to an opmask (one mask bit per element)
void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);

// Add Packed Floating-Point Values
void addpd(XMMRegister dst, XMMRegister src);
void addpd(XMMRegister dst, Address src);
void addps(XMMRegister dst, XMMRegister src);
void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Subtract Packed Floating-Point Values
void subpd(XMMRegister dst, XMMRegister src);
void subps(XMMRegister dst, XMMRegister src);
void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Multiply Packed Floating-Point Values
void mulpd(XMMRegister dst, XMMRegister src);
// Add packed integers (memory-operand forms)
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Sub packed integers
void psubb(XMMRegister dst, XMMRegister src);
void psubw(XMMRegister dst, XMMRegister src);
void psubd(XMMRegister dst, XMMRegister src);
void psubq(XMMRegister dst, XMMRegister src);
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Multiply packed integers (only shorts and ints)
void pmullw(XMMRegister dst, XMMRegister src);
void pmulld(XMMRegister dst, XMMRegister src);
void pmuludq(XMMRegister dst, XMMRegister src);
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Minimum of packed integers
void pminsb(XMMRegister dst, XMMRegister src);
void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pminsw(XMMRegister dst, XMMRegister src);
void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pminsd(XMMRegister dst, XMMRegister src);
void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void minps(XMMRegister dst, XMMRegister src);
void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void minpd(XMMRegister dst, XMMRegister src);
void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);

// Maximum of packed integers
void pmaxsb(XMMRegister dst, XMMRegister src);
void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pmaxsw(XMMRegister dst, XMMRegister src);
void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pmaxsd(XMMRegister dst, XMMRegister src);
void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void maxps(XMMRegister dst, XMMRegister src);
void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void maxpd(XMMRegister dst, XMMRegister src);
void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
// Shift left packed integers ('int shift' forms take an immediate count,
// XMMRegister 'shift' forms take the count from a vector register)
void psllw(XMMRegister dst, int shift);
void pslld(XMMRegister dst, int shift);
void psllq(XMMRegister dst, int shift);
void psllw(XMMRegister dst, XMMRegister shift);
void pslld(XMMRegister dst, XMMRegister shift);
void psllq(XMMRegister dst, XMMRegister shift);
void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2312
2313 // Logical shift right packed integers
2314 void psrlw(XMMRegister dst, int shift);
2315 void psrld(XMMRegister dst, int shift);
2316 void psrlq(XMMRegister dst, int shift);
2317 void psrlw(XMMRegister dst, XMMRegister shift);
2319 void psrlq(XMMRegister dst, XMMRegister shift);
2320 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2321 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2322 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2323 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2324 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2325 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2326 void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2327 void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2328 void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2329
// Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
void psraw(XMMRegister dst, int shift);
void psrad(XMMRegister dst, int shift);
void psraw(XMMRegister dst, XMMRegister shift);
void psrad(XMMRegister dst, XMMRegister shift);
void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
// AVX-512 forms cover words (evpsravw) and quadwords (evpsraq)
void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Variable shift left packed integers (per-element counts)
void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Variable shift right packed integers (per-element counts)
void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Variable shift right arithmetic packed integers (per-element counts)
void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Concatenate-and-shift (funnel) variable shifts
void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2357
// And packed integers
void pand(XMMRegister dst, XMMRegister src);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Andn packed integers (dst = ~dst & src element-wise semantics of PANDN)
void pandn(XMMRegister dst, XMMRegister src);
void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Or packed integers
void por(XMMRegister dst, XMMRegister src);
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// EVEX masked or ('merge' selects merge- vs zero-masking)
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2386
// vinserti forms (insert 128/256-bit lane selected by imm8)
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);

// vinsertf forms (floating-point flavors of the above)
void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);

// vextracti forms (extract 128-bit lane selected by imm8)
void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
void vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);
// dword/qword replicate (broadcast first element to all lanes)
void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastd(XMMRegister dst, Address src, int vector_len);
void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastq(XMMRegister dst, Address src, int vector_len);

// 128-bit (two-qword) replicate
void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);

// scalar single/double precision replicate
void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
void vbroadcastss(XMMRegister dst, Address src, int vector_len);
void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
void vbroadcastsd(XMMRegister dst, Address src, int vector_len);

// gpr sourced byte/word/dword/qword replicate
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
void evpbroadcastq(XMMRegister dst, Register src, int vector_len);

// Gather AVX2 and AVX3
void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);

// Scatter AVX3 only
void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);

// Carry-Less Multiplication Quadword
void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
// AVX instruction which is used to clear upper 128 bits of YMM registers and
// to avoid transaction penalty between AVX and SSE states. There is no
// penalty if legacy SSE instructions are encoded using VEX prefix because
// they always clear upper 128 bits. It should be used before calling
// runtime code and native libraries.
void vzeroupper();
2471
// Vector double compares ('cop'/'comparison' select the compare predicate)
void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             ComparisonPredicateFP comparison, int vector_len);

// Vector float compares
void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             ComparisonPredicateFP comparison, int vector_len);

// Vector integer compares (EVEX forms write the result to an opmask register)
void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             int comparison, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
             int comparison, int vector_len);

// Vector long compares
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             int comparison, int vector_len);
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
             int comparison, int vector_len);

// Vector byte compares
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             int comparison, int vector_len);
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
             int comparison, int vector_len);

// Vector short compares
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             int comparison, int vector_len);
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
             int comparison, int vector_len);

// Vector blends ('merge' in the EVEX forms selects merge- vs zero-masking)
void blendvps(XMMRegister dst, XMMRegister src);
void blendvpd(XMMRegister dst, XMMRegister src);
void pblendvb(XMMRegister dst, XMMRegister src);
void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
protected:
// The following instructions require 16-byte address alignment in SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
void andpd(XMMRegister dst, Address src);
void andps(XMMRegister dst, Address src);
void xorpd(XMMRegister dst, Address src);
void xorps(XMMRegister dst, Address src);

};
2530
// The Intel x86/AMD64 assembler attributes: all fields enclosed here guide encoding-level decisions.
// The specific set functions are for specialized use; otherwise the defaults (or whatever was
// supplied at object construction) are applied.
2534 class InstructionAttr {
2535 public:
2536 InstructionAttr(
2537 int vector_len, // The length of vector to be applied in encoding - for both AVX and EVEX
2538 bool rex_vex_w, // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true
2539 bool legacy_mode, // Details if either this instruction is conditionally encoded to AVX or earlier if true else possibly EVEX
2540 bool no_reg_mask, // when true, k0 is used when EVEX encoding is chosen, else embedded_opmask_register_specifier is used
// Return the 3-bit opmask register encoding recorded for EVEX emission
int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }

// Set the vector len manually
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }

// Mark rex_vex_w as reverted for avx encoding
void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }

// Set rex_vex_w based on state
void set_rex_vex_w(bool state) { _rex_vex_w = state; }

// Set the instruction to be encoded in AVX mode
void set_is_legacy_mode(void) { _legacy_mode = true; }

// Set the current instruction to be encoded as an EVEX instruction
void set_is_evex_instruction(void) { _is_evex_instruction = true; }

// Internal encoding data used in compressed immediate offset programming
void set_evex_encoding(int value) { _evex_encoding = value; }

// When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components.
// This method unsets it so that merge semantics are used instead.
void reset_is_clear_context(void) { _is_clear_context = false; }

// Map back to current assembler so that we can manage object level association
void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
2622
2623 // Address modifiers used for compressed displacement calculation
2624 void set_address_attributes(int tuple_type, int input_size_in_bits) {
2625 if (VM_Version::supports_evex()) {
2626 _tuple_type = tuple_type;
2627 _input_size_in_bits = input_size_in_bits;
2628 }
2629 }
2630
2631 // Set embedded opmask register specifier.
2632 void set_embedded_opmask_register_specifier(KRegister mask) {
2633 _embedded_opmask_register_specifier = (*mask).encoding() & 0x7;
2634 }
2635
2636 };
2637
|