1085 void cvtdq2pd(XMMRegister dst, XMMRegister src);
1086
1087 // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Values
1088 void cvtdq2ps(XMMRegister dst, XMMRegister src);
1089
1090 // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
1091 void cvtss2sd(XMMRegister dst, XMMRegister src);
1092 void cvtss2sd(XMMRegister dst, Address src);
1093
1094 // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
1095 void cvttsd2sil(Register dst, Address src);
1096 void cvttsd2sil(Register dst, XMMRegister src);
1097 void cvttsd2siq(Register dst, XMMRegister src);
1098
1099 // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
1100 void cvttss2sil(Register dst, XMMRegister src);
1101 void cvttss2siq(Register dst, XMMRegister src);
1102
1103 void cvttpd2dq(XMMRegister dst, XMMRegister src);
1104
1105 // Divide Scalar Double-Precision Floating-Point Values
1106 void divsd(XMMRegister dst, Address src);
1107 void divsd(XMMRegister dst, XMMRegister src);
1108
1109 // Divide Scalar Single-Precision Floating-Point Values
1110 void divss(XMMRegister dst, Address src);
1111 void divss(XMMRegister dst, XMMRegister src);
1112
1113 void emms();
1114
1115 void fabs();
1116
1117 void fadd(int i);
1118
1119 void fadd_d(Address src);
1120 void fadd_s(Address src);
1121
1122 // "Alternate" versions of x87 instructions place result down in FPU
1123 // stack instead of on TOS
1124
1572 void orl(Register dst, int32_t imm32);
1573 void orl(Register dst, Address src);
1574 void orl(Register dst, Register src);
1575 void orl(Address dst, Register src);
1576
1577 void orb(Address dst, int imm8);
1578
1579 void orq(Address dst, int32_t imm32);
1580 void orq(Register dst, int32_t imm32);
1581 void orq(Register dst, Address src);
1582 void orq(Register dst, Register src);
1583
1584 // Pack with unsigned saturation
1585 void packuswb(XMMRegister dst, XMMRegister src);
1586 void packuswb(XMMRegister dst, Address src);
1587 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1588
1589 // Permutation of 64-bit words
1590 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1591 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1592 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1593 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1594 void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1595
1596 void pause();
1597
1598 // Undefined Instruction
1599 void ud2();
1600
1601 // SSE4.2 string instructions
1602 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1603 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1604
1605 void pcmpeqb(XMMRegister dst, XMMRegister src);
1606 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1607 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1608 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1609 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1610
1611 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1651 void pinsrb(XMMRegister dst, Address src, int imm8);
1652 // SSE 2 insert
1653 void pinsrw(XMMRegister dst, Register src, int imm8);
1654 void pinsrw(XMMRegister dst, Address src, int imm8);
1655
1656 // SSE4.1 packed move
1657 void pmovzxbw(XMMRegister dst, XMMRegister src);
1658 void pmovzxbw(XMMRegister dst, Address src);
1659
1660 void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1661 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1662 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1663
1664 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1665 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1666
1667 void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1668
1669 void evpmovdb(Address dst, XMMRegister src, int vector_len);
1670
1671 // Multiply add
1672 void pmaddwd(XMMRegister dst, XMMRegister src);
1673 void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1674 // Multiply add accumulate
1675 void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1676
1677 #ifndef _LP64 // no 32bit push/pop on amd64
1678 void popl(Address dst);
1679 #endif
1680
1681 #ifdef _LP64
1682 void popq(Address dst);
1683 #endif
1684
1685 void popcntl(Register dst, Address src);
1686 void popcntl(Register dst, Register src);
1687
1688 void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1689
1690 #ifdef _LP64
2077 void psrlq(XMMRegister dst, XMMRegister shift);
2078 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2079 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2080 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2081 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2082 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2083 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2084 void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2085 void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2086 void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2087
2088 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2089 void psraw(XMMRegister dst, int shift);
2090 void psrad(XMMRegister dst, int shift);
2091 void psraw(XMMRegister dst, XMMRegister shift);
2092 void psrad(XMMRegister dst, XMMRegister shift);
2093 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2094 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2095 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2096 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2097
2098 // And packed integers
2099 void pand(XMMRegister dst, XMMRegister src);
2100 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2101 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2102 void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2103
2104 // Andn packed integers
2105 void pandn(XMMRegister dst, XMMRegister src);
2106 void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2107
2108 // Or packed integers
2109 void por(XMMRegister dst, XMMRegister src);
2110 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2111 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2112 void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2113
2114 // Xor packed integers
2115 void pxor(XMMRegister dst, XMMRegister src);
2116 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
1085 void cvtdq2pd(XMMRegister dst, XMMRegister src);
1086
1087 // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Values
1088 void cvtdq2ps(XMMRegister dst, XMMRegister src);
1089
1090 // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
1091 void cvtss2sd(XMMRegister dst, XMMRegister src);
1092 void cvtss2sd(XMMRegister dst, Address src);
1093
1094 // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
1095 void cvttsd2sil(Register dst, Address src);
1096 void cvttsd2sil(Register dst, XMMRegister src);
1097 void cvttsd2siq(Register dst, XMMRegister src);
1098
1099 // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
1100 void cvttss2sil(Register dst, XMMRegister src);
1101 void cvttss2siq(Register dst, XMMRegister src);
1102
1103 void cvttpd2dq(XMMRegister dst, XMMRegister src);
1104
1105 // Absolute value of packed integer values
1106 void pabsb(XMMRegister dst, XMMRegister src);
1107 void pabsw(XMMRegister dst, XMMRegister src);
1108 void pabsd(XMMRegister dst, XMMRegister src);
1109 void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
1110 void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
1111 void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
1112 void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);
1113
1114 // Divide Scalar Double-Precision Floating-Point Values
1115 void divsd(XMMRegister dst, Address src);
1116 void divsd(XMMRegister dst, XMMRegister src);
1117
1118 // Divide Scalar Single-Precision Floating-Point Values
1119 void divss(XMMRegister dst, Address src);
1120 void divss(XMMRegister dst, XMMRegister src);
1121
1122 void emms();
1123
1124 void fabs();
1125
1126 void fadd(int i);
1127
1128 void fadd_d(Address src);
1129 void fadd_s(Address src);
1130
1131 // "Alternate" versions of x87 instructions place result down in FPU
1132 // stack instead of on TOS
1133
1581 void orl(Register dst, int32_t imm32);
1582 void orl(Register dst, Address src);
1583 void orl(Register dst, Register src);
1584 void orl(Address dst, Register src);
1585
1586 void orb(Address dst, int imm8);
1587
1588 void orq(Address dst, int32_t imm32);
1589 void orq(Register dst, int32_t imm32);
1590 void orq(Register dst, Address src);
1591 void orq(Register dst, Register src);
1592
1593 // Pack with unsigned saturation
1594 void packuswb(XMMRegister dst, XMMRegister src);
1595 void packuswb(XMMRegister dst, Address src);
1596 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1597
1598 // Permutation of 64-bit words
1599 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1600 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1601 void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1602 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1603 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1604 void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1605
1606 void pause();
1607
1608 // Undefined Instruction
1609 void ud2();
1610
1611 // SSE4.2 string instructions
1612 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1613 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1614
1615 void pcmpeqb(XMMRegister dst, XMMRegister src);
1616 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1617 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1618 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1619 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1620
1621 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1661 void pinsrb(XMMRegister dst, Address src, int imm8);
1662 // SSE 2 insert
1663 void pinsrw(XMMRegister dst, Register src, int imm8);
1664 void pinsrw(XMMRegister dst, Address src, int imm8);
1665
1666 // SSE4.1 packed move
1667 void pmovzxbw(XMMRegister dst, XMMRegister src);
1668 void pmovzxbw(XMMRegister dst, Address src);
1669
1670 void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1671 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1672 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1673
1674 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1675 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1676
1677 void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1678
1679 void evpmovdb(Address dst, XMMRegister src, int vector_len);
1680
1681 // Sign extend moves
1682 void pmovsxbw(XMMRegister dst, XMMRegister src);
1683 void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
1684
1685 // Multiply add
1686 void pmaddwd(XMMRegister dst, XMMRegister src);
1687 void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1688 // Multiply add accumulate
1689 void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1690
1691 #ifndef _LP64 // no 32bit push/pop on amd64
1692 void popl(Address dst);
1693 #endif
1694
1695 #ifdef _LP64
1696 void popq(Address dst);
1697 #endif
1698
1699 void popcntl(Register dst, Address src);
1700 void popcntl(Register dst, Register src);
1701
1702 void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1703
1704 #ifdef _LP64
2091 void psrlq(XMMRegister dst, XMMRegister shift);
2092 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2093 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2094 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2095 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2096 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2097 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2098 void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2099 void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2100 void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2101
2102 // Arithmetic shift right packed integers (shorts and ints; for longs see the EVEX-only evpsraq below - NOTE(review): confirm against the instruction reference)
2103 void psraw(XMMRegister dst, int shift);
2104 void psrad(XMMRegister dst, int shift);
2105 void psraw(XMMRegister dst, XMMRegister shift);
2106 void psrad(XMMRegister dst, XMMRegister shift);
2107 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2108 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2109 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2110 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2111 void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2112 void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2113
2114 // And packed integers
2115 void pand(XMMRegister dst, XMMRegister src);
2116 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2117 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2118 void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2119
2120 // Andn packed integers
2121 void pandn(XMMRegister dst, XMMRegister src);
2122 void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2123
2124 // Or packed integers
2125 void por(XMMRegister dst, XMMRegister src);
2126 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2127 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2128 void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2129
2130 // Xor packed integers
2131 void pxor(XMMRegister dst, XMMRegister src);
2132 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|