< prev index next >

src/hotspot/cpu/x86/assembler_x86.hpp

Print this page
rev 61868 : manual merge with default

*** 618,627 **** --- 618,628 ---- narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop _WhichOperand_limit = 4 #endif }; + // Comparison predicates for integral types & FP types when using SSE enum ComparisonPredicate { eq = 0, lt = 1, le = 2, _false = 3,
*** 629,638 **** --- 630,684 ---- nlt = 5, nle = 6, _true = 7 }; + // Comparison predicates for FP types when using AVX + // O means ordered, U means unordered: when either operand is NaN, an ordered predicate is false and an unordered predicate is true. + // S means signaling, Q means quiet (non-signaling): a signaling predicate raises #IA when an operand is a quiet NaN. + enum ComparisonPredicateFP { + EQ_OQ = 0, + LT_OS = 1, + LE_OS = 2, + UNORD_Q = 3, + NEQ_UQ = 4, + NLT_US = 5, + NLE_US = 6, + ORD_Q = 7, + EQ_UQ = 8, + NGE_US = 9, + NGT_US = 0xA, + FALSE_OQ = 0XB, + NEQ_OQ = 0xC, + GE_OS = 0xD, + GT_OS = 0xE, + TRUE_UQ = 0xF, + EQ_OS = 0x10, + LT_OQ = 0x11, + LE_OQ = 0x12, + UNORD_S = 0x13, + NEQ_US = 0x14, + NLT_UQ = 0x15, + NLE_UQ = 0x16, + ORD_S = 0x17, + EQ_US = 0x18, + NGE_UQ = 0x19, + NGT_UQ = 0x1A, + FALSE_OS = 0x1B, + NEQ_OS = 0x1C, + GE_OQ = 0x1D, + GT_OQ = 0x1E, + TRUE_US =0x1F + }; + + enum Width { + B = 0, + W = 1, + D = 2, + Q = 3 + }; + //---< calculate length of instruction >--- // As instruction size can't be found out easily on x86/x64, // we just use '4' for len and maxlen. // instruction must start at passed address static unsigned int instr_len(unsigned char *instr) { return 4; }
*** 948,957 **** --- 994,1004 ---- void adcq(Register dst, int32_t imm32); void adcq(Register dst, Address src); void adcq(Register dst, Register src); void addb(Address dst, int imm8); + void addw(Register dst, Register src); void addw(Address dst, int imm16); void addl(Address dst, int32_t imm32); void addl(Address dst, Register src); void addl(Register dst, int32_t imm32);
*** 998,1007 **** --- 1045,1056 ---- void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void andw(Register dst, Register src); + void andl(Address dst, int32_t imm32); void andl(Register dst, int32_t imm32); void andl(Register dst, Address src); void andl(Register dst, Register src);
*** 1123,1135 **** --- 1172,1186 ---- void cvtsi2ssq(XMMRegister dst, Register src); void cvtsi2ssq(XMMRegister dst, Address src); // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value void cvtdq2pd(XMMRegister dst, XMMRegister src); + void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len); // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value void cvtdq2ps(XMMRegister dst, XMMRegister src); + void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len); // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value void cvtss2sd(XMMRegister dst, XMMRegister src); void cvtss2sd(XMMRegister dst, Address src);
*** 1141,1152 **** --- 1192,1220 ---- // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer void cvttss2sil(Register dst, XMMRegister src); void cvttss2siq(Register dst, XMMRegister src); + // Convert vector double to int void cvttpd2dq(XMMRegister dst, XMMRegister src); + // Convert vector float and double + void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len); + void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len); + + // Convert vector long to vector FP + void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len); + void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len); + + // Evex casts with truncation + void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len); + //Abs of packed Integer values void pabsb(XMMRegister dst, XMMRegister src); void pabsw(XMMRegister dst, XMMRegister src); void pabsd(XMMRegister dst, XMMRegister src); void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
*** 1502,1525 **** void vmovdqu(Address dst, XMMRegister src); void vmovdqu(XMMRegister dst, Address src); void vmovdqu(XMMRegister dst, XMMRegister src); // Move Unaligned 512bit Vector ! void evmovdqub(Address dst, XMMRegister src, int vector_len); ! void evmovdqub(XMMRegister dst, Address src, int vector_len); ! void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len); ! void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len); ! void evmovdquw(Address dst, XMMRegister src, int vector_len); ! void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len); ! void evmovdquw(XMMRegister dst, Address src, int vector_len); ! void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len); void evmovdqul(Address dst, XMMRegister src, int vector_len); void evmovdqul(XMMRegister dst, Address src, int vector_len); void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len); void evmovdquq(Address dst, XMMRegister src, int vector_len); void evmovdquq(XMMRegister dst, Address src, int vector_len); void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len); // Move lower 64bit to high 64bit in 128bit register void movlhps(XMMRegister dst, XMMRegister src); void movl(Register dst, int32_t imm32); --- 1570,1599 ---- void vmovdqu(Address dst, XMMRegister src); void vmovdqu(XMMRegister dst, Address src); void vmovdqu(XMMRegister dst, XMMRegister src); // Move Unaligned 512bit Vector ! void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len); ! void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len); ! void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len); ! void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); ! void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len); ! void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len); ! 
void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len); ! void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); void evmovdqul(Address dst, XMMRegister src, int vector_len); void evmovdqul(XMMRegister dst, Address src, int vector_len); void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len); + void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len); + void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); + void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); void evmovdquq(Address dst, XMMRegister src, int vector_len); void evmovdquq(XMMRegister dst, Address src, int vector_len); void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len); + void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len); + void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); + void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); // Move lower 64bit to high 64bit in 128bit register void movlhps(XMMRegister dst, XMMRegister src); void movl(Register dst, int32_t imm32);
*** 1547,1556 **** --- 1621,1633 ---- #endif // Move Quadword void movq(Address dst, XMMRegister src); void movq(XMMRegister dst, Address src); + void movq(XMMRegister dst, XMMRegister src); + void movq(Register dst, XMMRegister src); + void movq(XMMRegister dst, Register src); void movsbl(Register dst, Address src); void movsbl(Register dst, Register src); #ifdef _LP64
*** 1627,1636 **** --- 1704,1715 ---- void btsq(Address dst, int imm8); void btrq(Address dst, int imm8); #endif + void orw(Register dst, Register src); + void orl(Address dst, int32_t imm32); void orl(Register dst, int32_t imm32); void orl(Register dst, Address src); void orl(Register dst, Register src); void orl(Address dst, Register src);
*** 1640,1660 **** void orq(Address dst, int32_t imm32); void orq(Register dst, int32_t imm32); void orq(Register dst, Address src); void orq(Register dst, Register src); // Pack with unsigned saturation void packuswb(XMMRegister dst, XMMRegister src); void packuswb(XMMRegister dst, Address src); void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); ! // Pemutation of 64bit words void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len); void vpermq(XMMRegister dst, XMMRegister src, int imm8); void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void pause(); // Undefined Instruction --- 1719,1754 ---- void orq(Address dst, int32_t imm32); void orq(Register dst, int32_t imm32); void orq(Register dst, Address src); void orq(Register dst, Register src); + // Pack with signed saturation + void packsswb(XMMRegister dst, XMMRegister src); + void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void packssdw(XMMRegister dst, XMMRegister src); + void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + // Pack with unsigned saturation void packuswb(XMMRegister dst, XMMRegister src); void packuswb(XMMRegister dst, Address src); + void packusdw(XMMRegister dst, XMMRegister src); void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); ! 
// Permutations void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len); void vpermq(XMMRegister dst, XMMRegister src, int imm8); void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len); + void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len); + void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len); void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void pause(); // Undefined Instruction
*** 1663,1677 **** --- 1757,1774 ---- // SSE4.2 string instructions void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); void pcmpestri(XMMRegister xmm1, Address src, int imm8); void pcmpeqb(XMMRegister dst, XMMRegister src); + void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len); + void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len); void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len); + void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len); void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len); void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len); void evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate of, int vector_len);
*** 1680,1741 **** void pcmpeqw(XMMRegister dst, XMMRegister src); void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len); void pcmpeqd(XMMRegister dst, XMMRegister src); void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); ! void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); ! void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len); void pcmpeqq(XMMRegister dst, XMMRegister src); void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len); void pmovmskb(Register dst, XMMRegister src); void vpmovmskb(Register dst, XMMRegister src); // SSE 4.1 extract void pextrd(Register dst, XMMRegister src, int imm8); void pextrq(Register dst, XMMRegister src, int imm8); void pextrd(Address dst, XMMRegister src, int imm8); void pextrq(Address dst, XMMRegister src, int imm8); void pextrb(Address dst, XMMRegister src, int imm8); // SSE 2 extract void pextrw(Register dst, XMMRegister src, int imm8); void pextrw(Address dst, XMMRegister src, int imm8); // SSE 4.1 insert void pinsrd(XMMRegister dst, Register src, int imm8); void pinsrq(XMMRegister dst, Register src, int imm8); void pinsrd(XMMRegister dst, Address src, int imm8); void pinsrq(XMMRegister dst, Address src, int imm8); void pinsrb(XMMRegister dst, Address src, int imm8); // SSE 2 insert void pinsrw(XMMRegister dst, Register src, int imm8); void pinsrw(XMMRegister dst, Address src, int imm8); ! // SSE4.1 packed move void pmovzxbw(XMMRegister dst, XMMRegister src); void pmovzxbw(XMMRegister dst, Address src); ! 
void vpmovzxbw( XMMRegister dst, Address src, int vector_len); void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len); void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len); void evpmovwb(Address dst, XMMRegister src, int vector_len); void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len); void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len); void evpmovdb(Address dst, XMMRegister src, int vector_len); - // Sign extend moves - void pmovsxbw(XMMRegister dst, XMMRegister src); - void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len); - // Multiply add void pmaddwd(XMMRegister dst, XMMRegister src); void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); // Multiply add accumulate void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); --- 1777,1866 ---- void pcmpeqw(XMMRegister dst, XMMRegister src); void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len); + void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void pcmpeqd(XMMRegister dst, XMMRegister src); void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); ! void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len); ! 
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len); void pcmpeqq(XMMRegister dst, XMMRegister src); + void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len); void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len); + void pcmpgtq(XMMRegister dst, XMMRegister src); + void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void pmovmskb(Register dst, XMMRegister src); void vpmovmskb(Register dst, XMMRegister src); // SSE 4.1 extract void pextrd(Register dst, XMMRegister src, int imm8); void pextrq(Register dst, XMMRegister src, int imm8); void pextrd(Address dst, XMMRegister src, int imm8); void pextrq(Address dst, XMMRegister src, int imm8); + void pextrb(Register dst, XMMRegister src, int imm8); void pextrb(Address dst, XMMRegister src, int imm8); // SSE 2 extract void pextrw(Register dst, XMMRegister src, int imm8); void pextrw(Address dst, XMMRegister src, int imm8); // SSE 4.1 insert void pinsrd(XMMRegister dst, Register src, int imm8); void pinsrq(XMMRegister dst, Register src, int imm8); + void pinsrb(XMMRegister dst, Register src, int imm8); void pinsrd(XMMRegister dst, Address src, int imm8); void pinsrq(XMMRegister dst, Address src, int imm8); void pinsrb(XMMRegister dst, Address src, int imm8); + void insertps(XMMRegister dst, XMMRegister src, int imm8); // SSE 2 insert void pinsrw(XMMRegister dst, Register src, int imm8); void pinsrw(XMMRegister dst, Address src, int imm8); ! // AVX insert ! void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8); ! void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8); ! void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8); ! 
void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8); ! void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); ! ! // Zero extend moves void pmovzxbw(XMMRegister dst, XMMRegister src); void pmovzxbw(XMMRegister dst, Address src); ! void pmovzxbd(XMMRegister dst, XMMRegister src); void vpmovzxbw( XMMRegister dst, Address src, int vector_len); + void pmovzxdq(XMMRegister dst, XMMRegister src); void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len); void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len); + // Sign extend moves + void pmovsxbd(XMMRegister dst, XMMRegister src); + void pmovsxbq(XMMRegister dst, XMMRegister src); + void pmovsxbw(XMMRegister dst, XMMRegister src); + void pmovsxwd(XMMRegister dst, XMMRegister src); + void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovwb(Address dst, XMMRegister src, int vector_len); void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len); void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len); void evpmovdb(Address dst, XMMRegister src, int vector_len); // Multiply add void pmaddwd(XMMRegister dst, XMMRegister src); void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); // Multiply add accumulate void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
*** 1775,1788 **** // Shuffle Packed Doublewords void pshufd(XMMRegister dst, XMMRegister src, int mode); void pshufd(XMMRegister dst, Address src, int mode); void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len); ! // Shuffle Packed Low Words void pshuflw(XMMRegister dst, XMMRegister src, int mode); void pshuflw(XMMRegister dst, Address src, int mode); // Shuffle packed values at 128 bit granularity void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len); // Shift Right by bytes Logical DoubleQuadword Immediate void psrldq(XMMRegister dst, int shift); --- 1900,1920 ---- // Shuffle Packed Doublewords void pshufd(XMMRegister dst, XMMRegister src, int mode); void pshufd(XMMRegister dst, Address src, int mode); void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len); ! // Shuffle Packed High/Low Words ! void pshufhw(XMMRegister dst, XMMRegister src, int mode); void pshuflw(XMMRegister dst, XMMRegister src, int mode); void pshuflw(XMMRegister dst, Address src, int mode); + // Shuffle Packed Floats and Doubles + void pshufps(XMMRegister, XMMRegister, int); + void pshufpd(XMMRegister, XMMRegister, int); + void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int); + void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int); + // Shuffle packed values at 128 bit granularity void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len); // Shift Right by bytes Logical DoubleQuadword Immediate void psrldq(XMMRegister dst, int shift);
*** 1794,1803 **** --- 1926,1938 ---- void ptest(XMMRegister dst, Address src); // Logical Compare 256bit void vptest(XMMRegister dst, XMMRegister src); void vptest(XMMRegister dst, Address src); + // Logical Compare with explicit vector length + void vptest(XMMRegister dst, XMMRegister src, int vector_len); + // Interleave Low Bytes void punpcklbw(XMMRegister dst, XMMRegister src); void punpcklbw(XMMRegister dst, Address src); // Interleave Low Doublewords
*** 1856,1865 **** --- 1991,2001 ---- void palignr(XMMRegister dst, XMMRegister src, int imm8); void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len); void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); void pblendw(XMMRegister dst, XMMRegister src, int imm8); + void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len); void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8); void sha1nexte(XMMRegister dst, XMMRegister src); void sha1msg1(XMMRegister dst, XMMRegister src); void sha1msg2(XMMRegister dst, XMMRegister src);
*** 1974,1983 **** --- 2110,2120 ---- void xorl(Register dst, int32_t imm32); void xorl(Register dst, Address src); void xorl(Register dst, Register src); void xorb(Register dst, Address src); + void xorw(Register dst, Register src); void xorq(Register dst, Address src); void xorq(Register dst, Register src); void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
*** 2010,2019 **** --- 2147,2158 ---- void shlxl(Register dst, Register src1, Register src2); void shlxq(Register dst, Register src1, Register src2); //====================VECTOR ARITHMETIC===================================== + void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len); + void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len); // Add Packed Floating-Point Values void addpd(XMMRegister dst, XMMRegister src); void addpd(XMMRegister dst, Address src); void addps(XMMRegister dst, XMMRegister src);
*** 2119,2135 **** --- 2258,2302 ---- void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); // Multiply packed integers (only shorts and ints) void pmullw(XMMRegister dst, XMMRegister src); void pmulld(XMMRegister dst, XMMRegister src); + void pmuludq(XMMRegister dst, XMMRegister src); void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + // Minimum of packed integers and packed floats/doubles + void pminsb(XMMRegister dst, XMMRegister src); + void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void pminsw(XMMRegister dst, XMMRegister src); + void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void pminsd(XMMRegister dst, XMMRegister src); + void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void minps(XMMRegister dst, XMMRegister src); + void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void minpd(XMMRegister dst, XMMRegister src); + void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + + // Maximum of packed integers and packed floats/doubles + void pmaxsb(XMMRegister dst, XMMRegister src); + void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void pmaxsw(XMMRegister dst, XMMRegister src); + void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void pmaxsd(XMMRegister dst, XMMRegister 
src); + void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void maxps(XMMRegister dst, XMMRegister src); + void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void maxpd(XMMRegister dst, XMMRegister src); + void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + // Shift left packed integers void psllw(XMMRegister dst, int shift); void pslld(XMMRegister dst, int shift); void psllq(XMMRegister dst, int shift); void psllw(XMMRegister dst, XMMRegister shift);
*** 2167,2186 **** --- 2334,2367 ---- void psrad(XMMRegister dst, XMMRegister shift); void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len); void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len); void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len); void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + // Variable shift left packed integers + void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + + // Variable shift right packed integers + void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + + // Variable shift right arithmetic packed integers + void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); // And packed integers void pand(XMMRegister dst, XMMRegister src); void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); // Andn packed integers void pandn(XMMRegister dst, XMMRegister src); void vpandn(XMMRegister dst, XMMRegister nds, 
XMMRegister src, int vector_len);
*** 2189,2202 **** --- 2370,2388 ---- void por(XMMRegister dst, XMMRegister src); void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + // Xor packed integers void pxor(XMMRegister dst, XMMRegister src); void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); // Ternary logic instruction. void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
*** 2260,2270 **** void evpbroadcastb(XMMRegister dst, Register src, int vector_len); void evpbroadcastw(XMMRegister dst, Register src, int vector_len); void evpbroadcastd(XMMRegister dst, Register src, int vector_len); void evpbroadcastq(XMMRegister dst, Register src, int vector_len); ! void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len); // Carry-Less Multiplication Quadword void pclmulqdq(XMMRegister dst, XMMRegister src, int mask); void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask); void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len); --- 2446,2470 ---- void evpbroadcastb(XMMRegister dst, Register src, int vector_len); void evpbroadcastw(XMMRegister dst, Register src, int vector_len); void evpbroadcastd(XMMRegister dst, Register src, int vector_len); void evpbroadcastq(XMMRegister dst, Register src, int vector_len); ! // Gather AVX2 and AVX3 ! void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len); ! void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len); ! void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len); ! void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len); ! void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len); ! void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len); ! void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len); ! void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len); ! ! //Scatter AVX3 only ! void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len); ! void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len); ! void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len); ! 
void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len); // Carry-Less Multiplication Quadword void pclmulqdq(XMMRegister dst, XMMRegister src, int mask); void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask); void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
*** 2273,2290 **** // penalty if legacy SSE instructions are encoded using VEX prefix because // they always clear upper 128 bits. It should be used before calling // runtime code and native libraries. void vzeroupper(); ! // AVX support for vectorized conditional move (float/double). The following two instructions used only coupled. void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); ! void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len); ! void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); ! void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len); ! void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len); ! protected: // Next instructions require address alignment 16 bytes SSE mode. // They should be called only from corresponding MacroAssembler instructions. void andpd(XMMRegister dst, Address src); void andps(XMMRegister dst, Address src); --- 2473,2532 ---- // penalty if legacy SSE instructions are encoded using VEX prefix because // they always clear upper 128 bits. It should be used before calling // runtime code and native libraries. void vzeroupper(); ! // Vector double compares ! void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len); ! void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ! ComparisonPredicateFP comparison, int vector_len); ! ! // Vector float compares ! void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len); ! void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ! ComparisonPredicateFP comparison, int vector_len); ! ! // Vector integer compares ! 
void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); ! void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ! int comparison, int vector_len); ! void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, ! int comparison, int vector_len); ! ! // Vector long compares ! void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ! int comparison, int vector_len); ! void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src, ! int comparison, int vector_len); ! ! // Vector byte compares ! void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ! int comparison, int vector_len); ! void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, ! int comparison, int vector_len); ! ! // Vector short compares ! void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ! int comparison, int vector_len); ! void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src, ! int comparison, int vector_len); ! ! // Vector blends ! void blendvps(XMMRegister dst, XMMRegister src); ! void blendvpd(XMMRegister dst, XMMRegister src); ! void pblendvb(XMMRegister dst, XMMRegister src); void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); ! void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len); ! void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); ! void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len); void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len); ! void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); ! void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); ! 
void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); ! void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); ! void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); ! void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); protected: // Next instructions require address alignment 16 bytes SSE mode. // They should be called only from corresponding MacroAssembler instructions. void andpd(XMMRegister dst, Address src); void andps(XMMRegister dst, Address src);
*** 2376,2386 **** void set_is_evex_instruction(void) { _is_evex_instruction = true; } // Internal encoding data used in compressed immediate offset programming void set_evex_encoding(int value) { _evex_encoding = value; } ! // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components void reset_is_clear_context(void) { _is_clear_context = false; } // Map back to current assembler so that we can manage object level association void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; } --- 2618,2629 ---- void set_is_evex_instruction(void) { _is_evex_instruction = true; } // Internal encoding data used in compressed immediate offset programming void set_evex_encoding(int value) { _evex_encoding = value; } ! // When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components. ! // This method unsets it so that merge semantics are used instead. void reset_is_clear_context(void) { _is_clear_context = false; } // Map back to current assembler so that we can manage object level association void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
< prev index next >