< prev index next >
src/hotspot/cpu/x86/assembler_x86.hpp
Print this page
rev 60516 : manual merge with default
@@ -618,10 +618,11 @@
narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop
_WhichOperand_limit = 4
#endif
};
+ // Comparison predicates for integral types & FP types when using SSE
enum ComparisonPredicate {
eq = 0,
lt = 1,
le = 2,
_false = 3,
@@ -629,10 +630,55 @@
nlt = 5,
nle = 6,
_true = 7
};
+ // Comparison predicates for FP types when using AVX
+ // O means ordered. U is unordered. When using ordered, any NaN comparison is false. Otherwise, it is true.
+ // S means signaling. Q means non-signaling. When signaling is true, instruction signals #IA on NaN.
+ enum ComparisonPredicateFP {
+ EQ_OQ = 0, // equal, ordered, non-signaling
+ LT_OS = 1, // less-than, ordered, signaling
+ LE_OS = 2, // less-than-or-equal, ordered, signaling
+ UNORD_Q = 3, // unordered test, non-signaling
+ NEQ_UQ = 4, // not-equal, unordered, non-signaling
+ NLT_US = 5, // not-less-than, unordered, signaling
+ NLE_US = 6, // not-less-than-or-equal, unordered, signaling
+ ORD_Q = 7, // ordered test, non-signaling
+ EQ_UQ = 8, // equal, unordered, non-signaling
+ NGE_US = 9, // not-greater-than-or-equal, unordered, signaling
+ NGT_US = 0xA, // not-greater-than, unordered, signaling
+ FALSE_OQ = 0XB, // always false, ordered, non-signaling
+ NEQ_OQ = 0xC, // not-equal, ordered, non-signaling
+ GE_OS = 0xD, // greater-than-or-equal, ordered, signaling
+ GT_OS = 0xE, // greater-than, ordered, signaling
+ TRUE_UQ = 0xF, // always true, unordered, non-signaling
+ EQ_OS = 0x10, // equal, ordered, signaling
+ LT_OQ = 0x11, // less-than, ordered, non-signaling
+ LE_OQ = 0x12, // less-than-or-equal, ordered, non-signaling
+ UNORD_S = 0x13, // unordered test, signaling
+ NEQ_US = 0x14, // not-equal, unordered, signaling
+ NLT_UQ = 0x15, // not-less-than, unordered, non-signaling
+ NLE_UQ = 0x16, // not-less-than-or-equal, unordered, non-signaling
+ ORD_S = 0x17, // ordered test, signaling
+ EQ_US = 0x18, // equal, unordered, signaling
+ NGE_UQ = 0x19, // not-greater-than-or-equal, unordered, non-signaling
+ NGT_UQ = 0x1A, // not-greater-than, unordered, non-signaling
+ FALSE_OS = 0x1B, // always false, ordered, signaling
+ NEQ_OS = 0x1C, // not-equal, ordered, signaling
+ GE_OQ = 0x1D, // greater-than-or-equal, ordered, non-signaling
+ GT_OQ = 0x1E, // greater-than, ordered, non-signaling
+ TRUE_US =0x1F // always true, unordered, signaling
+ };
+
+ enum Width { // NOTE(review): B/W/D/Q presumably abbreviate byte/word/doubleword/quadword element widths - confirm against users of this enum
+ B = 0,
+ W = 1,
+ D = 2,
+ Q = 3
+ };
+
//---< calculate length of instruction >---
// As instruction size can't be found out easily on x86/x64,
// we just use '4' for len and maxlen.
// instruction must start at passed address
// Note: the instr argument is not examined; the result is always the constant 4.
static unsigned int instr_len(unsigned char *instr) { return 4; }
@@ -948,10 +994,11 @@
void adcq(Register dst, int32_t imm32);
void adcq(Register dst, Address src);
void adcq(Register dst, Register src);
void addb(Address dst, int imm8);
+ void addw(Register dst, Register src);
void addw(Address dst, int imm16);
void addl(Address dst, int32_t imm32);
void addl(Address dst, Register src);
void addl(Register dst, int32_t imm32);
@@ -998,10 +1045,12 @@
void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void andw(Register dst, Register src);
+
void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src);
void andl(Register dst, Register src);
@@ -1123,13 +1172,15 @@
void cvtsi2ssq(XMMRegister dst, Register src);
void cvtsi2ssq(XMMRegister dst, Address src);
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
void cvtdq2pd(XMMRegister dst, XMMRegister src);
+ void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
void cvtdq2ps(XMMRegister dst, XMMRegister src);
+ void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
void cvtss2sd(XMMRegister dst, XMMRegister src);
void cvtss2sd(XMMRegister dst, Address src);
@@ -1141,12 +1192,29 @@
// Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
+ // Convert vector double to int
void cvttpd2dq(XMMRegister dst, XMMRegister src);
+ // Convert vector float and double
+ void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
+
+ // Convert vector long to vector FP
+ void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
+ void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
+
+ // Evex casts with truncation
+ void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
+
// Absolute value of packed integer values
void pabsb(XMMRegister dst, XMMRegister src);
void pabsw(XMMRegister dst, XMMRegister src);
void pabsd(XMMRegister dst, XMMRegister src);
void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
@@ -1502,24 +1570,30 @@
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
// Move Unaligned 512bit Vector
- void evmovdqub(Address dst, XMMRegister src, int vector_len);
- void evmovdqub(XMMRegister dst, Address src, int vector_len);
- void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
- void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len);
- void evmovdquw(Address dst, XMMRegister src, int vector_len);
- void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len);
- void evmovdquw(XMMRegister dst, Address src, int vector_len);
- void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
+ void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
+ void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
+ void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
+ void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
+ void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdqul(Address dst, XMMRegister src, int vector_len);
void evmovdqul(XMMRegister dst, Address src, int vector_len);
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
+ void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquq(Address dst, XMMRegister src, int vector_len);
void evmovdquq(XMMRegister dst, Address src, int vector_len);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
+ void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
// Move lower 64bit to high 64bit in 128bit register
void movlhps(XMMRegister dst, XMMRegister src);
void movl(Register dst, int32_t imm32);
@@ -1547,10 +1621,13 @@
#endif
// Move Quadword
void movq(Address dst, XMMRegister src);
void movq(XMMRegister dst, Address src);
+ void movq(XMMRegister dst, XMMRegister src);
+ void movq(Register dst, XMMRegister src);
+ void movq(XMMRegister dst, Register src);
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);
#ifdef _LP64
@@ -1627,10 +1704,12 @@
void btsq(Address dst, int imm8);
void btrq(Address dst, int imm8);
#endif
+ void orw(Register dst, Register src);
+
void orl(Address dst, int32_t imm32);
void orl(Register dst, int32_t imm32);
void orl(Register dst, Address src);
void orl(Register dst, Register src);
void orl(Address dst, Register src);
@@ -1640,21 +1719,36 @@
void orq(Address dst, int32_t imm32);
void orq(Register dst, int32_t imm32);
void orq(Register dst, Address src);
void orq(Register dst, Register src);
+ // Pack with signed saturation
+ void packsswb(XMMRegister dst, XMMRegister src);
+ void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void packssdw(XMMRegister dst, XMMRegister src);
+ void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
+ void packusdw(XMMRegister dst, XMMRegister src);
void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
- // Pemutation of 64bit words
+ // Permutations
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
+ void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
+ void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
+ void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void pause();
// Undefined Instruction
@@ -1663,15 +1757,18 @@
// SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
void pcmpeqb(XMMRegister dst, XMMRegister src);
+ void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
+
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
+ void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
void evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate of, int vector_len);
@@ -1680,62 +1777,90 @@
void pcmpeqw(XMMRegister dst, XMMRegister src);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
void pcmpeqd(XMMRegister dst, XMMRegister src);
void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
- void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
- void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
void pcmpeqq(XMMRegister dst, XMMRegister src);
+ void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void pcmpgtq(XMMRegister dst, XMMRegister src);
+ void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
void pmovmskb(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src);
// SSE 4.1 extract
void pextrd(Register dst, XMMRegister src, int imm8);
void pextrq(Register dst, XMMRegister src, int imm8);
void pextrd(Address dst, XMMRegister src, int imm8);
void pextrq(Address dst, XMMRegister src, int imm8);
+ void pextrb(Register dst, XMMRegister src, int imm8);
void pextrb(Address dst, XMMRegister src, int imm8);
// SSE 2 extract
void pextrw(Register dst, XMMRegister src, int imm8);
void pextrw(Address dst, XMMRegister src, int imm8);
// SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8);
+ void pinsrb(XMMRegister dst, Register src, int imm8);
void pinsrd(XMMRegister dst, Address src, int imm8);
void pinsrq(XMMRegister dst, Address src, int imm8);
void pinsrb(XMMRegister dst, Address src, int imm8);
+ void insertps(XMMRegister dst, XMMRegister src, int imm8);
// SSE 2 insert
void pinsrw(XMMRegister dst, Register src, int imm8);
void pinsrw(XMMRegister dst, Address src, int imm8);
- // SSE4.1 packed move
+ // AVX insert
+ void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
+
+ // Zero extend moves
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
-
+ void pmovzxbd(XMMRegister dst, XMMRegister src);
void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
+ void pmovzxdq(XMMRegister dst, XMMRegister src);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ // Sign extend moves
+ void pmovsxbd(XMMRegister dst, XMMRegister src);
+ void pmovsxbq(XMMRegister dst, XMMRegister src);
+ void pmovsxbw(XMMRegister dst, XMMRegister src);
+ void pmovsxwd(XMMRegister dst, XMMRegister src);
+ void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);
+
void evpmovwb(Address dst, XMMRegister src, int vector_len);
void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovdb(Address dst, XMMRegister src, int vector_len);
- // Sign extend moves
- void pmovsxbw(XMMRegister dst, XMMRegister src);
- void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
-
// Multiply add
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Multiply add accumulate
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1775,14 +1900,21 @@
// Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode);
void pshufd(XMMRegister dst, Address src, int mode);
void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
- // Shuffle Packed Low Words
+ // Shuffle Packed High/Low Words
+ void pshufhw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode);
+ // Shuffle floats and doubles
+ void pshufps(XMMRegister, XMMRegister, int);
+ void pshufpd(XMMRegister, XMMRegister, int);
+ void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
+ void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
+
// Shuffle packed values at 128 bit granularity
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
// Shift Right by bytes Logical DoubleQuadword Immediate
void psrldq(XMMRegister dst, int shift);
@@ -1794,10 +1926,13 @@
void ptest(XMMRegister dst, Address src);
// Logical Compare 256bit
void vptest(XMMRegister dst, XMMRegister src);
void vptest(XMMRegister dst, Address src);
+ // Logical compare with an explicit vector length
+ void vptest(XMMRegister dst, XMMRegister src, int vector_len);
+
// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src);
// Interleave Low Doublewords
@@ -1856,10 +1991,11 @@
void palignr(XMMRegister dst, XMMRegister src, int imm8);
void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
+ void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
void sha1nexte(XMMRegister dst, XMMRegister src);
void sha1msg1(XMMRegister dst, XMMRegister src);
void sha1msg2(XMMRegister dst, XMMRegister src);
@@ -1974,10 +2110,11 @@
void xorl(Register dst, int32_t imm32);
void xorl(Register dst, Address src);
void xorl(Register dst, Register src);
void xorb(Register dst, Address src);
+ void xorw(Register dst, Register src);
void xorq(Register dst, Address src);
void xorq(Register dst, Register src);
void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
@@ -2010,10 +2147,12 @@
void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);
//====================VECTOR ARITHMETIC=====================================
+ void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
+ void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);
// Add Packed Floating-Point Values
void addpd(XMMRegister dst, XMMRegister src);
void addpd(XMMRegister dst, Address src);
void addps(XMMRegister dst, XMMRegister src);
@@ -2119,17 +2258,45 @@
void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// Multiply packed integers (only shorts and ints)
void pmullw(XMMRegister dst, XMMRegister src);
void pmulld(XMMRegister dst, XMMRegister src);
+ void pmuludq(XMMRegister dst, XMMRegister src);
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ // Minimum of packed integers and packed floating-point values
+ void pminsb(XMMRegister dst, XMMRegister src);
+ void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pminsw(XMMRegister dst, XMMRegister src);
+ void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pminsd(XMMRegister dst, XMMRegister src);
+ void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void minps(XMMRegister dst, XMMRegister src);
+ void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void minpd(XMMRegister dst, XMMRegister src);
+ void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+
+ // Maximum of packed integers and packed floating-point values
+ void pmaxsb(XMMRegister dst, XMMRegister src);
+ void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pmaxsw(XMMRegister dst, XMMRegister src);
+ void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pmaxsd(XMMRegister dst, XMMRegister src);
+ void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void maxps(XMMRegister dst, XMMRegister src);
+ void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void maxpd(XMMRegister dst, XMMRegister src);
+ void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+
// Shift left packed integers
void psllw(XMMRegister dst, int shift);
void pslld(XMMRegister dst, int shift);
void psllq(XMMRegister dst, int shift);
void psllw(XMMRegister dst, XMMRegister shift);
@@ -2167,20 +2334,34 @@
void psrad(XMMRegister dst, XMMRegister shift);
void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ // Variable shift left packed integers
+ void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
+ // Variable shift right packed integers
+ void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
+ // Variable shift right arithmetic packed integers
+ void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
// And packed integers
void pand(XMMRegister dst, XMMRegister src);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Andn packed integers
void pandn(XMMRegister dst, XMMRegister src);
void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -2189,18 +2370,22 @@
void por(XMMRegister dst, XMMRegister src);
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
+
// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
-
// vinserti forms
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
@@ -2255,11 +2440,25 @@
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
- void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len);
+ // Gather AVX2 and AVX3
+ void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);
+
+ // Scatter (AVX3 only)
+ void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
+ void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
+ void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
+ void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);
// Carry-Less Multiplication Quadword
void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
@@ -2268,17 +2467,59 @@
// penalty if legacy SSE instructions are encoded using VEX prefix because
// they always clear upper 128 bits. It should be used before calling
// runtime code and native libraries.
void vzeroupper();
- // AVX support for vectorized conditional move (float/double). The following two instructions used only coupled.
- void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
- void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
- void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
- void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
+ // Vector double compares
+ void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
+ void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ ComparisonPredicateFP comparison, int vector_len);
+
+ // Vector float compares
+ void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
+ void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ ComparisonPredicateFP comparison, int vector_len);
+
+ // Vector integer compares
+ void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector long compares
+ void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector byte compares
+ void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector short compares
+ void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector blends
+ void blendvps(XMMRegister dst, XMMRegister src);
+ void blendvpd(XMMRegister dst, XMMRegister src);
+ void pblendvb(XMMRegister dst, XMMRegister src);
+ void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
+ void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
-
+ void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
protected:
// Next instructions require address alignment 16 bytes SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
void andpd(XMMRegister dst, Address src);
void andps(XMMRegister dst, Address src);
@@ -2370,11 +2611,12 @@
void set_is_evex_instruction(void) { _is_evex_instruction = true; } // sets the _is_evex_instruction flag to true
// Internal encoding data used in compressed immediate offset programming.
// The value is stored into _evex_encoding as given; no validation is done here.
void set_evex_encoding(int value) { _evex_encoding = value; }
- // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
+ // When the Evex.Z field is set (true), it is used to clear all non-directed XMM/YMM/ZMM components.
+ // This method unsets it (sets _is_clear_context to false) so that merge semantics are used instead.
void reset_is_clear_context(void) { _is_clear_context = false; }
// Map back to the current assembler so that we can manage object-level association
void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
< prev index next >