< prev index next >
src/hotspot/cpu/x86/assembler_x86.hpp
Print this page
rev 60516 : manual merge with default
*** 618,627 ****
--- 618,628 ----
narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop
_WhichOperand_limit = 4
#endif
};
+ // Comparison predicates for integral types & FP types when using SSE
enum ComparisonPredicate {
eq = 0,
lt = 1,
le = 2,
_false = 3,
*** 629,638 ****
--- 630,684 ----
nlt = 5,
nle = 6,
_true = 7
};
+ // Comparison predicates for FP types when using AVX
+ // O means ordered, U means unordered. With an ordered predicate, any comparison involving NaN yields false; with an unordered predicate, it yields true.
+ // S means signaling. Q means non-signaling. When signaling is true, instruction signals #IA on NaN.
+ enum ComparisonPredicateFP {
+ EQ_OQ = 0,
+ LT_OS = 1,
+ LE_OS = 2,
+ UNORD_Q = 3,
+ NEQ_UQ = 4,
+ NLT_US = 5,
+ NLE_US = 6,
+ ORD_Q = 7,
+ EQ_UQ = 8,
+ NGE_US = 9,
+ NGT_US = 0xA,
+ FALSE_OQ = 0xB,
+ NEQ_OQ = 0xC,
+ GE_OS = 0xD,
+ GT_OS = 0xE,
+ TRUE_UQ = 0xF,
+ EQ_OS = 0x10,
+ LT_OQ = 0x11,
+ LE_OQ = 0x12,
+ UNORD_S = 0x13,
+ NEQ_US = 0x14,
+ NLT_UQ = 0x15,
+ NLE_UQ = 0x16,
+ ORD_S = 0x17,
+ EQ_US = 0x18,
+ NGE_UQ = 0x19,
+ NGT_UQ = 0x1A,
+ FALSE_OS = 0x1B,
+ NEQ_OS = 0x1C,
+ GE_OQ = 0x1D,
+ GT_OQ = 0x1E,
+ TRUE_US = 0x1F
+ };
+
+ enum Width {
+ B = 0,
+ W = 1,
+ D = 2,
+ Q = 3
+ };
+
//---< calculate length of instruction >---
// As instruction size can't be found out easily on x86/x64,
// we just use '4' for len and maxlen.
// instruction must start at passed address
static unsigned int instr_len(unsigned char *instr) { return 4; }
*** 948,957 ****
--- 994,1004 ----
void adcq(Register dst, int32_t imm32);
void adcq(Register dst, Address src);
void adcq(Register dst, Register src);
void addb(Address dst, int imm8);
+ void addw(Register dst, Register src);
void addw(Address dst, int imm16);
void addl(Address dst, int32_t imm32);
void addl(Address dst, Register src);
void addl(Register dst, int32_t imm32);
*** 998,1007 ****
--- 1045,1056 ----
void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void andw(Register dst, Register src);
+
void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src);
void andl(Register dst, Register src);
*** 1123,1135 ****
--- 1172,1186 ----
void cvtsi2ssq(XMMRegister dst, Register src);
void cvtsi2ssq(XMMRegister dst, Address src);
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
void cvtdq2pd(XMMRegister dst, XMMRegister src);
+ void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
void cvtdq2ps(XMMRegister dst, XMMRegister src);
+ void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
void cvtss2sd(XMMRegister dst, XMMRegister src);
void cvtss2sd(XMMRegister dst, Address src);
*** 1141,1152 ****
--- 1192,1220 ----
// Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
+ // Convert vector double to int
void cvttpd2dq(XMMRegister dst, XMMRegister src);
+ // Convert vector float and double
+ void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
+
+ // Convert vector long to vector FP
+ void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
+ void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
+
+ // Evex casts with truncation
+ void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
+
//Abs of packed Integer values
void pabsb(XMMRegister dst, XMMRegister src);
void pabsw(XMMRegister dst, XMMRegister src);
void pabsd(XMMRegister dst, XMMRegister src);
void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
*** 1502,1525 ****
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
// Move Unaligned 512bit Vector
! void evmovdqub(Address dst, XMMRegister src, int vector_len);
! void evmovdqub(XMMRegister dst, Address src, int vector_len);
! void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
! void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len);
! void evmovdquw(Address dst, XMMRegister src, int vector_len);
! void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len);
! void evmovdquw(XMMRegister dst, Address src, int vector_len);
! void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evmovdqul(Address dst, XMMRegister src, int vector_len);
void evmovdqul(XMMRegister dst, Address src, int vector_len);
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdquq(Address dst, XMMRegister src, int vector_len);
void evmovdquq(XMMRegister dst, Address src, int vector_len);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
// Move lower 64bit to high 64bit in 128bit register
void movlhps(XMMRegister dst, XMMRegister src);
void movl(Register dst, int32_t imm32);
--- 1570,1599 ----
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
// Move Unaligned 512bit Vector
! void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
! void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
! void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
! void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
! void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
! void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
! void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
! void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdqul(Address dst, XMMRegister src, int vector_len);
void evmovdqul(XMMRegister dst, Address src, int vector_len);
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
+ void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquq(Address dst, XMMRegister src, int vector_len);
void evmovdquq(XMMRegister dst, Address src, int vector_len);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
+ void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
// Move lower 64bit to high 64bit in 128bit register
void movlhps(XMMRegister dst, XMMRegister src);
void movl(Register dst, int32_t imm32);
*** 1547,1556 ****
--- 1621,1633 ----
#endif
// Move Quadword
void movq(Address dst, XMMRegister src);
void movq(XMMRegister dst, Address src);
+ void movq(XMMRegister dst, XMMRegister src);
+ void movq(Register dst, XMMRegister src);
+ void movq(XMMRegister dst, Register src);
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);
#ifdef _LP64
*** 1627,1636 ****
--- 1704,1715 ----
void btsq(Address dst, int imm8);
void btrq(Address dst, int imm8);
#endif
+ void orw(Register dst, Register src);
+
void orl(Address dst, int32_t imm32);
void orl(Register dst, int32_t imm32);
void orl(Register dst, Address src);
void orl(Register dst, Register src);
void orl(Address dst, Register src);
*** 1640,1660 ****
void orq(Address dst, int32_t imm32);
void orq(Register dst, int32_t imm32);
void orq(Register dst, Address src);
void orq(Register dst, Register src);
// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
! // Pemutation of 64bit words
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void pause();
// Undefined Instruction
--- 1719,1754 ----
void orq(Address dst, int32_t imm32);
void orq(Register dst, int32_t imm32);
void orq(Register dst, Address src);
void orq(Register dst, Register src);
+ // Pack with signed saturation
+ void packsswb(XMMRegister dst, XMMRegister src);
+ void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void packssdw(XMMRegister dst, XMMRegister src);
+ void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
+ void packusdw(XMMRegister dst, XMMRegister src);
void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
! // Permutations
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
+ void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
+ void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
+ void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void pause();
// Undefined Instruction
*** 1663,1677 ****
--- 1757,1774 ----
// SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
void pcmpeqb(XMMRegister dst, XMMRegister src);
+ void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
+
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
+ void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
void evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate of, int vector_len);
*** 1680,1741 ****
void pcmpeqw(XMMRegister dst, XMMRegister src);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void pcmpeqd(XMMRegister dst, XMMRegister src);
void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
! void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
! void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void pcmpeqq(XMMRegister dst, XMMRegister src);
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void pmovmskb(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src);
// SSE 4.1 extract
void pextrd(Register dst, XMMRegister src, int imm8);
void pextrq(Register dst, XMMRegister src, int imm8);
void pextrd(Address dst, XMMRegister src, int imm8);
void pextrq(Address dst, XMMRegister src, int imm8);
void pextrb(Address dst, XMMRegister src, int imm8);
// SSE 2 extract
void pextrw(Register dst, XMMRegister src, int imm8);
void pextrw(Address dst, XMMRegister src, int imm8);
// SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8);
void pinsrd(XMMRegister dst, Address src, int imm8);
void pinsrq(XMMRegister dst, Address src, int imm8);
void pinsrb(XMMRegister dst, Address src, int imm8);
// SSE 2 insert
void pinsrw(XMMRegister dst, Register src, int imm8);
void pinsrw(XMMRegister dst, Address src, int imm8);
! // SSE4.1 packed move
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
!
void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evpmovwb(Address dst, XMMRegister src, int vector_len);
void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovdb(Address dst, XMMRegister src, int vector_len);
- // Sign extend moves
- void pmovsxbw(XMMRegister dst, XMMRegister src);
- void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
-
// Multiply add
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Multiply add accumulate
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
--- 1777,1866 ----
void pcmpeqw(XMMRegister dst, XMMRegister src);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
void pcmpeqd(XMMRegister dst, XMMRegister src);
void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
! void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
! void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
void pcmpeqq(XMMRegister dst, XMMRegister src);
+ void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void pcmpgtq(XMMRegister dst, XMMRegister src);
+ void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
void pmovmskb(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src);
// SSE 4.1 extract
void pextrd(Register dst, XMMRegister src, int imm8);
void pextrq(Register dst, XMMRegister src, int imm8);
void pextrd(Address dst, XMMRegister src, int imm8);
void pextrq(Address dst, XMMRegister src, int imm8);
+ void pextrb(Register dst, XMMRegister src, int imm8);
void pextrb(Address dst, XMMRegister src, int imm8);
// SSE 2 extract
void pextrw(Register dst, XMMRegister src, int imm8);
void pextrw(Address dst, XMMRegister src, int imm8);
// SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8);
+ void pinsrb(XMMRegister dst, Register src, int imm8);
void pinsrd(XMMRegister dst, Address src, int imm8);
void pinsrq(XMMRegister dst, Address src, int imm8);
void pinsrb(XMMRegister dst, Address src, int imm8);
+ void insertps(XMMRegister dst, XMMRegister src, int imm8);
// SSE 2 insert
void pinsrw(XMMRegister dst, Register src, int imm8);
void pinsrw(XMMRegister dst, Address src, int imm8);
! // AVX insert
! void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
! void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
! void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
! void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
! void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
!
! // Zero extend moves
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
! void pmovzxbd(XMMRegister dst, XMMRegister src);
void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
+ void pmovzxdq(XMMRegister dst, XMMRegister src);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ // Sign extend moves
+ void pmovsxbd(XMMRegister dst, XMMRegister src);
+ void pmovsxbq(XMMRegister dst, XMMRegister src);
+ void pmovsxbw(XMMRegister dst, XMMRegister src);
+ void pmovsxwd(XMMRegister dst, XMMRegister src);
+ void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);
+
void evpmovwb(Address dst, XMMRegister src, int vector_len);
void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovdb(Address dst, XMMRegister src, int vector_len);
// Multiply add
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Multiply add accumulate
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
*** 1775,1788 ****
// Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode);
void pshufd(XMMRegister dst, Address src, int mode);
void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
! // Shuffle Packed Low Words
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode);
// Shuffle packed values at 128 bit granularity
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
// Shift Right by bytes Logical DoubleQuadword Immediate
void psrldq(XMMRegister dst, int shift);
--- 1900,1920 ----
// Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode);
void pshufd(XMMRegister dst, Address src, int mode);
void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
! // Shuffle Packed High/Low Words
! void pshufhw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode);
+ // Shuffle floats and doubles
+ void pshufps(XMMRegister, XMMRegister, int);
+ void pshufpd(XMMRegister, XMMRegister, int);
+ void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
+ void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
+
// Shuffle packed values at 128 bit granularity
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
// Shift Right by bytes Logical DoubleQuadword Immediate
void psrldq(XMMRegister dst, int shift);
*** 1794,1803 ****
--- 1926,1938 ----
void ptest(XMMRegister dst, Address src);
// Logical Compare 256bit
void vptest(XMMRegister dst, XMMRegister src);
void vptest(XMMRegister dst, Address src);
+ // Vector compare
+ void vptest(XMMRegister dst, XMMRegister src, int vector_len);
+
// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src);
// Interleave Low Doublewords
*** 1856,1865 ****
--- 1991,2001 ----
void palignr(XMMRegister dst, XMMRegister src, int imm8);
void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
+ void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
void sha1nexte(XMMRegister dst, XMMRegister src);
void sha1msg1(XMMRegister dst, XMMRegister src);
void sha1msg2(XMMRegister dst, XMMRegister src);
*** 1974,1983 ****
--- 2110,2120 ----
void xorl(Register dst, int32_t imm32);
void xorl(Register dst, Address src);
void xorl(Register dst, Register src);
void xorb(Register dst, Address src);
+ void xorw(Register dst, Register src);
void xorq(Register dst, Address src);
void xorq(Register dst, Register src);
void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
*** 2010,2019 ****
--- 2147,2158 ----
void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);
//====================VECTOR ARITHMETIC=====================================
+ void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
+ void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);
// Add Packed Floating-Point Values
void addpd(XMMRegister dst, XMMRegister src);
void addpd(XMMRegister dst, Address src);
void addps(XMMRegister dst, XMMRegister src);
*** 2119,2135 ****
--- 2258,2302 ----
void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// Multiply packed integers (only shorts and ints)
void pmullw(XMMRegister dst, XMMRegister src);
void pmulld(XMMRegister dst, XMMRegister src);
+ void pmuludq(XMMRegister dst, XMMRegister src);
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ // Minimum of packed integers
+ void pminsb(XMMRegister dst, XMMRegister src);
+ void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pminsw(XMMRegister dst, XMMRegister src);
+ void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pminsd(XMMRegister dst, XMMRegister src);
+ void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void minps(XMMRegister dst, XMMRegister src);
+ void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void minpd(XMMRegister dst, XMMRegister src);
+ void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+
+ // Maximum of packed integers
+ void pmaxsb(XMMRegister dst, XMMRegister src);
+ void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pmaxsw(XMMRegister dst, XMMRegister src);
+ void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pmaxsd(XMMRegister dst, XMMRegister src);
+ void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void maxps(XMMRegister dst, XMMRegister src);
+ void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void maxpd(XMMRegister dst, XMMRegister src);
+ void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+
// Shift left packed integers
void psllw(XMMRegister dst, int shift);
void pslld(XMMRegister dst, int shift);
void psllq(XMMRegister dst, int shift);
void psllw(XMMRegister dst, XMMRegister shift);
*** 2167,2186 ****
--- 2334,2367 ----
void psrad(XMMRegister dst, XMMRegister shift);
void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ // Variable shift left packed integers
+ void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
+ // Variable shift right packed integers
+ void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
+ // Variable shift right arithmetic packed integers
+ void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
// And packed integers
void pand(XMMRegister dst, XMMRegister src);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Andn packed integers
void pandn(XMMRegister dst, XMMRegister src);
void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
*** 2189,2206 ****
void por(XMMRegister dst, XMMRegister src);
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
-
// vinserti forms
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
--- 2370,2391 ----
void por(XMMRegister dst, XMMRegister src);
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
+
// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// vinserti forms
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
*** 2255,2265 ****
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
! void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len);
// Carry-Less Multiplication Quadword
void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
--- 2440,2464 ----
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
! // Gather AVX2 and AVX3
! void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
! void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
! void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
! void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
! void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
! void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
! void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
! void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);
!
! // Scatter AVX3 only
! void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
! void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
! void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
! void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);
// Carry-Less Multiplication Quadword
void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
*** 2268,2284 ****
// penalty if legacy SSE instructions are encoded using VEX prefix because
// they always clear upper 128 bits. It should be used before calling
// runtime code and native libraries.
void vzeroupper();
! // AVX support for vectorized conditional move (float/double). The following two instructions used only coupled.
! void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
! void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
! void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
! void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
!
protected:
// Next instructions require address alignment 16 bytes SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
void andpd(XMMRegister dst, Address src);
void andps(XMMRegister dst, Address src);
--- 2467,2525 ----
// penalty if legacy SSE instructions are encoded using VEX prefix because
// they always clear upper 128 bits. It should be used before calling
// runtime code and native libraries.
void vzeroupper();
! // Vector double compares
! void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
! void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
! ComparisonPredicateFP comparison, int vector_len);
!
! // Vector float compares
! void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
! void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
! ComparisonPredicateFP comparison, int vector_len);
!
! // Vector integer compares
! void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
! void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
! int comparison, int vector_len);
! void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
! int comparison, int vector_len);
!
! // Vector long compares
! void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
! int comparison, int vector_len);
! void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
! int comparison, int vector_len);
!
! // Vector byte compares
! void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
! int comparison, int vector_len);
! void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
! int comparison, int vector_len);
!
! // Vector short compares
! void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
! int comparison, int vector_len);
! void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
! int comparison, int vector_len);
!
! // Vector blends
! void blendvps(XMMRegister dst, XMMRegister src);
! void blendvpd(XMMRegister dst, XMMRegister src);
! void pblendvb(XMMRegister dst, XMMRegister src);
! void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
! void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
! void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
! void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
! void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
! void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
! void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
! void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
! void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
protected:
// Next instructions require address alignment 16 bytes SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
void andpd(XMMRegister dst, Address src);
void andps(XMMRegister dst, Address src);
*** 2370,2380 ****
void set_is_evex_instruction(void) { _is_evex_instruction = true; }
// Internal encoding data used in compressed immediate offset programming
void set_evex_encoding(int value) { _evex_encoding = value; }
! // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
void reset_is_clear_context(void) { _is_clear_context = false; }
// Map back to current assembler so that we can manage object level association
void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
--- 2611,2622 ----
void set_is_evex_instruction(void) { _is_evex_instruction = true; }
// Internal encoding data used in compressed immediate offset programming
void set_evex_encoding(int value) { _evex_encoding = value; }
!   // When the Evex.Z field is set (true), it is used to clear all non-directed XMM/YMM/ZMM components.
!   // This method unsets it so that merge semantics are used instead.
void reset_is_clear_context(void) { _is_clear_context = false; }
// Map back to current assembler so that we can manage object level association
void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
< prev index next >