--- old/src/cpu/x86/vm/assembler_x86.cpp 2015-06-02 20:15:00.734684000 -0700 +++ new/src/cpu/x86/vm/assembler_x86.cpp 2015-06-02 20:15:00.413266700 -0700 @@ -1347,7 +1347,9 @@ void Assembler::andnl(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(dst, src1, src2, false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, false, AVX_128bit, + true, false); emit_int8((unsigned char)0xF2); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1355,7 +1357,9 @@ void Assembler::andnl(Register dst, Register src1, Address src2) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38(dst, src1, src2, false); + vex_prefix(src2, src1->encoding(), dst->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, false, + AVX_128bit, true, false); emit_int8((unsigned char)0xF2); emit_operand(dst, src2); } @@ -1382,7 +1386,9 @@ void Assembler::blsil(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(rbx, dst, src, false); + int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, false, AVX_128bit, + true, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1390,14 +1396,18 @@ void Assembler::blsil(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38(rbx, dst, src, false); + vex_prefix(src, dst->encoding(), rbx->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, false, + AVX_128bit, true, false); emit_int8((unsigned char)0xF3); emit_operand(rbx, src); } void Assembler::blsmskl(Register dst, Register src) { 
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(rdx, dst, src, false); + int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, false, AVX_128bit, + true, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1412,7 +1422,9 @@ void Assembler::blsrl(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(rcx, dst, src, false); + int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, false, AVX_128bit, + true, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1420,7 +1432,9 @@ void Assembler::blsrl(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38(rcx, dst, src, false); + vex_prefix(src, dst->encoding(), rcx->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, false, + AVX_128bit, true, false); emit_int8((unsigned char)0xF3); emit_operand(rcx, src); } @@ -3099,15 +3113,16 @@ assert(VM_Version::supports_sse4_1(), ""); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8(0x17); emit_operand(dst, src); } void Assembler::ptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, - false, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8(0x17); emit_int8((unsigned char)(0xC0 | encode)); } @@ 
-3119,7 +3134,7 @@ assert(dst != xnoreg, "sanity"); int dst_enc = dst->encoding(); // swap src<->dst for encoding - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len, true, false); emit_int8(0x17); emit_operand(dst, src); } @@ -3128,7 +3143,7 @@ assert(VM_Version::supports_avx(), ""); int vector_len = AVX_256bit; int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, - vector_len, VEX_OPCODE_0F_38); + vector_len, VEX_OPCODE_0F_38, true, false); emit_int8(0x17); emit_int8((unsigned char)(0xC0 | encode)); } @@ -4972,7 +4987,51 @@ emit_int8((unsigned char)(0xC0 | encode)); } -// duplicate 4-bytes integer data from src into 8 locations in dest +// duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL +void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, + vector_len, VEX_OPCODE_0F_38, false); + emit_int8(0x78); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_8bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding; no nds operand (pass 0) + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); + emit_int8(0x78); + emit_operand(dst, src); +} + +// duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL +void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, + vector_len, VEX_OPCODE_0F_38, false); + emit_int8(0x79); + 
emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_16bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding; no nds operand (pass 0) + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); + emit_int8(0x79); + emit_operand(dst, src); +} + +// duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) { assert(VM_Version::supports_evex(), ""); int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, @@ -4981,6 +5040,121 @@ emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding; no nds operand (pass 0) + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); + emit_int8(0x58); + emit_operand(dst, src); +} + +// duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL +void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, true, vector_len, false, false); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = 
dst->encoding(); + // swap src<->dst for encoding; no nds operand (pass 0) + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len); + emit_int8(0x59); + emit_operand(dst, src); +} + +// duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL +void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, false, vector_len, false, false); + emit_int8(0x18); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); + emit_int8(0x18); + emit_operand(dst, src); +} + +// duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL +void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, true, vector_len, false, false); + emit_int8(0x19); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len); + emit_int8(0x19); + emit_operand(dst, src); +} + +// duplicate 1-byte integer data from src into 16|32|64 locations in 
dest : requires AVX512BW and AVX512VL +void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, false, vector_len, false, false); + emit_int8(0x7A); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL +void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, false, vector_len, false, false); + emit_int8(0x7B); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL +void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, false, vector_len, false, false); + emit_int8(0x7C); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL +void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, true, vector_len, false, false); + emit_int8(0x7C); + emit_int8((unsigned char)(0xC0 | encode)); +} + // Carry-Less Multiplication Quadword void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) { assert(VM_Version::supports_clmul(), ""); @@ -5591,7 +5765,7 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool 
no_mask_reg) { - bool vex_r = (xreg_enc >= 8); + bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0; bool vex_b = adr.base_needs_rex(); bool vex_x = adr.index_needs_rex(); avx_vector_len = vector_len; @@ -5619,8 +5793,8 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) { - bool vex_r = (dst_enc >= 8); - bool vex_b = (src_enc >= 8); + bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0; + bool vex_b = ((src_enc & 8) == 8) ? 1 : 0; bool vex_x = false; avx_vector_len = vector_len; @@ -6265,19 +6439,19 @@ void Assembler::andnq(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, true, AVX_128bit, + true, false); emit_int8((unsigned char)0xF2); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andnq(Register dst, Register src1, Address src2) { - if (VM_Version::supports_evex()) { - tuple_type = EVEX_T1S; - input_size_in_bits = EVEX_64bit; - } InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q(dst, src1, src2); + vex_prefix(src2, src1->encoding(), dst->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, true, + AVX_128bit, true, false); emit_int8((unsigned char)0xF2); emit_operand(dst, src2); } @@ -6304,7 +6478,9 @@ void Assembler::blsiq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src); + int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, true, AVX_128bit, + true, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned 
char)(0xC0 | encode)); } @@ -6312,14 +6488,18 @@ void Assembler::blsiq(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q(rbx, dst, src); + vex_prefix(src, dst->encoding(), rbx->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, true, + AVX_128bit, true, false); emit_int8((unsigned char)0xF3); emit_operand(rbx, src); } void Assembler::blsmskq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src); + int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, true, AVX_128bit, + true, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -6327,14 +6507,18 @@ void Assembler::blsmskq(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q(rdx, dst, src); + vex_prefix(src, dst->encoding(), rdx->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, true, + AVX_128bit, true, false); emit_int8((unsigned char)0xF3); emit_operand(rdx, src); } void Assembler::blsrq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src); + int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, true, AVX_128bit, + true, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -6342,7 +6526,9 @@ void Assembler::blsrq(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q(rcx, dst, src); + vex_prefix(src, dst->encoding(), rcx->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, 
true, + AVX_128bit, true, false); emit_int8((unsigned char)0xF3); emit_operand(rcx, src); }