--- old/src/cpu/x86/vm/assembler_x86.cpp	2015-04-23 08:25:11.149343300 -0700
+++ new/src/cpu/x86/vm/assembler_x86.cpp	2015-04-23 08:25:10.933343300 -0700
@@ -54,6 +54,36 @@
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 
 // Implementation of AddressLiteral
 
+// A 2-D table for managing compressed displacement (disp8) on EVEX-enabled platforms.
+unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
+  // -----------------Table 4.5 -------------------- //
+  16, 32, 64,  // EVEX_FV(0)
+  4,  4,  4,   // EVEX_FV(1) - with Evex.b
+  16, 32, 64,  // EVEX_FV(2) - with Evex.w
+  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
+  8,  16, 32,  // EVEX_HV(0)
+  4,  4,  4,   // EVEX_HV(1) - with Evex.b
+  // -----------------Table 4.6 -------------------- //
+  16, 32, 64,  // EVEX_FVM(0)
+  1,  1,  1,   // EVEX_T1S(0)
+  2,  2,  2,   // EVEX_T1S(1)
+  4,  4,  4,   // EVEX_T1S(2)
+  8,  8,  8,   // EVEX_T1S(3)
+  4,  4,  4,   // EVEX_T1F(0)
+  8,  8,  8,   // EVEX_T1F(1)
+  8,  8,  8,   // EVEX_T2(0)
+  0,  16, 16,  // EVEX_T2(1)
+  0,  16, 16,  // EVEX_T4(0)
+  0,  0,  32,  // EVEX_T4(1)
+  0,  0,  32,  // EVEX_T8(0)
+  8,  16, 32,  // EVEX_HVM(0)
+  4,  8,  16,  // EVEX_QVM(0)
+  2,  4,  8,   // EVEX_OVM(0)
+  16, 16, 16,  // EVEX_M128(0)
+  8,  32, 64,  // EVEX_DUP(0)
+  0,  0,  0    // EVEX_NTUP
+};
+
 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
   _is_lval = false;
   _target = target;
@@ -183,8 +213,9 @@
 // make this go away someday
 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
   if (rtype == relocInfo::none)
-      emit_int32(data);
-  else  emit_data(data, Relocation::spec_simple(rtype), format);
+    emit_int32(data);
+  else
+    emit_data(data, Relocation::spec_simple(rtype), format);
 }
 
 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
@@ -273,6 +304,177 @@
 }
 
+bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
+                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
+  int mod_idx = 0;
+  // We will test if the displacement fits the compressed format and if so
+  // apply the compression to the displacement iff the result is 8bit.
+  if (VM_Version::supports_evex() && is_evex_inst) {
+    switch (cur_tuple_type) {
+    case EVEX_FV:
+      if ((cur_encoding & VEX_W) == VEX_W) {
+        mod_idx += 2 + (((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0);
+      } else {
+        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      }
+      break;
+
+    case EVEX_HV:
+      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      break;
+
+    case EVEX_FVM:
+      break;
+
+    case EVEX_T1S:
+      switch (in_size_in_bits) {
+      case EVEX_8bit:
+        break;
+
+      case EVEX_16bit:
+        mod_idx = 1;
+        break;
+
+      case EVEX_32bit:
+        mod_idx = 2;
+        break;
+
+      case EVEX_64bit:
+        mod_idx = 3;
+        break;
+      }
+      break;
+
+    case EVEX_T1F:
+    case EVEX_T2:
+    case EVEX_T4:
+      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
+      break;
+
+    case EVEX_T8:
+      break;
+
+    case EVEX_HVM:
+      break;
+
+    case EVEX_QVM:
+      break;
+
+    case EVEX_OVM:
+      break;
+
+    case EVEX_M128:
+      break;
+
+    case EVEX_DUP:
+      break;
+
+    default:
+      assert(0, "no valid evex tuple_table entry");
+      break;
+    }
+
+    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
+      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
+      if ((disp % disp_factor) == 0) {
+        int new_disp = disp / disp_factor;
+        if ((-0x80 <= new_disp && new_disp < 0x80)) {
+          disp = new_disp;
+        }
+      } else {
+        return false;
+      }
+    }
+  }
+  return (-0x80 <= disp && disp < 0x80);
+}
+
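For reference, a worked example of the disp8*N compression the table above drives
(an illustrative sketch, not part of the patch; the scale factor is read off the
EVEX_FV row, mod_idx 0, AVX_512bit column, where N == 64; emit_int8/emit_int32 and
is8bit are the assembler's own helpers used throughout this file):

    // Sketch: compressing [base + 0x100] for a full-vector 512-bit operand.
    int disp = 0x100;
    int N    = 64;                    // tuple_table[EVEX_FV + 0][AVX_512bit]
    if ((disp % N) == 0 && is8bit(disp / N)) {
      emit_int8(disp / N);            // disp8 == 4: one byte instead of four
    } else {
      emit_int32(disp);               // not evenly divisible -> full disp32
    }

A displacement of 0x101 would fail the divisibility test and be emitted as a full
disp32, even though 0x101/64 would otherwise land in disp8 range.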
+bool Assembler::emit_compressed_disp_byte(int &disp) {
+  int mod_idx = 0;
+  // We will test if the displacement fits the compressed format and if so
+  // apply the compression to the displacement iff the result is 8bit.
+  if (VM_Version::supports_evex() && is_evex_instruction) {
+    switch (tuple_type) {
+    case EVEX_FV:
+      if ((evex_encoding & VEX_W) == VEX_W) {
+        mod_idx += 2 + (((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0);
+      } else {
+        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      }
+      break;
+
+    case EVEX_HV:
+      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      break;
+
+    case EVEX_FVM:
+      break;
+
+    case EVEX_T1S:
+      switch (input_size_in_bits) {
+      case EVEX_8bit:
+        break;
+
+      case EVEX_16bit:
+        mod_idx = 1;
+        break;
+
+      case EVEX_32bit:
+        mod_idx = 2;
+        break;
+
+      case EVEX_64bit:
+        mod_idx = 3;
+        break;
+      }
+      break;
+
+    case EVEX_T1F:
+    case EVEX_T2:
+    case EVEX_T4:
+      mod_idx = (input_size_in_bits == EVEX_64bit) ? 1 : 0;
+      break;
+
+    case EVEX_T8:
+      break;
+
+    case EVEX_HVM:
+      break;
+
+    case EVEX_QVM:
+      break;
+
+    case EVEX_OVM:
+      break;
+
+    case EVEX_M128:
+      break;
+
+    case EVEX_DUP:
+      break;
+
+    default:
+      assert(0, "no valid evex tuple_table entry");
+      break;
+    }
+
+    if (avx_vector_len >= AVX_128bit && avx_vector_len <= AVX_512bit) {
+      int disp_factor = tuple_table[tuple_type + mod_idx][avx_vector_len];
+      if ((disp % disp_factor) == 0) {
+        int new_disp = disp / disp_factor;
+        if (is8bit(new_disp)) {
+          disp = new_disp;
+        }
+      } else {
+        return false;
+      }
+    }
+  }
+  return is8bit(disp);
+}
+
 void Assembler::emit_operand(Register reg, Register base, Register index,
                              Address::ScaleFactor scale, int disp,
                              RelocationHolder const& rspec,
                              int rip_relative_correction) {
@@ -296,7 +498,7 @@
       assert(index != rsp, "illegal addressing mode");
       emit_int8(0x04 | regenc);
       emit_int8(scale << 6 | indexenc | baseenc);
-    } else if (is8bit(disp) && rtype == relocInfo::none) {
+    } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
       // [base + index*scale + imm8]
       // [01 reg 100][ss index base] imm8
       assert(index != rsp, "illegal addressing mode");
@@ -318,7 +520,7 @@
       // [00 reg 100][00 100 100]
       emit_int8(0x04 | regenc);
       emit_int8(0x24);
-    } else if (is8bit(disp) && rtype == relocInfo::none) {
+    } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
       // [rsp + imm8]
       // [01 reg 100][00 100 100] disp8
       emit_int8(0x44 | regenc);
@@ -339,7 +541,7 @@
       // [base]
       // [00 reg base]
       emit_int8(0x00 | regenc | baseenc);
-    } else if (is8bit(disp) && rtype == relocInfo::none) {
+    } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
       // [base + disp8]
       // [01 reg base] disp8
       emit_int8(0x40 | regenc | baseenc);
@@ -389,11 +591,20 @@
       emit_data(disp, rspec, disp32_operand);
     }
   }
+  is_evex_instruction = false;
 }
 
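A note on the XMM overload that follows (an illustrative sketch with an assumed
register number, not part of the patch): under EVEX there are 32 XMM registers,
but the legacy operand emitter only understands 4-bit encodings. Bit 4 of the
register number has already been emitted in the EVEX prefix, so the operand
bytes only need the low bits:

    // Sketch: how xmm21's encoding splits between prefix and ModRM bytes.
    int xreg_enc = 21;               // xmm21 (assumed for illustration)
    int low_bits = xreg_enc & 0xf;   // 5: handled by the legacy (REX-style) path
    int hi_bit   = xreg_enc >> 4;    // 1: already carried in the EVEX prefix (R')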
 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                              Address::ScaleFactor scale, int disp,
                              RelocationHolder const& rspec) {
+  if (UseAVX > 2) {
+    int xreg_enc = reg->encoding();
+    if (xreg_enc > 15) {
+      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
+      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
+      return;
+    }
+  }
   emit_operand((Register)reg, base, index, scale, disp, rspec);
 }
 
@@ -686,6 +897,29 @@
       debug_only(has_disp32 = true); // has both kinds of operands!
       break;
 
+    case 0x62: // EVEX_4bytes
+      assert((UseAVX > 0), "shouldn't have EVEX prefix");
+      assert(ip == inst+1, "no prefixes allowed");
+      // no EVEX collisions, all instructions that have 0x62 opcodes
+      // have EVEX versions and are subopcodes of 0x66
+      ip++; // skip P0 and examine W in P1
+      is_64bit = ((VEX_W & *ip) == VEX_W);
+      ip++; // move to P2
+      ip++; // skip P2, move to opcode
+      // To find the end of instruction (which == end_pc_operand).
+      switch (0xFF & *ip) {
+      case 0x61: // pcmpestri r, r/a, #8
+      case 0x70: // pshufd r, r/a, #8
+      case 0x73: // psrldq r, #8
+        tail_size = 1;  // the imm8
+        break;
+      default:
+        break;
+      }
+      ip++; // skip opcode
+      debug_only(has_disp32 = true); // has both kinds of operands!
+      break;
+
     case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
     case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
     case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
@@ -985,12 +1219,22 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
@@ -1000,20 +1244,26 @@
 }
 
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::aesdec(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, false,
+              VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDE);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDE);
   emit_int8(0xC0 | encode);
 }
@@ -1021,14 +1271,16 @@
 void Assembler::aesdeclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, false,
+              VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDF);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = 
simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8((unsigned char)0xDF); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1036,14 +1288,16 @@ void Assembler::aesenc(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + simd_prefix(dst, dst, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8((unsigned char)0xDC); emit_operand(dst, src); } void Assembler::aesenc(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8((unsigned char)0xDC); emit_int8(0xC0 | encode); } @@ -1051,14 +1305,16 @@ void Assembler::aesenclast(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + simd_prefix(dst, dst, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8((unsigned char)0xDD); emit_operand(dst, src); } void Assembler::aesenclast(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8((unsigned char)0xDD); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1091,7 +1347,7 @@ void Assembler::andnl(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(dst, src1, src2); + int encode = vex_prefix_0F38_and_encode(dst, src1, src2, false); emit_int8((unsigned char)0xF2); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1099,7 +1355,7 @@ void Assembler::andnl(Register dst, Register src1, Address src2) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38(dst, src1, src2); + vex_prefix_0F38(dst, src1, src2, false); emit_int8((unsigned char)0xF2); emit_operand(dst, src2); } @@ -1126,7 +1382,7 @@ void Assembler::blsil(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(rbx, dst, src); + int encode = vex_prefix_0F38_and_encode(rbx, dst, src, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1134,14 +1390,14 @@ void Assembler::blsil(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38(rbx, dst, src); + vex_prefix_0F38(rbx, dst, src, false); emit_int8((unsigned char)0xF3); emit_operand(rbx, src); } void Assembler::blsmskl(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(rdx, dst, src); + int encode = vex_prefix_0F38_and_encode(rdx, dst, src, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1149,14 +1405,14 @@ void Assembler::blsmskl(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not 
supported"); - vex_prefix_0F38(rdx, dst, src); + vex_prefix_0F38(rdx, dst, src, false); emit_int8((unsigned char)0xF3); emit_operand(rdx, src); } void Assembler::blsrl(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(rcx, dst, src); + int encode = vex_prefix_0F38_and_encode(rcx, dst, src, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1164,7 +1420,7 @@ void Assembler::blsrl(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38(rcx, dst, src); + vex_prefix_0F38(rcx, dst, src, false); emit_int8((unsigned char)0xF3); emit_operand(rcx, src); } @@ -1312,22 +1568,36 @@ // NOTE: dbx seems to decode this as comiss even though the // 0x66 is there. Strangly ucomisd comes out correct NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true); + } else { + emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); + } } void Assembler::comisd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true); + } else { + emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); + } } void Assembler::comiss(XMMRegister dst, Address src) { + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE); + emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true); } void Assembler::comiss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE); + emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true); } void Assembler::cpuid() { @@ -1347,36 +1617,61 @@ void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2); + } else { + emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); + } } void Assembler::cvtsd2ss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1F; + input_size_in_bits = EVEX_64bit; + emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2); + } else { + emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); + } } void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); + int encode = 0; + if (VM_Version::supports_evex()) { + encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true); + } else { + encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, false); + } emit_int8(0x2A); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2); + if (VM_Version::supports_evex()) { + tuple_type = 
EVEX_T1S; + input_size_in_bits = EVEX_32bit; + emit_simd_arith_q(0x2A, dst, src, VEX_SIMD_F2, true); + } else { + emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2); + } } void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, true); emit_int8(0x2A); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3); + emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, true); } void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { @@ -1385,6 +1680,10 @@ } void Assembler::cvtss2sd(XMMRegister dst, Address src) { + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); } @@ -1392,14 +1691,14 @@ void Assembler::cvttsd2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true); emit_int8(0x2C); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvttss2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true); emit_int8(0x2C); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1414,15 +1713,29 @@ void Assembler::divsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2); + } else { + emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); + } } void Assembler::divsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2); + } else { + emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); + } } void Assembler::divss(XMMRegister dst, Address src) { + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } NOT_LP64(assert(VM_Version::supports_sse(), "")); emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); } @@ -1675,7 +1988,11 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66, true); + } else { + emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66); + } } void Assembler::movaps(XMMRegister dst, XMMRegister src) { @@ -1685,7 +2002,8 @@ void Assembler::movlhps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE); + int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, true, VEX_OPCODE_0F, + false, AVX_128bit); emit_int8(0x16); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1698,6 +2016,51 
@@ emit_operand(dst, src); } +void Assembler::kmovq(KRegister dst, KRegister src) { + NOT_LP64(assert(VM_Version::supports_evex(), "")); + int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, + true, VEX_OPCODE_0F, true); + emit_int8((unsigned char)0x90); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::kmovq(KRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_evex(), "")); + int dst_enc = dst->encoding(); + int nds_enc = 0; + vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE, + VEX_OPCODE_0F, true, AVX_128bit, true, true); + emit_int8((unsigned char)0x90); + emit_operand((Register)dst, src); +} + +void Assembler::kmovq(Address dst, KRegister src) { + NOT_LP64(assert(VM_Version::supports_evex(), "")); + int src_enc = src->encoding(); + int nds_enc = 0; + vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE, + VEX_OPCODE_0F, true, AVX_128bit, true, true); + emit_int8((unsigned char)0x90); + emit_operand((Register)src, dst); +} + +void Assembler::kmovql(KRegister dst, Register src) { + NOT_LP64(assert(VM_Version::supports_evex(), "")); + bool supports_bw = VM_Version::supports_avx512bw(); + VexSimdPrefix pre = supports_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE; + int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, true, + VEX_OPCODE_0F, supports_bw); + emit_int8((unsigned char)0x92); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::kmovdl(KRegister dst, Register src) { + NOT_LP64(assert(VM_Version::supports_evex(), "")); + VexSimdPrefix pre = VM_Version::supports_avx512bw() ? VEX_SIMD_F2 : VEX_SIMD_NONE; + int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, true, VEX_OPCODE_0F, false); + emit_int8((unsigned char)0x92); + emit_int8((unsigned char)(0xC0 | encode)); +} void Assembler::movb(Address dst, int imm8) { InstructionMark im(this); @@ -1718,7 +2081,7 @@ void Assembler::movdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, true); emit_int8(0x6E); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1726,23 +2089,31 @@ void Assembler::movdl(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // swap src/dst to get correct prefix - int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); + int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, true); emit_int8(0x7E); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66); + simd_prefix(dst, src, VEX_SIMD_66, true, VEX_OPCODE_0F); emit_int8(0x6E); emit_operand(dst, src); } void Assembler::movdl(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66); + simd_prefix(dst, src, VEX_SIMD_66, true); emit_int8(0x7E); emit_operand(src, dst); } @@ -1754,11 +2125,17 @@ void Assembler::movdqa(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); } void Assembler::movdqu(XMMRegister dst, Address src) { 
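  // Illustrative aside (not in the original patch): EVEX_FVM is the
  // "full vector memory" tuple, so per the tuple_table above the disp8
  // scale factor N is the whole vector width -- 16, 32 or 64 bytes --
  // e.g. [base + 0x40] on a 512-bit form compresses to disp8 == 1.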
NOT_LP64(assert(VM_Version::supports_sse2(), "")); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); } @@ -1769,8 +2146,11 @@ void Assembler::movdqu(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F3); + simd_prefix(dst, src, VEX_SIMD_F3, false); emit_int8(0x7F); emit_operand(src, dst); } @@ -1778,28 +2158,77 @@ // Move Unaligned 256bit Vector void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { assert(UseAVX > 0, ""); - bool vector256 = true; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } + int vector_len = AVX_256bit; + int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len); emit_int8(0x6F); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmovdqu(XMMRegister dst, Address src) { assert(UseAVX > 0, ""); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } InstructionMark im(this); - bool vector256 = true; - vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256); + int vector_len = AVX_256bit; + vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false); emit_int8(0x6F); emit_operand(dst, src); } void Assembler::vmovdqu(Address dst, XMMRegister src) { assert(UseAVX > 0, ""); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } InstructionMark im(this); - bool vector256 = true; + int vector_len = AVX_256bit; // swap src<->dst for encoding assert(src != xnoreg, "sanity"); - vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256); + vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false); + emit_int8(0x7F); + emit_operand(src, dst); +} + +// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64) +void Assembler::evmovdqu(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 0, ""); + int src_enc = src->encoding(); + int dst_enc = dst->encoding(); + int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F, + true, vector_len, false, false); + emit_int8(0x6F); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evmovdqu(XMMRegister dst, Address src, int vector_len) { + assert(UseAVX > 0, ""); + InstructionMark im(this); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false); + } else { + vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false); + } + emit_int8(0x6F); + emit_operand(dst, src); +} + +void Assembler::evmovdqu(Address dst, XMMRegister src, int vector_len) { + assert(UseAVX > 0, ""); + InstructionMark im(this); + assert(src != xnoreg, "sanity"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + // swap src<->dst for encoding + vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false); + } else { + // swap src<->dst for encoding + vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false); + } emit_int8(0x7F); emit_operand(src, dst); } @@ -1845,7 +2274,11 @@ // The selection is done in MacroAssembler::movdbl() and movflt(). 
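The evmovdqu entry points added above take an explicit vector_len in place of the
old vector256 flag. A minimal caller sketch (illustrative only; assumes an AVX-512
capable CPU and a MacroAssembler context where xmm0, rsi and rdi are free):

    // Copy 64 bytes with a single 512-bit load/store pair.
    if (UseAVX > 2) {
      evmovdqu(xmm0, Address(rsi, 0), AVX_512bit);   // zmm0 <- [rsi]
      evmovdqu(Address(rdi, 0), xmm0, AVX_512bit);   // [rdi] <- zmm0
    }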
void Assembler::movlpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x12, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } + emit_simd_arith(0x12, dst, src, VEX_SIMD_66, true); } void Assembler::movq( MMXRegister dst, Address src ) { @@ -1871,7 +2304,13 @@ void Assembler::movq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F3); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, true); + } else { + simd_prefix(dst, src, VEX_SIMD_F3, true, VEX_OPCODE_0F); + } emit_int8(0x7E); emit_operand(dst, src); } @@ -1879,7 +2318,14 @@ void Assembler::movq(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + simd_prefix(src, xnoreg, dst, VEX_SIMD_66, true, + VEX_OPCODE_0F, true, AVX_128bit); + } else { + simd_prefix(dst, src, VEX_SIMD_66, true); + } emit_int8((unsigned char)0xD6); emit_operand(src, dst); } @@ -1902,36 +2348,60 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x10, dst, src, VEX_SIMD_F2); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, true); + } else { + emit_simd_arith(0x10, dst, src, VEX_SIMD_F2); + } } void Assembler::movsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, true); + } else { + emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2); + } } void Assembler::movsd(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F2); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2); + } else { + simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, false); + } emit_int8(0x11); emit_operand(src, dst); } void Assembler::movss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x10, dst, src, VEX_SIMD_F3); + emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, true); } void Assembler::movss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } + emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, true); } void Assembler::movss(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F3); + simd_prefix(dst, src, VEX_SIMD_F3, false); emit_int8(0x11); emit_operand(src, dst); } @@ -2023,16 +2493,30 @@ void Assembler::mulsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); + if 
(VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2); + } else { + emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); + } } void Assembler::mulsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2); + } else { + emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); + } } void Assembler::mulss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); } @@ -2332,22 +2816,30 @@ void Assembler::packuswb(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - emit_simd_arith(0x67, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_simd_arith(0x67, dst, src, VEX_SIMD_66, + false, (VM_Version::supports_avx512dq() == false)); } void Assembler::packuswb(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x67, dst, src, VEX_SIMD_66); + emit_simd_arith(0x67, dst, src, VEX_SIMD_66, + false, (VM_Version::supports_avx512dq() == false)); } -void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "some form of AVX must be enabled"); + emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len, + false, (VM_Version::supports_avx512dq() == false)); } -void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) { +void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) { assert(VM_Version::supports_avx2(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector256); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_3A, true, vector_len); emit_int8(0x00); emit_int8(0xC0 | encode); emit_int8(imm8); @@ -2361,7 +2853,8 @@ void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_3A, + false, AVX_128bit, true); emit_int8(0x61); emit_operand(dst, src); emit_int8(imm8); @@ -2369,7 +2862,8 @@ void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_3A, false, AVX_128bit, true); emit_int8(0x61); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -2377,7 +2871,8 @@ void Assembler::pextrd(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, 
VEX_SIMD_66, VEX_OPCODE_0F_3A, false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A, + false, AVX_128bit, (VM_Version::supports_avx512dq() == false)); emit_int8(0x16); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -2385,7 +2880,8 @@ void Assembler::pextrq(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A, + false, AVX_128bit, (VM_Version::supports_avx512dq() == false)); emit_int8(0x16); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -2393,7 +2889,8 @@ void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A, + false, AVX_128bit, (VM_Version::supports_avx512dq() == false)); emit_int8(0x22); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -2401,7 +2898,8 @@ void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A, + false, AVX_128bit, (VM_Version::supports_avx512dq() == false)); emit_int8(0x22); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -2409,15 +2907,18 @@ void Assembler::pmovzxbw(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_HVM; + } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38); emit_int8(0x30); emit_operand(dst, src); } void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38); emit_int8(0x30); emit_int8((unsigned char)(0xC0 | encode)); } @@ -2520,15 +3021,20 @@ void Assembler::pshufb(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_ssse3(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, + false, AVX_128bit, (VM_Version::supports_avx512bw() == false)); emit_int8(0x00); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pshufb(XMMRegister dst, Address src) { assert(VM_Version::supports_ssse3(), ""); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + simd_prefix(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, + false, AVX_128bit, (VM_Version::supports_avx512bw() == false)); emit_int8(0x00); emit_operand(dst, src); } @@ -2545,8 +3051,12 @@ assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 
0), "SSE mode requires address alignment 16 bytes"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66); + simd_prefix(dst, src, VEX_SIMD_66, false); emit_int8(0x70); emit_operand(dst, src); emit_int8(mode & 0xFF); @@ -2555,7 +3065,8 @@ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2); + emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, false, + (VM_Version::supports_avx512bw() == false)); emit_int8(mode & 0xFF); } @@ -2563,8 +3074,12 @@ assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F2); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, false, VEX_OPCODE_0F, + false, AVX_128bit, (VM_Version::supports_avx512bw() == false)); emit_int8(0x70); emit_operand(dst, src); emit_int8(mode & 0xFF); @@ -2573,7 +3088,8 @@ void Assembler::psrldq(XMMRegister dst, int shift) { // Shift 128 bit value in xmm register by number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); + int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, + false, AVX_128bit, (VM_Version::supports_avx512bw() == false)); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift); @@ -2583,14 +3099,15 @@ assert(VM_Version::supports_sse4_1(), ""); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38); emit_int8(0x17); emit_operand(dst, src); } void Assembler::ptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, + false, VEX_OPCODE_0F_38); emit_int8(0x17); emit_int8((unsigned char)(0xC0 | encode)); } @@ -2598,19 +3115,20 @@ void Assembler::vptest(XMMRegister dst, Address src) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this); - bool vector256 = true; + int vector_len = AVX_256bit; assert(dst != xnoreg, "sanity"); int dst_enc = dst->encoding(); // swap src<->dst for encoding - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); emit_int8(0x17); emit_operand(dst, src); } void Assembler::vptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - bool vector256 = true; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); + int vector_len = AVX_256bit; + int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, + vector_len, VEX_OPCODE_0F_38); emit_int8(0x17); emit_int8((unsigned char)(0xC0 | encode)); } @@ -2618,6 +3136,9 @@ void Assembler::punpcklbw(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } 
   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
 }
 
@@ -2629,6 +3150,10 @@
 void Assembler::punpckldq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
 }
 
@@ -2838,12 +3363,22 @@
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
@@ -2857,6 +3392,10 @@
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
@@ -2907,12 +3446,22 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
@@ -2922,6 +3469,10 @@
 
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
 }
 
@@ -2978,22 +3529,36 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
+  } else {
+    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
+  } else {
+    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 
NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); + emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true); } void Assembler::xabort(int8_t imm8) { @@ -3075,82 +3640,138 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } else { + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } } void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } else { + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } } void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit); } void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit); } void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } else { + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } } void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } else { + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } } void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit); } void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit); } void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } else { + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } } void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, 
XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } else { + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } } void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit); } void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit); } void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } else { + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } } void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } else { + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); + } } void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit); } void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit); } //====================VECTOR ARITHMETIC===================================== @@ -3159,7 +3780,11 @@ void Assembler::addpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x58, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x58, dst, src, VEX_SIMD_66); + } } void Assembler::addps(XMMRegister dst, XMMRegister src) { @@ -3167,29 +3792,47 @@ emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE); } -void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len); + } } -void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 
assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len); } -void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len); + } } -void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len); } void Assembler::subpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5C, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x5C, dst, src, VEX_SIMD_66); + } } void Assembler::subps(XMMRegister dst, XMMRegister src) { @@ -3197,29 +3840,47 @@ emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE); } -void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); + } } -void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len); } -void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); + } } -void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len); } void Assembler::mulpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x59, dst, src, VEX_SIMD_66); + 
if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x59, dst, src, VEX_SIMD_66); + } } void Assembler::mulps(XMMRegister dst, XMMRegister src) { @@ -3227,29 +3888,47 @@ emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE); } -void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len); + } } -void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len); } -void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len); + } } -void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len); } void Assembler::divpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5E, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x5E, dst, src, VEX_SIMD_66); + } } void Assembler::divps(XMMRegister dst, XMMRegister src) { @@ -3257,118 +3936,199 @@ emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE); } -void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); + } } -void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len); } -void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 
assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); + } } -void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len); } void Assembler::andpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x54, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) { + emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x54, dst, src, VEX_SIMD_66, false, true); + } } void Assembler::andps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); + emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, false, + (VM_Version::supports_avx512dq() == false)); } void Assembler::andps(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, + false, (VM_Version::supports_avx512dq() == false)); } void Assembler::andpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x54, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x54, dst, src, VEX_SIMD_66, false, true); + } } -void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) { + emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, true); + } } -void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); + bool legacy_mode = (VM_Version::supports_avx512dq() == false); + emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, legacy_mode); } -void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + 
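// Editor's note: andpd/andps and xorpd/xorps are AVX512DQ forms under EVEX,
// so each emitter in this region takes the EVEX path only when supports_evex()
// and supports_avx512dq() both hold; otherwise the extra boolean passed at the
// call sites (supports_avx512dq() == false) requests the legacy VEX encoding,
// which keeps operands within xmm0-xmm15.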
emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, true); + } } -void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, + (VM_Version::supports_avx512dq() == false)); } void Assembler::xorpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x57, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) { + emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x57, dst, src, VEX_SIMD_66, false, true); + } } void Assembler::xorps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); + emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, + false, (VM_Version::supports_avx512dq() == false)); } void Assembler::xorpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x57, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x57, dst, src, VEX_SIMD_66, false, true); + } } void Assembler::xorps(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, false, + (VM_Version::supports_avx512dq() == false)); } -void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) { + emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, true); + } } -void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { +void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); + emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, + (VM_Version::supports_avx512dq() == false)); } -void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, true); + } } -void 
Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { +void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, + (VM_Version::supports_avx512dq() == false)); } - // Integer vector arithmetic -void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); +void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx() && (vector_len == 0) || + VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, + VEX_OPCODE_0F_38, true, false); emit_int8(0x01); emit_int8((unsigned char)(0xC0 | encode)); } -void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); +void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx() && (vector_len == 0) || + VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, + VEX_OPCODE_0F_38, true, false); emit_int8(0x02); emit_int8((unsigned char)(0xC0 | encode)); } @@ -3390,61 +4150,89 @@ void Assembler::paddq(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); + } } void Assembler::phaddw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse3(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8(0x01); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::phaddd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse3(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8(0x02); emit_int8((unsigned char)(0xC0 | encode)); } -void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() 
== false)); } -void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() == false)); } -void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len); } -void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); + } } -void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } + emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len); } -void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } + emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len); } -void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len); } -void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - 
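// Editor's note: element width picks the EVEX feature gate in this hunk. The
// byte/word forms (vpaddb/vpaddw, and vpsub*/vpmullw further down) are
// AVX512BW instructions, so their register forms request legacy VEX encoding
// when BW is absent, e.g.:
//
//   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len,
//                  (VM_Version::supports_avx512bw() == false));
//
// while vpaddd needs only AVX512F, and vpaddq also sets EVEX.W via the _q emitter.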
emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); + } } void Assembler::psubb(XMMRegister dst, XMMRegister src) { @@ -3464,84 +4252,149 @@ void Assembler::psubq(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); + } } -void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() == false)); } -void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() == false)); } -void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len); } -void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); + } } -void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } + emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, + 
(VM_Version::supports_avx512bw() == false)); } -void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } + emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() == false)); } -void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len); } -void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); + } } void Assembler::pmullw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xD5, dst, src, VEX_SIMD_66); + emit_simd_arith(0xD5, dst, src, VEX_SIMD_66, + (VM_Version::supports_avx512bw() == false)); } void Assembler::pmulld(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, + false, VEX_OPCODE_0F_38); emit_int8(0x40); emit_int8((unsigned char)(0xC0 | encode)); } -void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() == false)); +} + +void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, + vector_len, VEX_OPCODE_0F_38); + emit_int8(0x40); + emit_int8((unsigned char)(0xC0 | encode)); } -void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - int 
encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); +void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 2, "requires some form of AVX"); + int src_enc = src->encoding(); + int dst_enc = dst->encoding(); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, + VEX_OPCODE_0F_38, true, vector_len, false, false); emit_int8(0x40); emit_int8((unsigned char)(0xC0 | encode)); } -void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FVM; + } + emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len); } -void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); +void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } InstructionMark im(this); int dst_enc = dst->encoding(); int nds_enc = nds->is_valid() ? nds->encoding() : 0; - vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); + vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, + VEX_OPCODE_0F_38, false, vector_len); + emit_int8(0x40); + emit_operand(dst, src); +} + +void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_64bit; + } + InstructionMark im(this); + int dst_enc = dst->encoding(); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len); emit_int8(0x40); emit_operand(dst, src); } @@ -3550,7 +4403,8 @@ void Assembler::psllw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM6 is for /6 encoding: 66 0F 71 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, + false, AVX_128bit, (VM_Version::supports_avx512bw() == false)); emit_int8(0x71); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -3559,7 +4413,7 @@ void Assembler::pslld(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM6 is for /6 encoding: 66 0F 72 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false); emit_int8(0x72); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -3568,7 +4422,7 @@ void Assembler::psllq(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM6 is for /6 encoding: 66 0F 73 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -3576,7 +4430,8 @@ void Assembler::psllw(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66); + emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, false, + (VM_Version::supports_avx512bw() == false)); } void Assembler::pslld(XMMRegister dst, XMMRegister shift) { @@ -3586,50 +4441,65 @@ void Assembler::psllq(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66); + } else { + emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); + } } -void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); +void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); // XMM6 is for /6 encoding: 66 0F 71 /6 ib - emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256); + emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() == false)); emit_int8(shift & 0xFF); } -void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); +void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); // XMM6 is for /6 encoding: 66 0F 72 /6 ib - emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256); + emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len); emit_int8(shift & 0xFF); } -void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); +void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int 
vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); // XMM6 is for /6 encoding: 66 0F 73 /6 ib - emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len); + } emit_int8(shift & 0xFF); } -void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256); +void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() == false)); } -void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256); +void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len); } -void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256); +void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len); + } } // Shift packed integers logically right by specified number of bits. void Assembler::psrlw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM2 is for /2 encoding: 66 0F 71 /2 ib - int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, + (VM_Version::supports_avx512bw() == false)); emit_int8(0x71); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -3638,7 +4508,7 @@ void Assembler::psrld(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM2 is for /2 encoding: 66 0F 72 /2 ib - int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false); emit_int8(0x72); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -3649,7 +4519,12 @@ // shifts 128 bit value in xmm register by number of bytes. 
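// Editor's note: psrldq shifts the whole 128-bit register right by bytes
// rather than shifting each lane, and under EVEX it is an AVX512BW form,
// which is what the supports_avx512bw() branch below selects on. Usage:
//
//   psrldq(xmm1, 8);  // move the upper quadword of xmm1 into the lower half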
NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM2 is for /2 encoding: 66 0F 73 /2 ib - int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); + int encode = 0; + if (VM_Version::supports_evex() && VM_Version::supports_avx512bw()) { + encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false); + } else { + encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true); + } emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -3657,7 +4532,8 @@ void Assembler::psrlw(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66); + emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, false, + (VM_Version::supports_avx512bw() == false)); } void Assembler::psrld(XMMRegister dst, XMMRegister shift) { @@ -3667,50 +4543,65 @@ void Assembler::psrlq(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66); + } else { + emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66); + } } -void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); +void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); // XMM2 is for /2 encoding: 66 0F 73 /2 ib - emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256); + emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() == false)); emit_int8(shift & 0xFF); } -void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); +void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); // XMM2 is for /2 encoding: 66 0F 73 /2 ib - emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256); + emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len); emit_int8(shift & 0xFF); } -void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); +void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); // XMM2 is for /2 encoding: 66 0F 73 /2 ib - emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len); + } emit_int8(shift & 0xFF); } -void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256); +void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len, + 
(VM_Version::supports_avx512bw() == false)); } -void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256); +void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len); } -void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256); +void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len); + } else { + emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len); + } } // Shift packed integers arithmetically right by specified number of bits. void Assembler::psraw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM4 is for /4 encoding: 66 0F 71 /4 ib - int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); + int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, + (VM_Version::supports_avx512bw() == false)); emit_int8(0x71); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -3719,7 +4610,7 @@ void Assembler::psrad(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM4 is for /4 encoding: 66 0F 72 /4 ib - int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); + int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false); emit_int8(0x72); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -3727,7 +4618,8 @@ void Assembler::psraw(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66); + emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66, + (VM_Version::supports_avx512bw() == false)); } void Assembler::psrad(XMMRegister dst, XMMRegister shift) { @@ -3735,28 +4627,30 @@ emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66); } -void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); +void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); // XMM4 is for /4 encoding: 66 0F 71 /4 ib - emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256); + emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() == false)); emit_int8(shift & 0xFF); } -void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); +void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); // XMM4 is for /4 encoding: 66 0F 71 /4 ib - emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256); + 
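// Editor's note: in the immediate-shift forms the leading XMM argument is not
// data; its register number supplies the /digit opcode extension in ModRM
// (xmm2 => /2 shift right logical, xmm4 => /4 shift right arithmetic,
// xmm6 => /6 shift left), with dst/src passed to emit_vex_arith in swapped
// positions. For vpsrad the encoding is 66 0F 72 /4 ib, so the pair below is:
//
//   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
//   emit_int8(shift & 0xFF);  // the ib immediate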
emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len); emit_int8(shift & 0xFF); } -void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256); +void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len, + (VM_Version::supports_avx512bw() == false)); } -void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256); +void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len); } @@ -3766,14 +4660,18 @@ emit_simd_arith(0xDB, dst, src, VEX_SIMD_66); } -void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len); } -void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len); } void Assembler::por(XMMRegister dst, XMMRegister src) { @@ -3781,14 +4679,18 @@ emit_simd_arith(0xEB, dst, src, VEX_SIMD_66); } -void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len); } -void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len); } void Assembler::pxor(XMMRegister dst, XMMRegister src) { @@ -3796,21 +4698,25 @@ 
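// Editor's note (for the pxor hunk below): the integer logicals pand/por/pxor
// are AVX512F full-vector ops, so only their memory forms need EVEX state;
// they record the full-vector tuple so the displacement can be compressed.
// The recurring preamble, as in vpand/vpor above and vpxor below:
//
//   if (VM_Version::supports_evex()) {
//     tuple_type = EVEX_FV;             // Table 4.5 full-vector tuple
//     input_size_in_bits = EVEX_32bit;  // disp8*N with N = 16/32/64 by vector_len
//   }
//   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);  // pxor case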
emit_simd_arith(0xEF, dst, src, VEX_SIMD_66); } -void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len); } -void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); +void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_FV; + input_size_in_bits = EVEX_32bit; + } + emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len); } void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - bool vector256 = true; - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); + int vector_len = AVX_256bit; + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); emit_int8(0x18); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits @@ -3818,14 +4724,51 @@ emit_int8(0x01); } +void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_evex(), ""); + int vector_len = AVX_512bit; + int src_enc = src->encoding(); + int dst_enc = dst->encoding(); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66,
+                                     VEX_OPCODE_0F_3A, true, vector_len, false, false);
+  emit_int8(0x1A);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - insert into lower 256 bits
+  // 0x01 - insert into upper 256 bits
+  emit_int8(0x01);
+}
+
+void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_evex(), "");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T4;
+    input_size_in_bits = EVEX_64bit;
+  }
+  InstructionMark im(this);
+  int vector_len = AVX_512bit;
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector_len);
+  emit_int8(0x1A);
+  emit_operand(dst, src);
+  // 0x01 - insert into upper 256 bits
+  emit_int8(0x01);
+}
+
 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T4;
+    input_size_in_bits = EVEX_32bit;
+  }
   InstructionMark im(this);
-  bool vector256 = true;
+  int vector_len = AVX_256bit;
   assert(dst != xnoreg, "sanity");
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
   emit_int8(0x18);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
@@ -3834,8 +4777,8 @@
 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  bool vector256 = true;
-  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -3845,11 +4788,15 @@
 void Assembler::vextractf128h(Address dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T4;
+    input_size_in_bits = EVEX_32bit;
+  }
   InstructionMark im(this);
-  bool vector256 = true;
+  int vector_len = AVX_256bit;
   assert(src != xnoreg, "sanity");
   int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
   emit_int8(0x19);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
@@ -3858,8 +4805,8 @@
 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  bool vector256 = true;
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
   emit_int8(0x38);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -3867,38 +4814,169 @@
   emit_int8(0x01);
 }
+void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_evex(), "");
+  int vector_len = AVX_512bit;
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ?
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, + VM_Version::supports_avx512dq(), vector_len, false, false); + emit_int8(0x38); + emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - insert into lower 256 bits + // 0x01 - insert into upper 256 bits + emit_int8(0x01); +} + void Assembler::vinserti128h(XMMRegister dst, Address src) { assert(VM_Version::supports_avx2(), ""); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T4; + input_size_in_bits = EVEX_32bit; + } InstructionMark im(this); - bool vector256 = true; + int vector_len = AVX_256bit; assert(dst != xnoreg, "sanity"); int dst_enc = dst->encoding(); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); + vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len); emit_int8(0x38); emit_operand(dst, src); // 0x01 - insert into upper 128 bits emit_int8(0x01); } +void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int vector_len = AVX_256bit; + int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); + emit_int8(0x39); + emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - insert into lower 128 bits + // 0x01 - insert into upper 128 bits + emit_int8(0x01); +} + void Assembler::vextracti128h(Address dst, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); + if (VM_Version::supports_evex()) { + tuple_type = EVEX_T4; + input_size_in_bits = EVEX_32bit; + } InstructionMark im(this); - bool vector256 = true; + int vector_len = AVX_256bit; assert(src != xnoreg, "sanity"); int src_enc = src->encoding(); - vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); + vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len); emit_int8(0x39); emit_operand(src, dst); // 0x01 - extract from upper 128 bits emit_int8(0x01); } +void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_evex(), ""); + int vector_len = AVX_512bit; + int src_enc = src->encoding(); + int dst_enc = dst->encoding(); + int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, + true, vector_len, false, false); + emit_int8(0x3B); + emit_int8((unsigned char)(0xC0 | encode)); + // 0x01 - extract from upper 256 bits + emit_int8(0x01); +} + +void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) { + assert(VM_Version::supports_evex(), ""); + int vector_len = AVX_512bit; + int src_enc = src->encoding(); + int dst_enc = dst->encoding(); + int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, + VM_Version::supports_avx512dq(), vector_len, false, false); + emit_int8(0x39); + emit_int8((unsigned char)(0xC0 | encode)); + // 0x01 - extract from bits 255:128 + // 0x02 - extract from bits 383:256 + // 0x03 - extract from bits 511:384 + emit_int8(value & 0x3); +} + +void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_evex(), ""); + int vector_len = AVX_512bit; + int src_enc = src->encoding(); + int dst_enc = dst->encoding(); + int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, + VM_Version::supports_avx512dq(), vector_len, false, false); + emit_int8(0x1B); + emit_int8((unsigned char)(0xC0 | encode)); + // 0x01 - extract from upper 256 bits + emit_int8(0x01); +} + +void 
Assembler::vextractf64x4h(Address dst, XMMRegister src) {
+  assert(VM_Version::supports_evex(), "");
+  tuple_type = EVEX_T4;
+  input_size_in_bits = EVEX_64bit;
+  InstructionMark im(this);
+  int vector_len = AVX_512bit;
+  assert(src != xnoreg, "sanity");
+  int src_enc = src->encoding();
+  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+             VM_Version::supports_avx512dq(), vector_len);
+  emit_int8(0x1B);
+  emit_operand(src, dst);
+  // 0x01 - extract from upper 256 bits
+  emit_int8(0x01);
+}
+
+void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
+  assert(VM_Version::supports_evex(), "");
+  int vector_len = AVX_512bit;
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66,
+                                     VEX_OPCODE_0F_3A, false, vector_len, false, false);
+  emit_int8(0x19);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x01 - extract from bits 255:128
+  // 0x02 - extract from bits 383:256
+  // 0x03 - extract from bits 511:384
+  emit_int8(value & 0x3);
+}
+
+void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
+  assert(VM_Version::supports_evex(), "");
+  int vector_len = AVX_512bit;
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+                                     VM_Version::supports_avx512dq(), vector_len, false, false);
+  emit_int8(0x19);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x01 - extract from bits 255:128
+  // 0x02 - extract from bits 383:256
+  // 0x03 - extract from bits 511:384
+  emit_int8(value & 0x3);
+}
+
 // duplicate 4-bytes integer data from src into 8 locations in dest
 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  bool vector256 = true;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_38, false);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// duplicate 4-byte integer data from src into all lanes of dest (count set by vector_len)
+void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_38, false);
   emit_int8(0x58);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3906,7 +4984,8 @@
 // Carry-Less Multiplication Quadword
 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
   assert(VM_Version::supports_clmul(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_3A, false, AVX_128bit, true);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)mask);
@@ -3915,8 +4994,9 @@
 // Carry-Less Multiplication Quadword
 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
-  bool vector256 = false;
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  int vector_len = AVX_128bit;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_3A, true);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)mask);
@@
-3924,8 +5004,11 @@ void Assembler::vzeroupper() { assert(VM_Version::supports_avx(), ""); - (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); - emit_int8(0x77); + if (UseAVX < 3) + { + (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); + emit_int8(0x77); + } } @@ -4442,7 +5525,7 @@ } -void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) { +void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) { if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { prefix(VEX_3bytes); @@ -4452,7 +5535,7 @@ emit_int8(byte1); int byte2 = ((~nds_enc) & 0xf) << 3; - byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; + byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre; emit_int8(byte2); } else { prefix(VEX_2bytes); @@ -4460,89 +5543,237 @@ int byte1 = vex_r ? VEX_R : 0; byte1 = (~byte1) & 0x80; byte1 |= ((~nds_enc) & 0xf) << 3; - byte1 |= (vector256 ? 4 : 0) | pre; + byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre; emit_int8(byte1); } } -void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){ +// This is a 4 byte encoding +void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v, + int nds_enc, VexSimdPrefix pre, VexOpcode opc, + bool is_extended_context, bool is_merge_context, + int vector_len, bool no_mask_reg ){ + // EVEX 0x62 prefix + prefix(EVEX_4bytes); + evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0); + + // P0: byte 2, initialized to RXBR`00mm + // instead of not'd + int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0); + byte2 = (~byte2) & 0xF0; + // confine opc opcode extensions in mm bits to lower two bits + // of form {0F, 0F_38, 0F_3A} + byte2 |= opc; + emit_int8(byte2); + + // P1: byte 3 as Wvvvv1pp + int byte3 = ((~nds_enc) & 0xf) << 3; + // p[10] is always 1 + byte3 |= EVEX_F; + byte3 |= (vex_w & 1) << 7; + // confine pre opcode extensions in pp bits to lower two bits + // of form {66, F3, F2} + byte3 |= pre; + emit_int8(byte3); + + // P2: byte 4 as zL'Lbv'aaa + int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now) + // EVEX.v` for extending EVEX.vvvv or VIDX + byte4 |= (evex_v ? 0: EVEX_V); + // third EXEC.b for broadcast actions + byte4 |= (is_extended_context ? EVEX_Rb : 0); + // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024 + byte4 |= ((vector_len) & 0x3) << 5; + // last is EVEX.z for zero/merge actions + byte4 |= (is_merge_context ? 
EVEX_Z : 0); + emit_int8(byte4); +} + +void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, + VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) { bool vex_r = (xreg_enc >= 8); bool vex_b = adr.base_needs_rex(); bool vex_x = adr.index_needs_rex(); - vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); + avx_vector_len = vector_len; + + // if vector length is turned off, revert to AVX for vectors smaller than AVX_512bit + if (VM_Version::supports_avx512vl() == false) { + switch (vector_len) { + case AVX_128bit: + case AVX_256bit: + legacy_mode = true; + break; + } + } + + if ((UseAVX > 2) && (legacy_mode == false)) + { + bool evex_r = (xreg_enc >= 16); + bool evex_v = (nds_enc >= 16); + is_evex_instruction = true; + evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg); + } else { + vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len); + } } -int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) { +int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, + bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) { bool vex_r = (dst_enc >= 8); bool vex_b = (src_enc >= 8); bool vex_x = false; - vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); + avx_vector_len = vector_len; + + // if vector length is turned off, revert to AVX for vectors smaller than AVX_512bit + if (VM_Version::supports_avx512vl() == false) { + switch (vector_len) { + case AVX_128bit: + case AVX_256bit: + legacy_mode = true; + break; + } + } + + if ((UseAVX > 2) && (legacy_mode == false)) + { + bool evex_r = (dst_enc >= 16); + bool evex_v = (nds_enc >= 16); + // can use vex_x as bank extender on rm encoding + vex_x = (src_enc >= 16); + evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg); + } else { + vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len); + } + + // return modrm byte components for operands return (((dst_enc & 7) << 3) | (src_enc & 7)); } -void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { +void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, + bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) { if (UseAVX > 0) { int xreg_enc = xreg->encoding(); int nds_enc = nds->is_valid() ? nds->encoding() : 0; - vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256); + vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg); } else { assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); rex_prefix(adr, xreg, pre, opc, rex_w); } } -int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { +int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, + bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) { int dst_enc = dst->encoding(); int src_enc = src->encoding(); if (UseAVX > 0) { int nds_enc = nds->is_valid() ? 
-int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
+int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
+                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
   int dst_enc = dst->encoding();
   int src_enc = src->encoding();
   if (UseAVX > 0) {
     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
   } else {
     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
   }
 }

+int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
+                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
+}
+
+int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
+                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
+}
+
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
+  emit_int8(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, pre);
+  simd_prefix_q(dst, dst, src, pre, no_mask_reg);
   emit_int8(opcode);
   emit_operand(dst, src);
 }

-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
-  int encode = simd_prefix_and_encode(dst, dst, src, pre);
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
+  int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
+  int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
   emit_int8(opcode);
   emit_int8((unsigned char)(0xC0 | encode));
 }
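The *_q variants introduced here differ from their base forms only in forcing the W bit (rex_w / vex_w = true), which selects 64-bit operand size. In the pre-AVX path that ends up as REX.W; a tiny sketch (illustrative only) of the REX byte for the movdq case further down in this section (66 REX.W 0F 6E /r, i.e. movq xmm, r64):

#include <cstdint>
#include <cstdio>

static uint8_t rex(bool w, bool r, bool x, bool b) {
  return (uint8_t)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b ? 1 : 0));
}

int main() {
  // movq xmm0, rax encodes as 66 48 0F 6E C0; dropping REX.W would give 32-bit movd
  printf("66 %02X 0F 6E C0\n", rex(true, false, false, false)); // prints 66 48 0F 6E C0
  return 0;
}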
 // Versions with no second source register (non-destructive source).
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
   InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, pre);
+  simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
   emit_int8(opcode);
   emit_operand(dst, src);
 }

-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
+void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
+  InstructionMark im(this);
+  simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
+  emit_int8(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, legacy_mode, AVX_128bit);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
   emit_int8(opcode);
   emit_int8((unsigned char)(0xC0 | encode));
 }

 // 3-operands AVX instructions
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                               Address src, VexSimdPrefix pre, bool vector256) {
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
+                               VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
   InstructionMark im(this);
-  vex_prefix(dst, nds, src, pre, vector256);
+  vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
   emit_int8(opcode);
   emit_operand(dst, src);
 }

-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
-  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
+void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
+                                 Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
+  InstructionMark im(this);
+  vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
+  emit_int8(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
+                               VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
+  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, false, no_mask_reg);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
+                                 VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
   emit_int8(opcode);
   emit_int8((unsigned char)(0xC0 | encode));
 }
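The emit_vex_arith() helpers assemble every 3-operand AVX arithmetic instruction from the same three pieces: a VEX prefix carrying nds in vvvv, the opcode byte, and a mod=11 ModRM byte built from the value vex_prefix_and_encode() returns (0xC0 | ((dst & 7) << 3) | (src & 7)). A worked sketch (illustrative only) for vaddpd ymm0, ymm1, ymm2 (VEX.256.66.0F 58 /r), using the 2-byte VEX form:

#include <cstdint>
#include <cstdio>

int main() {
  int dst = 0, nds = 1, src = 2;             // ymm0, ymm1, ymm2
  uint8_t vex   = (uint8_t)(0x80             // ~R with R = 0
                  | ((~nds & 0xF) << 3)      // vvvv = ~1
                  | (1 << 2)                 // L = 1: 256-bit
                  | 0x01);                   // pp = 01: 66 prefix
  uint8_t modrm = (uint8_t)(0xC0 | ((dst & 7) << 3) | (src & 7));
  printf("C5 %02X 58 %02X\n", vex, modrm);   // expected: C5 F5 58 C2
  return 0;
}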
@@ -5040,6 +6271,10 @@
 }

 void Assembler::andnq(Register dst, Register src1, Address src2) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+  }
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
   vex_prefix_0F38_q(dst, src1, src2);

@@ -5181,44 +6416,52 @@

 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
+  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }

 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
+  simd_prefix_q(dst, dst, src, VEX_SIMD_F2, true);
   emit_int8(0x2A);
   emit_operand(dst, src);
 }

 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
+  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }

 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
+  simd_prefix_q(dst, dst, src, VEX_SIMD_F3, true);
   emit_int8(0x2A);
   emit_operand(dst, src);
 }

 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }

 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }

@@ -5387,7 +6630,7 @@
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, true);
   emit_int8(0x6E);
   emit_int8((unsigned char)(0xC0 | encode));
 }

@@ -5396,7 +6639,7 @@
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, true);
   emit_int8(0x7E);
   emit_int8((unsigned char)(0xC0 | encode));
 }

@@ -5529,7 +6772,8 @@

 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, true, false);
+  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(),
+                                     VEX_SIMD_F2, VEX_OPCODE_0F_38, true, AVX_128bit, true, false);
   emit_int8((unsigned char)0xF6);
   emit_int8((unsigned char)(0xC0 | encode));
 }
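The tuple_type / input_size_in_bits assignments sprinkled through these memory-operand emitters feed the EVEX disp8*N compression set up earlier in this change: the tuple selects a scale factor N from tuple_table, and a displacement is emitted as a single byte only when it is a multiple of N whose quotient fits in eight bits. A worked sketch (loosely mirroring that test, not the HotSpot code itself) for the T1S/64-bit case that andnq configures, where N = 8:

#include <cstdio>

static bool is8bit(int x) { return -0x80 <= x && x < 0x80; }

// scale the displacement when it is a multiple of n; otherwise force disp32
static bool compress_disp8(int& disp, int n) {
  if ((disp % n) == 0) {
    int scaled = disp / n;
    if (is8bit(scaled)) disp = scaled;
  } else {
    return false;
  }
  return is8bit(disp);
}

int main() {
  int d1 = 512;
  printf("%d disp8=%d\n", compress_disp8(d1, 8), d1); // 1 disp8=64
  int d2 = 260;                                       // not a multiple of 8
  printf("%d disp8=%d\n", compress_disp8(d2, 8), d2); // 0 disp8=260, needs disp32
  return 0;
}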
@@ -5678,7 +6922,8 @@

 void Assembler::rorxq(Register dst, Register src, int imm8) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, true, false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2,
+                                     VEX_OPCODE_0F_3A, true, AVX_128bit, true, false);
   emit_int8((unsigned char)0xF0);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
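mulxq and rorxq sit on the 0F_38 and 0F_3A opcode maps with W = 1, so by the escape test at the top of vex_prefix() earlier in this section they can never use the compact 2-byte C5 form. A sketch of that selection rule (illustrative only; the enum values are assumptions):

#include <cstdio>

enum VexOpcode { VEX_OPCODE_0F = 1, VEX_OPCODE_0F_38 = 2, VEX_OPCODE_0F_3A = 3 };

static bool needs_3byte_vex(bool vex_b, bool vex_x, bool vex_w, VexOpcode opc) {
  return vex_b || vex_x || vex_w ||
         (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A);
}

int main() {
  printf("%s\n", needs_3byte_vex(false, false, true, VEX_OPCODE_0F_38) ? "C4" : "C5"); // C4 (mulxq)
  printf("%s\n", needs_3byte_vex(false, false, true, VEX_OPCODE_0F_3A) ? "C4" : "C5"); // C4 (rorxq)
  printf("%s\n", needs_3byte_vex(false, false, false, VEX_OPCODE_0F)   ? "C4" : "C5"); // C5
  return 0;
}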