--- old/src/cpu/x86/vm/assembler_x86.cpp 2016-03-01 16:13:30.632281638 -0800 +++ new/src/cpu/x86/vm/assembler_x86.cpp 2016-03-01 16:13:30.544280161 -0800 @@ -5611,7 +5611,7 @@ } -void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { +void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { assert(VM_Version::supports_avx(), ""); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); @@ -5621,10 +5621,10 @@ emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits // 0x01 - insert into upper 128 bits - emit_int8(0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { +void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? nds->encoding() : 0; @@ -5633,25 +5633,26 @@ emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 256 bits - emit_int8(value & 0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) { +void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, int imm8) { assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); InstructionMark im(this); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit); // swap src<->dst for encoding - vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1A); emit_operand(dst, src); // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 128 bits - emit_int8(value & 0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { +void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? nds->encoding() : 0; @@ -5662,55 +5663,58 @@ // 0x01 - insert into q1 128 bits (128..255) // 0x02 - insert into q2 128 bits (256..383) // 0x03 - insert into q3 128 bits (384..511) - emit_int8(value & 0x3); + emit_int8(imm8 & 0x3); } -void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) { +void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, int imm8) { assert(VM_Version::supports_avx(), ""); assert(dst != xnoreg, "sanity"); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + int nds_enc = nds->is_valid() ? nds->encoding() : 0; InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_operand(dst, src); // 0x00 - insert into q0 128 bits (0..127) // 0x01 - insert into q1 128 bits (128..255) // 0x02 - insert into q2 128 bits (256..383) // 0x03 - insert into q3 128 bits (384..511) - emit_int8(value & 0x3); + emit_int8(imm8 & 0x3); } -void Assembler::vinsertf128h(XMMRegister dst, Address src) { +void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, int imm8) { assert(VM_Version::supports_avx(), ""); assert(dst != xnoreg, "sanity"); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + int nds_enc = nds->is_valid() ? nds->encoding() : 0; InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_operand(dst, src); + // 0x00 - insert into lower 128 bits // 0x01 - insert into upper 128 bits - emit_int8(0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) { +void Assembler::vextractf128(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_avx(), ""); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); - // 0x00 - insert into lower 128 bits - // 0x01 - insert into upper 128 bits - emit_int8(0x01); + // 0x00 - extract from lower 128 bits + // 0x01 - extract from upper 128 bits + emit_int8(imm8 & 0x01); } -void Assembler::vextractf128h(Address dst, XMMRegister src) { +void Assembler::vextractf128(Address dst, XMMRegister src, int imm8) { assert(VM_Version::supports_avx(), ""); assert(src != xnoreg, "sanity"); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; @@ -5720,11 +5724,12 @@ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_operand(src, dst); + // 0x00 - extract from lower 128 bits // 0x01 - extract from upper 128 bits - emit_int8(0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { +void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { assert(VM_Version::supports_avx2(), ""); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); @@ -5734,10 +5739,10 @@ emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits // 0x01 - insert into upper 128 bits - emit_int8(0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { +void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? nds->encoding() : 0; @@ -5746,37 +5751,39 @@ emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 256 bits - emit_int8(value & 0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vinserti128h(XMMRegister dst, Address src) { +void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, int imm8) { assert(VM_Version::supports_avx2(), ""); assert(dst != xnoreg, "sanity"); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + int nds_enc = nds->is_valid() ? nds->encoding() : 0; InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x38); emit_operand(dst, src); + // 0x00 - insert into lower 128 bits // 0x01 - insert into upper 128 bits - emit_int8(0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) { +void Assembler::vextracti128(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_avx(), ""); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_int8((unsigned char)(0xC0 | encode)); - // 0x00 - insert into lower 128 bits - // 0x01 - insert into upper 128 bits - emit_int8(0x01); + // 0x00 - extract from lower 128 bits + // 0x01 - extract from upper 128 bits + emit_int8(imm8 & 0x01); } -void Assembler::vextracti128h(Address dst, XMMRegister src) { +void Assembler::vextracti128(Address dst, XMMRegister src, int imm8) { assert(VM_Version::supports_avx2(), ""); assert(src != xnoreg, "sanity"); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; @@ -5786,11 +5793,12 @@ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_operand(src, dst); + // 0x00 - extract from lower 128 bits // 0x01 - extract from upper 128 bits - emit_int8(0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) { +void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); @@ -5798,22 +5806,23 @@ emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(value & 0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) { +void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - extract from bits 127:0 // 0x01 - extract from bits 255:128 // 0x02 - extract from bits 383:256 // 0x03 - extract from bits 511:384 - emit_int8(value & 0x3); + emit_int8(imm8 & 0x3); } -void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) { +void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); @@ -5821,10 +5830,10 @@ emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(value & 0x1); + emit_int8(imm8 & 0x1); } -void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) { +void Assembler::vextractf64x4(Address dst, XMMRegister src, int imm8) { assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); InstructionMark im(this); @@ -5835,10 +5844,10 @@ emit_operand(src, dst); // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(value & 0x01); + emit_int8(imm8 & 0x01); } -void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) { +void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_avx(), ""); int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); @@ -5849,10 +5858,10 @@ // 0x01 - extract from bits 255:128 // 0x02 - extract from bits 383:256 // 0x03 - extract from bits 511:384 - emit_int8(value & 0x3); + emit_int8(imm8 & 0x3); } -void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) { +void Assembler::vextractf32x4(Address dst, XMMRegister src, int imm8) { assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); InstructionMark im(this); @@ -5865,19 +5874,20 @@ // 0x01 - extract from bits 255:128 // 0x02 - extract from bits 383:256 // 0x03 - extract from bits 511:384 - emit_int8(value & 0x3); + emit_int8(imm8 & 0x3); } -void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) { +void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - extract from bits 127:0 // 0x01 - extract from bits 255:128 // 0x02 - extract from bits 383:256 // 0x03 - extract from bits 511:384 - emit_int8(value & 0x3); + emit_int8(imm8 & 0x3); } // duplicate 4-bytes integer data from src into 8 locations in dest --- old/src/cpu/x86/vm/assembler_x86.hpp 2016-03-01 16:13:30.640281772 -0800 +++ new/src/cpu/x86/vm/assembler_x86.hpp 2016-03-01 16:13:30.556280362 -0800 @@ -1959,32 +1959,32 @@ void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); // Copy low 128bit into high 128bit of YMM registers. - void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vextractf128h(XMMRegister dst, XMMRegister src); - void vextracti128h(XMMRegister dst, XMMRegister src); + void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void vextractf128(XMMRegister dst, XMMRegister src, int imm8); + void vextracti128(XMMRegister dst, XMMRegister src, int imm8); // Load/store high 128bit of YMM registers which does not destroy other half. - void vinsertf128h(XMMRegister dst, Address src); - void vinserti128h(XMMRegister dst, Address src); - void vextractf128h(Address dst, XMMRegister src); - void vextracti128h(Address dst, XMMRegister src); + void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, int imm8); + void vinserti128(XMMRegister dst, XMMRegister nds, Address src, int imm8); + void vextractf128(Address dst, XMMRegister src, int imm8); + void vextracti128(Address dst, XMMRegister src, int imm8); // Copy low 256bit into high 256bit of ZMM registers. - void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); - void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); - void vextracti64x4h(XMMRegister dst, XMMRegister src, int value); - void vextractf64x4h(XMMRegister dst, XMMRegister src, int value); - void vextractf64x4h(Address dst, XMMRegister src, int value); - void vinsertf64x4h(XMMRegister dst, Address src, int value); + void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void vextracti64x4(XMMRegister dst, XMMRegister src, int imm8); + void vextractf64x4(XMMRegister dst, XMMRegister src, int imm8); + void vextractf64x4(Address dst, XMMRegister src, int imm8); + void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, int imm8); // Copy targeted 128bit segments of the ZMM registers - void vextracti64x2h(XMMRegister dst, XMMRegister src, int value); - void vextractf64x2h(XMMRegister dst, XMMRegister src, int value); - void vextractf32x4h(XMMRegister dst, XMMRegister src, int value); - void vextractf32x4h(Address dst, XMMRegister src, int value); - void vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); - void vinsertf32x4h(XMMRegister dst, Address src, int value); + void vextracti64x2(XMMRegister dst, XMMRegister src, int imm8); + void vextractf64x2(XMMRegister dst, XMMRegister src, int imm8); + void vextractf32x4(XMMRegister dst, XMMRegister src, int imm8); + void vextractf32x4(Address dst, XMMRegister src, int imm8); + void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, int imm8); // duplicate 4-bytes integer data from src into 8 locations in dest void vpbroadcastd(XMMRegister dst, XMMRegister src); --- old/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2016-03-01 16:13:30.644281839 -0800 +++ new/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2016-03-01 16:13:30.556280362 -0800 @@ -275,7 +275,7 @@ } if (VM_Version::supports_evex()) { for (int i = xmm_save_first; i <= last_reg; i++) { - __ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0); + __ vextractf32x4(xmm_save(i), as_XMMRegister(i), 0); } } else { for (int i = xmm_save_first; i <= last_reg; i++) { @@ -393,7 +393,7 @@ // emit the restores for xmm regs if (VM_Version::supports_evex()) { for (int i = xmm_save_first; i <= last_reg; i++) { - __ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0); + __ vinsertf32x4(as_XMMRegister(i), as_XMMRegister(i), xmm_save(i), 0); } } else { for (int i = xmm_save_first; i <= last_reg; i++) { --- old/src/cpu/x86/vm/sharedRuntime_x86_32.cpp 2016-03-01 16:13:30.648281907 -0800 +++ new/src/cpu/x86/vm/sharedRuntime_x86_32.cpp 2016-03-01 16:13:30.564280497 -0800 @@ -208,13 +208,13 @@ __ subptr(rsp, ymm_bytes); // Save upper half of YMM registers for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf128h(Address(rsp, n*16), as_XMMRegister(n)); + __ vextractf128(Address(rsp, n*16), as_XMMRegister(n), 1); } if (UseAVX > 2) { __ subptr(rsp, zmm_bytes); // Save upper half of ZMM registers for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1); + __ vextractf64x4(Address(rsp, n*32), as_XMMRegister(n), 1); } } } @@ -304,13 +304,13 @@ if (UseAVX > 2) { // Restore upper half of ZMM registers. for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1); + __ vinsertf64x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*32), 1); } __ addptr(rsp, zmm_bytes); } // Restore upper half of YMM registers. for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16)); + __ vinsertf128(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 1); } __ addptr(rsp, ymm_bytes); } --- old/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2016-03-01 16:13:30.660282109 -0800 +++ new/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2016-03-01 16:13:30.556280362 -0800 @@ -179,13 +179,13 @@ // Save upper half of YMM registers(0..15) int base_addr = XSAVE_AREA_YMM_BEGIN; for (int n = 0; n < 16; n++) { - __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n)); + __ vextractf128(Address(rsp, base_addr+n*16), as_XMMRegister(n), 1); } if (VM_Version::supports_evex()) { // Save upper half of ZMM registers(0..15) base_addr = XSAVE_AREA_ZMM_BEGIN; for (int n = 0; n < 16; n++) { - __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1); + __ vextractf64x4(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1); } // Save full ZMM registers(16..num_xmm_regs) base_addr = XSAVE_AREA_UPPERBANK; @@ -333,13 +333,13 @@ // Restore upper half of YMM registers (0..15) int base_addr = XSAVE_AREA_YMM_BEGIN; for (int n = 0; n < 16; n++) { - __ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16)); + __ vinsertf128(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, base_addr+n*16), 1); } if (VM_Version::supports_evex()) { // Restore upper half of ZMM registers (0..15) base_addr = XSAVE_AREA_ZMM_BEGIN; for (int n = 0; n < 16; n++) { - __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1); + __ vinsertf64x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, base_addr+n*32), 1); } // Restore full ZMM registers(16..num_xmm_regs) base_addr = XSAVE_AREA_UPPERBANK; --- old/src/cpu/x86/vm/macroAssembler_x86.hpp 2016-03-01 16:13:30.664282176 -0800 +++ new/src/cpu/x86/vm/macroAssembler_x86.hpp 2016-03-01 16:13:30.564280497 -0800 @@ -1185,12 +1185,36 @@ void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); } void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); } - // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector. - void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { - if (UseAVX > 1) // vinserti128h is available only in AVX2 - Assembler::vinserti128h(dst, nds, src); - else - Assembler::vinsertf128h(dst, nds, src); + void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { + if (UseAVX > 1) { // vinserti128h is available only in AVX2 + Assembler::vinserti128(dst, nds, src, imm8); + } else { + Assembler::vinsertf128(dst, nds, src, imm8); + } + } + + void vinserti128(XMMRegister dst, XMMRegister nds, Address src, int imm8) { + if (UseAVX > 1) { // vinserti128 is available only in AVX2 + Assembler::vinserti128(dst, nds, src, imm8); + } else { + Assembler::vinsertf128(dst, nds, src, imm8); + } + } + + void vextracti128(XMMRegister dst, XMMRegister src, int imm8) { + if (UseAVX > 1) { // vextracti128 is available only in AVX2 + Assembler::vextracti128(dst, src, imm8); + } else { + Assembler::vextractf128(dst, src, imm8); + } + } + + void vextracti128(Address dst, XMMRegister src, int imm8) { + if (UseAVX > 1) { // vextracti128 is available only in AVX2 + Assembler::vextracti128(dst, src, imm8); + } else { + Assembler::vextractf128(dst, src, imm8); + } } // Carry-Less Multiplication Quadword --- old/src/cpu/x86/vm/vm_version_x86.cpp 2016-03-01 16:13:30.664282176 -0800 +++ new/src/cpu/x86/vm/vm_version_x86.cpp 2016-03-01 16:13:30.564280497 -0800 @@ -385,7 +385,7 @@ __ movdl(xmm0, rcx); __ pshufd(xmm0, xmm0, 0x00); - __ vinsertf128h(xmm0, xmm0, xmm0); + __ vinsertf128(xmm0, xmm0, xmm0, 0x01); __ vmovdqu(xmm7, xmm0); #ifdef _LP64 __ vmovdqu(xmm8, xmm0); --- old/src/cpu/x86/vm/x86.ad 2016-03-01 16:13:30.668282244 -0800 +++ new/src/cpu/x86/vm/x86.ad 2016-03-01 16:13:30.564280497 -0800 @@ -3179,13 +3179,13 @@ "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate32B" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate32B" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3196,12 +3196,12 @@ format %{ "punpcklbw $dst,$mem\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate32B" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate32B" %} ins_encode %{ __ punpcklbw($dst$$XMMRegister, $mem$$Address); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3223,11 +3223,11 @@ match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} + "vinserti128 $dst,$dst,$dst,0x1\t! lreplicate32B($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3298,12 +3298,12 @@ format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate16S" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3313,11 +3313,11 @@ match(Set dst (ReplicateS (LoadS mem))); format %{ "pshuflw $dst,$mem,0x00\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate16S" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S" %} ins_encode %{ __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3327,11 +3327,11 @@ match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3363,11 +3363,11 @@ match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" "pshufd $dst,$dst,0x00\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate8I" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate8I" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3376,10 +3376,10 @@ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI (LoadI mem))); format %{ "pshufd $dst,$mem,0x00\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate8I" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate8I" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3401,11 +3401,11 @@ match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst" %} + "vinserti128 $dst,$dst,$dst,0x1" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3430,11 +3430,11 @@ match(Set dst (ReplicateL src)); format %{ "movdq $dst,$src\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %} ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3447,13 +3447,13 @@ "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3464,11 +3464,11 @@ match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress($con)); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3478,11 +3478,11 @@ match(Set dst (ReplicateL (LoadL mem))); format %{ "movq $dst,$mem\n\t" "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L" %} + "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %} ins_encode %{ __ movq($dst$$XMMRegister, $mem$$Address); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3511,10 +3511,10 @@ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF src)); format %{ "pshufd $dst,$src,0x00\n\t" - "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} + "vinsertf128 $dst,$dst,$dst,0x1\t! replicate8F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3523,10 +3523,10 @@ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF (LoadF mem))); format %{ "pshufd $dst,$mem,0x00\n\t" - "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} + "vinsertf128 $dst,$dst,$dst,0x1\t! replicate8F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3576,10 +3576,10 @@ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD src)); format %{ "pshufd $dst,$src,0x44\n\t" - "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} + "vinsertf128 $dst,$dst,$dst,0x1\t! replicate4D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -3588,10 +3588,10 @@ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD (LoadD mem))); format %{ "pshufd $dst,$mem,0x44\n\t" - "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} + "vinsertf128 $dst,$dst,$dst,0x1\t! replicate4D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} @@ -4791,7 +4791,7 @@ effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" - "vextracti128 $tmp2,$tmp\n\t" + "vextracti128 $tmp2,$tmp,0x1\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" @@ -4800,7 +4800,7 @@ int vector_len = 1; __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); - __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vextracti128($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -4813,7 +4813,7 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti128 $tmp,$src2\n\t" + format %{ "vextracti128 $tmp,$src2,0x1\n\t" "vpaddd $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" @@ -4824,7 +4824,7 @@ "movd $dst,$tmp2\t! add reduction8I" %} ins_encode %{ int vector_len = 0; - __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); + __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); @@ -4843,7 +4843,7 @@ effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" "vpaddd $tmp3,$tmp3,$src2\n\t" - "vextracti128 $tmp,$tmp3\n\t" + "vextracti128 $tmp,$tmp3,0x1\n\t" "vpaddd $tmp,$tmp,$tmp3\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" @@ -4853,9 +4853,9 @@ "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ - __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); + __ vextracti64x4($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); - __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); + __ vextracti128($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); @@ -4892,7 +4892,7 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti128 $tmp,$src2\n\t" + format %{ "vextracti128 $tmp,$src2,0x1\n\t" "vpaddq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" @@ -4900,7 +4900,7 @@ "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction4L" %} ins_encode %{ - __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); + __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -4917,7 +4917,7 @@ effect(TEMP tmp, TEMP tmp2); format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" "vpaddq $tmp2,$tmp2,$src2\n\t" - "vextracti128 $tmp,$tmp2\n\t" + "vextracti128 $tmp,$tmp2,0x1\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" @@ -4925,9 +4925,9 @@ "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction8L" %} ins_encode %{ - __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); + __ vextracti64x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); - __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ vextracti128($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5026,7 +5026,7 @@ "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf128 $tmp2,$src2\n\t" + "vextractf128 $tmp2,$src2,0x1\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" @@ -5042,7 +5042,7 @@ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5065,7 +5065,7 @@ "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vextractf32x4 $tmp2,$src2,0x1\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" @@ -5073,7 +5073,7 @@ "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vextractf32x4 $tmp2,$src2,0x2\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" @@ -5081,7 +5081,7 @@ "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vextractf32x4 $tmp2,$src2,0x3\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" @@ -5097,7 +5097,7 @@ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5105,7 +5105,7 @@ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5113,7 +5113,7 @@ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5162,7 +5162,7 @@ format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4h $tmp2,$src2, 0x1\n\t" + "vextractf32x4 $tmp2,$src2,0x1\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} @@ -5170,7 +5170,7 @@ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5185,15 +5185,15 @@ format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vextractf32x4 $tmp2,$src2,0x1\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vextractf32x4 $tmp2,$src2,0x2\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vextractf32x4 $tmp2,$src2,0x3\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} @@ -5201,15 +5201,15 @@ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5307,7 +5307,7 @@ predicate(UseAVX > 0); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti128 $tmp,$src2\n\t" + format %{ "vextracti128 $tmp,$src2,0x1\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" @@ -5318,7 +5318,7 @@ "movd $dst,$tmp2\t! mul reduction8I" %} ins_encode %{ int vector_len = 0; - __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); + __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); @@ -5337,7 +5337,7 @@ effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" "vpmulld $tmp3,$tmp3,$src2\n\t" - "vextracti128 $tmp,$tmp3\n\t" + "vextracti128 $tmp,$tmp3,0x1\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" @@ -5347,9 +5347,9 @@ "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ - __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); + __ vextracti64x4($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); - __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); + __ vextracti128($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x1); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); @@ -5386,7 +5386,7 @@ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti128 $tmp,$src2\n\t" + format %{ "vextracti128 $tmp,$src2,0x1\n\t" "vpmullq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" @@ -5394,7 +5394,7 @@ "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction4L" %} ins_encode %{ - __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); + __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5411,7 +5411,7 @@ effect(TEMP tmp, TEMP tmp2); format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" "vpmullq $tmp2,$tmp2,$src2\n\t" - "vextracti128 $tmp,$tmp2\n\t" + "vextracti128 $tmp,$tmp2,0x1\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" @@ -5419,9 +5419,9 @@ "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction8L" %} ins_encode %{ - __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); + __ vextracti64x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); - __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ vextracti128($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5520,7 +5520,7 @@ "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf128 $tmp2,$src2\n\t" + "vextractf128 $tmp2,$src2,0x1\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" @@ -5536,7 +5536,7 @@ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5559,7 +5559,7 @@ "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vextractf32x4 $tmp2,$src2,0x1\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" @@ -5567,7 +5567,7 @@ "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vextractf32x4 $tmp2,$src2,0x2\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" @@ -5575,7 +5575,7 @@ "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vextractf32x4 $tmp2,$src2,0x3\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" @@ -5591,7 +5591,7 @@ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5599,7 +5599,7 @@ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5607,7 +5607,7 @@ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5656,7 +5656,7 @@ format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf128 $tmp2,$src2\n\t" + "vextractf128 $tmp2,$src2,0x1\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} @@ -5664,7 +5664,7 @@ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); @@ -5679,15 +5679,15 @@ format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vextractf32x4 $tmp2,$src2,0x1\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vextractf32x4 $tmp2,$src2,0x2\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vextractf32x4 $tmp2,$src2,0x3\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} @@ -5695,15 +5695,15 @@ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); --- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2016-03-01 16:13:30.672282311 -0800 +++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2016-03-01 16:13:30.568280564 -0800 @@ -3445,7 +3445,7 @@ void MacroAssembler::movdqu(Address dst, XMMRegister src) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) { - Assembler::vextractf32x4h(dst, src, 0); + Assembler::vextractf32x4(dst, src, 0); } else { Assembler::movdqu(dst, src); } @@ -3453,7 +3453,7 @@ void MacroAssembler::movdqu(XMMRegister dst, Address src) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) { - Assembler::vinsertf32x4h(dst, src, 0); + Assembler::vinsertf32x4(dst, dst, src, 0); } else { Assembler::movdqu(dst, src); } @@ -3478,7 +3478,7 @@ void MacroAssembler::vmovdqu(Address dst, XMMRegister src) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) { - Assembler::vextractf64x4h(dst, src, 0); + Assembler::vextractf64x4(dst, src, 0); } else { Assembler::vmovdqu(dst, src); } @@ -3486,7 +3486,7 @@ void MacroAssembler::vmovdqu(XMMRegister dst, Address src) { if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) { - Assembler::vinsertf64x4h(dst, src, 0); + Assembler::vinsertf64x4(dst, dst, src, 0); } else { Assembler::vmovdqu(dst, src); } @@ -5649,14 +5649,14 @@ // Save upper half of ZMM registers subptr(rsp, 32*num_xmm_regs); for (int n = 0; n < num_xmm_regs; n++) { - vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1); + vextractf64x4(Address(rsp, n*32), as_XMMRegister(n), 1); } } assert(UseAVX > 0, "256 bit vectors are supported only with AVX"); // Save upper half of YMM registers subptr(rsp, 16*num_xmm_regs); for (int n = 0; n < num_xmm_regs; n++) { - vextractf128h(Address(rsp, n*16), as_XMMRegister(n)); + vextractf128(Address(rsp, n*16), as_XMMRegister(n), 1); } } #endif @@ -5665,7 +5665,7 @@ #ifdef _LP64 if (VM_Version::supports_evex()) { for (int n = 0; n < num_xmm_regs; n++) { - vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0); + vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0); } } else { for (int n = 0; n < num_xmm_regs; n++) { @@ -5753,7 +5753,7 @@ #ifdef _LP64 if (VM_Version::supports_evex()) { for (int n = 0; n < num_xmm_regs; n++) { - vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0); + vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0); } } else { for (int n = 0; n < num_xmm_regs; n++) { @@ -5771,12 +5771,12 @@ if (MaxVectorSize > 16) { // Restore upper half of YMM registers. for (int n = 0; n < num_xmm_regs; n++) { - vinsertf128h(as_XMMRegister(n), Address(rsp, n*16)); + vinsertf128(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 1); } addptr(rsp, 16*num_xmm_regs); if(UseAVX > 2) { for (int n = 0; n < num_xmm_regs; n++) { - vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1); + vinsertf64x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*32), 1); } addptr(rsp, 32*num_xmm_regs); }