--- old/src/hotspot/cpu/x86/assembler_x86.cpp 2018-09-06 16:44:39.473198000 -0700 +++ new/src/hotspot/cpu/x86/assembler_x86.cpp 2018-09-06 16:44:38.891288800 -0700 @@ -3746,6 +3746,28 @@ emit_int8(imm8); } +void Assembler::vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512vbmi(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); /* VPERMB has no VEX encoding */ + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x8D); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::vpermw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_avx512vlbw(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x8D); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); // VEX.NDS.256.66.0F38.W0 36 /r @@ -3765,6 +3787,14 @@ emit_operand(dst, src); } +void Assembler::evpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x36); + 
emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { assert(VM_Version::supports_avx2(), ""); InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); --- old/src/hotspot/cpu/x86/assembler_x86.hpp 2018-09-06 16:44:44.720764700 -0700 +++ new/src/hotspot/cpu/x86/assembler_x86.hpp 2018-09-06 16:44:44.159922900 -0700 @@ -1682,8 +1682,11 @@ // Permutations void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len); void vpermq(XMMRegister dst, XMMRegister src, int imm8); + void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpermw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src); void vpermd(XMMRegister dst, XMMRegister nds, Address src); + void evpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len); --- old/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp 2018-09-06 16:44:49.613699300 -0700 +++ new/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp 2018-09-06 16:44:49.043216800 -0700 @@ -6116,6 +6116,12 @@ 0xFFFFFFFF, 0, 0, 0); StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0); + StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_fp_mask("vector_int_shuffle_mask", 0x0302010003020100); + StubRoutines::x86::_vector_int_size_mask = generate_vector_fp_mask("vector_int_size_mask", 0x0000000400000004); + StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_fp_mask("vector_short_shuffle_mask", 0x0100010001000100); + 
StubRoutines::x86::_vector_short_size_mask = generate_vector_fp_mask("vector_short_size_mask", 0x0002000200020002); + StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_fp_mask("vector_long_shuffle_mask", 0x0000000100000000); + StubRoutines::x86::_vector_long_size_mask = generate_vector_fp_mask("vector_long_size_mask", 0x0000000200000002); // support for verify_oop (must happen after universe_init) StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); --- old/src/hotspot/cpu/x86/stubRoutines_x86.hpp 2018-09-06 16:44:54.405710600 -0700 +++ new/src/hotspot/cpu/x86/stubRoutines_x86.hpp 2018-09-06 16:44:53.858471800 -0700 @@ -33,7 +33,7 @@ enum platform_dependent_constants { code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small) - code_size2 = 33800 LP64_ONLY(+10000) // simply increase if too small (assembler will crash if too small) + code_size2 = 35800 LP64_ONLY(+10000) // simply increase if too small (assembler will crash if too small) }; class x86 { @@ -66,6 +66,12 @@ static address _vector_int_to_short_mask; static address _vector_32_bit_mask; static address _vector_64_bit_mask; + static address _vector_int_shuffle_mask; + static address _vector_int_size_mask; + static address _vector_short_shuffle_mask; + static address _vector_short_size_mask; + static address _vector_long_shuffle_mask; + static address _vector_long_size_mask; public: @@ -160,7 +166,24 @@ static address vector_all_ones_mask() { return _vector_double_sign_flip; } - + static address vector_int_shuffle_mask() { + return _vector_int_shuffle_mask; + } + static address vector_int_size_mask() { + return _vector_int_size_mask; + } + static address vector_short_shuffle_mask() { + return _vector_short_shuffle_mask; + } + static address vector_short_size_mask() { + return _vector_short_size_mask; + } + static address vector_long_shuffle_mask() { + return _vector_long_shuffle_mask; + } + static address vector_long_size_mask() { + 
return _vector_long_size_mask; + } #else // !LP64 private: --- old/src/hotspot/cpu/x86/stubRoutines_x86_64.cpp 2018-09-06 16:44:59.151951700 -0700 +++ new/src/hotspot/cpu/x86/stubRoutines_x86_64.cpp 2018-09-06 16:44:58.602040600 -0700 @@ -54,3 +54,9 @@ address StubRoutines::x86::_vector_int_to_short_mask = NULL; address StubRoutines::x86::_vector_32_bit_mask = NULL; address StubRoutines::x86::_vector_64_bit_mask = NULL; +address StubRoutines::x86::_vector_int_shuffle_mask = NULL; +address StubRoutines::x86::_vector_int_size_mask = NULL; +address StubRoutines::x86::_vector_short_shuffle_mask = NULL; +address StubRoutines::x86::_vector_short_size_mask = NULL; +address StubRoutines::x86::_vector_long_shuffle_mask = NULL; +address StubRoutines::x86::_vector_long_size_mask = NULL; --- old/src/hotspot/cpu/x86/vmStructs_x86.hpp 2018-09-06 16:45:03.919694400 -0700 +++ new/src/hotspot/cpu/x86/vmStructs_x86.hpp 2018-09-06 16:45:03.343506400 -0700 @@ -75,6 +75,7 @@ declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) \ declare_preprocessor_constant("VM_Version::CPU_SHA", CPU_SHA) \ declare_preprocessor_constant("VM_Version::CPU_FMA", CPU_FMA) \ - declare_preprocessor_constant("VM_Version::CPU_VZEROUPPER", CPU_VZEROUPPER) + declare_preprocessor_constant("VM_Version::CPU_VZEROUPPER", CPU_VZEROUPPER) \ + declare_preprocessor_constant("VM_Version::CPU_AVX512VBMI", CPU_AVX512VBMI) #endif // CPU_X86_VM_VMSTRUCTS_X86_HPP --- old/src/hotspot/cpu/x86/vm_version_x86.cpp 2018-09-06 16:45:08.861128300 -0700 +++ new/src/hotspot/cpu/x86/vm_version_x86.cpp 2018-09-06 16:45:08.340761700 -0700 @@ -667,6 +667,7 @@ _features &= ~CPU_AVX512_VPOPCNTDQ; _features &= ~CPU_VPCLMULQDQ; _features &= ~CPU_VAES; + _features &= ~CPU_AVX512VBMI; } if (UseAVX < 2) --- old/src/hotspot/cpu/x86/vm_version_x86.hpp 2018-09-06 16:45:13.558726900 -0700 +++ new/src/hotspot/cpu/x86/vm_version_x86.hpp 2018-09-06 16:45:13.033767700 -0700 @@ -336,6 +336,7 @@ #define CPU_AVX512_VPOPCNTDQ 
((uint64_t)UCONST64(0x2000000000)) // Vector popcount #define CPU_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) //Vector carryless multiplication #define CPU_VAES ((uint64_t)UCONST64(0x8000000000)) // Vector AES instructions +#define CPU_AVX512VBMI ((uint64_t)UCONST64(0x10000000000)) // Vector BMI instructions enum Extended_Family { // AMD @@ -548,6 +549,8 @@ result |= CPU_VPCLMULQDQ; if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0) result |= CPU_VAES; + if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0) + result |= CPU_AVX512VBMI; } } if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0) @@ -816,6 +819,7 @@ static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; } static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; } static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; } + static bool supports_avx512vbmi() { return (_features & CPU_AVX512VBMI) != 0; } static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); } static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); } static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); } --- old/src/hotspot/cpu/x86/x86.ad 2018-09-06 16:45:18.440178500 -0700 +++ new/src/hotspot/cpu/x86/x86.ad 2018-09-06 16:45:17.906105500 -0700 @@ -1349,6 +1349,12 @@ static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } static address vector_all_ones_mask() { return StubRoutines::x86::vector_all_ones_mask(); } + static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } + static address vector_int_sizemask() { return StubRoutines::x86::vector_int_size_mask(); } + static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } + static address vector_short_sizemask() { return 
StubRoutines::x86::vector_short_size_mask(); } + static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } + static address vector_long_sizemask() { return StubRoutines::x86::vector_long_size_mask(); } #else static address float_signmask() { return (address)float_signmask_pool; } static address float_signflip() { return (address)float_signflip_pool; } @@ -1528,6 +1534,13 @@ else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } // Implementation limitation break; + case Op_VectorLoadShuffle: + case Op_VectorRearrange: + if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation due to how shuffle is loaded + else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } // Implementation limitation + else if (bt == T_BYTE && size_in_bits >= 256 && !VM_Version::supports_avx512vbmi()) { ret_value = false; } // Implementation limitation + else if (bt == T_SHORT && size_in_bits >= 256 && !VM_Version::supports_avx512vlbw()) { ret_value = false; } // Implementation limitation + break; case Op_VectorStoreMask: if (UseAVX < 0) { ret_value = false; } // Implementation limitation else if ((size_in_bits >= 256 || bt == T_LONG || bt == T_DOUBLE) && UseAVX < 2) { ret_value = false; } // Implementation limitation @@ -23535,6 +23548,416 @@ ins_pipe( pipe_slow ); %} +//-------------------------------- LOAD_SHUFFLE ---------------------------------- + +instruct loadshuffle8b(vecD dst, vecD src) %{ + predicate(UseSSE > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadShuffle src)); + format %{ "movdqu $dst, $src\t! 
load shuffle (load 8B for 8BRearrange)" %} + ins_encode %{ + __ movdqu($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle16b(vecX dst, vecX src) %{ + predicate(UseSSE > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadShuffle src)); + format %{ "movdqu $dst, $src\t! load shuffle (load 16B for 16BRearrange)" %} + ins_encode %{ + __ movdqu($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle32b(vecY dst, vecY src) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadShuffle src)); + format %{ "vmovdqu $dst, $src\t! load shuffle (load 32B for 32BRearrange)" %} + ins_encode %{ + __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle64b(vecZ dst, vecZ src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadShuffle src)); + format %{ "vmovdqu $dst, $src\t! load shuffle (load 64B for 64BRearrange)" %} + ins_encode %{ + __ evmovdqul($dst$$XMMRegister, $src$$XMMRegister, 2); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle4s(vecD dst, vecS src, vecD tmp, vecD tmp2, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadShuffle src)); + effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); + format %{ "pmovsxbw $tmp, $src \n\t" + "movdqu $tmp2,0x0002000200020002\n\t" + "pmullw $tmp,$tmp2\n\t" + "movdqu $tmp2,$tmp\n\t" + "psllw $tmp2,0x8\n\t" + "paddb $tmp2,$tmp\n\t" + "movdqu $tmp, 0x0100010001000100 \n\t" + "paddb $tmp2,$tmp\n\t" + "movdqu $dst, $tmp2\t! 
load shuffle (load 4B for 4SRearrange)" %} + ins_encode %{ + __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); + __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_sizemask()), $scratch$$Register); + __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ movdqu($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ psllw($tmp2$$XMMRegister, 0x8); + __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); + __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ movdqu($dst$$XMMRegister, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle8s(vecX dst, vecD src, vecX tmp, vecX tmp2, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadShuffle src)); + effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); + format %{ "pmovsxbw $tmp, $src \n\t" + "movdqu $tmp2,0x0002000200020002\n\t" + "pmullw $tmp,$tmp2\n\t" + "movdqu $tmp2,$tmp\n\t" + "psllw $tmp2,0x8\n\t" + "paddb $tmp2,$tmp\n\t" + "movdqu $tmp, 0x0100010001000100 \n\t" + "paddb $tmp2,$tmp\n\t" + "movdqu $dst, $tmp2\t! 
load shuffle (load 8B for 8SRearrange)" %} + ins_encode %{ + __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); + __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_sizemask()), $scratch$$Register); + __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ movdqu($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ psllw($tmp2$$XMMRegister, 0x8); + __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); + __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ movdqu($dst$$XMMRegister, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle16s(vecY dst, vecX src) %{ + predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadShuffle src)); + format %{ "vpmovsxbw $dst,$src\t! load shuffle (load 16B for 16SRearrange)" %} + ins_encode %{ + int vector_len = 1; + __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle32s(vecZ dst, vecY src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadShuffle src)); + format %{ "vpmovsxbw $dst,$src\t! 
load shuffle (load 32B for 32SRearrange)" %} + ins_encode %{ + int vector_len = 2; + __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle4i(vecX dst, vecS src, vecX tmp, vecX tmp2, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 4 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorLoadShuffle src)); + effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); + format %{ "vpmovsxbd $tmp, $src \n\t" + "movdqu $tmp2, 0x0000000400000004 \n\t" + "pmulld $tmp2, $tmp \n\t" + "movdqu $tmp,$tmp2\n\t" + "pslld $tmp2,0x8\n\t" + "paddb $tmp2,$tmp\n\t" + "pslld $tmp2,0x8\n\t" + "paddb $tmp2,$tmp\n\t" + "pslld $tmp2,0x8\n\t" + "paddb $tmp2,$tmp\n\t" + "movdqu $tmp, 0x0302010003020100 \n\t" + "paddb $tmp2,$tmp\n\t" + "movdqu $dst, $tmp2\t! load shuffle (load 4B for 4IRearrange)" %} + ins_encode %{ + __ vpmovsxbd($tmp$$XMMRegister, $src$$XMMRegister, 0); + __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_int_sizemask()), $scratch$$Register); + __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ movdqu($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ pslld($tmp2$$XMMRegister, 0x8); + __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ pslld($tmp2$$XMMRegister, 0x8); + __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ pslld($tmp2$$XMMRegister, 0x8); + __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register); + __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ movdqu($dst$$XMMRegister, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle8i(vecY dst, vecD src) %{ + predicate(UseAVX >= 1 && n->as_Vector()->length() == 8 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst 
(VectorLoadShuffle src)); + format %{ "vpmovsxbd $dst, $src\t! load shuffle (load 8B for 8IRearrange)" %} + ins_encode %{ + int vector_len = 1; + __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle16i(vecZ dst, vecX src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorLoadShuffle src)); + format %{ "vpmovsxbd $dst, $src\t! load shuffle (load 16B for 16IRearrange)" %} + ins_encode %{ + int vector_len = 2; + __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle4l(vecY dst, vecS src, vecY tmp, vecY tmp2, rRegI scratch) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (VectorLoadShuffle src)); + effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); + format %{ "vpmovsxbd $tmp2, $src \n\t" + "movdqu $tmp, 0x0000000200000002 \n\t" + "pmulld $tmp, $tmp2 \n\t" + "vpmovsxdq $tmp2,$tmp\n\t" + "vpsllq $tmp2,0x20\n\t" + "vpaddd $tmp2,$tmp\n\t" + "vmovdqu $tmp, 0x0000000100000000 \n\t" + "vpaddd $tmp2,$tmp\n\t" + "vmovdqu $dst, $tmp2\t! 
load shuffle (load 4L for 4LRearrange)" %} + ins_encode %{ + int vector_len = 1; + __ vpmovsxbd($tmp2$$XMMRegister, $src$$XMMRegister, 0); + __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sizemask()), $scratch$$Register); + __ pmulld($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ vpmovsxdq($tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); + __ vpsllq($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x20, vector_len); + __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); + __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_shufflemask()), $scratch$$Register); + __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); + __ vmovdqu($dst$$XMMRegister, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadshuffle8l(vecZ dst, vecD src, vecZ tmp, vecZ tmp2, rRegI scratch) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (VectorLoadShuffle src)); + effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); + format %{ "vpmovsxbd $tmp2, $src \n\t" + "movdqu $tmp, 0x0000000200000002 \n\t" + "pmulld $tmp, $tmp2\n\t" + "vpmovsxdq $tmp2,$tmp\n\t" + "vpsllq $tmp2,0x20\n\t" + "vpaddd $tmp2,$tmp\n\t" + "vmovdqu $tmp, 0x0000000100000000 \n\t" + "vpaddd $tmp2,$tmp\n\t" + "vmovdqu $dst, $tmp2\t! 
load shuffle (load 8L for 8LRearrange)" %} + ins_encode %{ + int vector_len = 2; + __ vpmovsxbd($tmp2$$XMMRegister, $src$$XMMRegister, 1); + __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sizemask()), $scratch$$Register); + __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 1); + __ vpmovsxdq($tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); + __ vpsllq($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x20, vector_len); + __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); + __ evmovdqul($tmp$$XMMRegister, k1, ExternalAddress(vector_long_shufflemask()), false, vector_len, $scratch$$Register); + __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); + __ evmovdqul($dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} +//-------------------------------- Rearrange ------------------------------------- + +instruct rearrange8b(vecD dst, vecD shuffle) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorRearrange dst shuffle)); + effect(TEMP dst); + format %{ "pshufb $dst, $shuffle\t! rearrange (8BRearrange)" %} + ins_encode %{ + __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange16b(vecX dst, vecX shuffle) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 16 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorRearrange dst shuffle)); + effect(TEMP dst); + format %{ "pshufb $dst, $shuffle\t! 
rearrange (16BRearrange)" %} + ins_encode %{ + __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange32b(vecY dst, vecY src, vecY shuffle) %{ + predicate(UseAVX > 2 && VM_Version::supports_avx512vbmi() && n->as_Vector()->length() == 32 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorRearrange src shuffle)); + effect(TEMP dst); + format %{ "vpermb $dst, $shuffle\t! rearrange (32BRearrange)" %} + ins_encode %{ + int vector_len = 1; + __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange64b(vecZ dst, vecZ src, vecZ shuffle) %{ + predicate(UseAVX > 2 && VM_Version::supports_avx512vbmi() && n->as_Vector()->length() == 64 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorRearrange src shuffle)); + effect(TEMP dst); + format %{ "vpermb $dst, $shuffle\t! rearrange (64BRearrange)" %} + ins_encode %{ + int vector_len = 2; + __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange4s(vecD dst, vecD shuffle) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 4 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorRearrange dst shuffle)); + effect(TEMP dst); + format %{ "pshufb $dst, $shuffle\t! rearrange (4SRearrange)" %} + ins_encode %{ + __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange8s(vecX dst, vecX shuffle) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorRearrange dst shuffle)); + effect(TEMP dst); + format %{ "pshufb $dst, $shuffle\t! 
rearrange (8SRearrange)" %} + ins_encode %{ + __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange16s(vecY dst, vecY src, vecY shuffle) %{ + predicate(UseAVX > 2 && VM_Version::supports_avx512vlbw() && n->as_Vector()->length() == 16 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorRearrange src shuffle)); + effect(TEMP dst); + format %{ "vpermw $dst, $shuffle\t! rearrange (16SRearrange)" %} + ins_encode %{ + int vector_len = 1; + __ vpermw($dst$$XMMRegister, k0, $shuffle$$XMMRegister, $src$$XMMRegister, false,vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange32s(vecZ dst, vecZ src, vecZ shuffle) %{ + predicate(UseAVX > 2 && VM_Version::supports_avx512vlbw() && n->as_Vector()->length() == 32 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorRearrange src shuffle)); + effect(TEMP dst); + format %{ "vpermw $dst, $shuffle\t! rearrange (32SRearrange)" %} + ins_encode %{ + int vector_len = 2; + __ vpermw($dst$$XMMRegister, k0, $shuffle$$XMMRegister, $src$$XMMRegister, false,vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange4i(vecX dst, vecX shuffle) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 4 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorRearrange dst shuffle)); + effect(TEMP dst); + format %{ "pshufb $dst, $shuffle\t! 
rearrange (4IRearrange)" %} + ins_encode %{ + __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange8i(vecY dst, vecY src, vecY shuffle) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorRearrange src shuffle)); + effect(TEMP dst); + format %{ "vpermd $dst, $src, $shuffle\t! rearrange (8IRearrange)" %} + ins_encode %{ + int vector_len = 1; + __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange16i(vecZ dst, vecZ src, vecZ shuffle) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorRearrange src shuffle)); + effect(TEMP dst); + format %{ "vpermd $dst, $src, $shuffle\t! rearrange (16IRearrange)" %} + ins_encode %{ + int vector_len = 2; + __ evpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange4l(vecY dst, vecY src, vecY shuffle) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (VectorRearrange src shuffle)); + effect(TEMP dst); + format %{ "vpermd $dst, $src, $shuffle\t! 
rearrange (4LRearrange)" %} + ins_encode %{ + int vector_len = 1; + __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rearrange8l(vecZ dst, vecZ src, vecZ shuffle) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (VectorRearrange src shuffle)); + effect(TEMP dst); + format %{ "vpermd $dst, $src, $shuffle\t! rearrange (8LRearrange)" %} + ins_encode %{ + int vector_len = 2; + __ evpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} // --------------------------------- FMA -------------------------------------- // a * b + c --- old/src/hotspot/share/adlc/formssel.cpp 2018-09-06 16:45:24.667335300 -0700 +++ new/src/hotspot/share/adlc/formssel.cpp 2018-09-06 16:45:23.975960000 -0700 @@ -4193,6 +4193,7 @@ "ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD", "LoadVector","StoreVector", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert", + "VectorRearrange","VectorLoadShuffle", "VectorCastB2X", "VectorCastS2X", "VectorCastI2X", "VectorCastL2X", "VectorCastF2X", "VectorCastD2X", "VectorMaskWrapper", "VectorMaskCmp", "VectorReinterpret", --- old/src/hotspot/share/ci/ciType.cpp 2018-09-06 16:45:29.617867300 -0700 +++ new/src/hotspot/share/ci/ciType.cpp 2018-09-06 16:45:29.083975400 -0700 @@ -139,6 +139,9 @@ static bool is_float64mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Float64Vector_Float64Mask); } +static bool is_float64shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Float64Vector_Float64Shuffle); +} static bool is_float64(BasicType bt, vmSymbols::SID sid) { return 
is_float64vector(bt, sid) || is_float64species(bt, sid) || is_float64mask(bt, sid); } @@ -151,6 +154,9 @@ static bool is_float128mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Float128Vector_Float128Mask); } +static bool is_float128shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Float128Vector_Float128Shuffle); +} static bool is_float128(BasicType bt, vmSymbols::SID sid) { return is_float128vector(bt, sid) || is_float128species(bt, sid) || is_float128mask(bt, sid); } @@ -163,6 +169,9 @@ static bool is_float256mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Float256Vector_Float256Mask); } +static bool is_float256shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Float256Vector_Float256Shuffle); +} static bool is_float256(BasicType bt, vmSymbols::SID sid) { return is_float256vector(bt, sid) || is_float256species(bt, sid) || is_float256mask(bt, sid); } @@ -175,6 +184,9 @@ static bool is_float512mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Float512Vector_Float512Mask); } +static bool is_float512shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Float512Vector_Float512Shuffle); +} static bool is_float512(BasicType bt, vmSymbols::SID sid) { return is_float512vector(bt, sid) || is_float512species(bt, sid) || is_float512mask(bt, sid); } @@ -190,6 +202,9 @@ static bool is_double64mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Double64Vector_Double64Mask); } +static bool is_double64shuffle(BasicType bt, vmSymbols::SID sid) { + 
return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Double64Vector_Double64Shuffle); +} static bool is_double64(BasicType bt, vmSymbols::SID sid) { return is_double64vector(bt, sid) || is_double64species(bt, sid) || is_double64mask(bt, sid); } @@ -202,6 +217,9 @@ static bool is_double128mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Double128Vector_Double128Mask); } +static bool is_double128shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Double128Vector_Double128Shuffle); +} static bool is_double128(BasicType bt, vmSymbols::SID sid) { return is_double128vector(bt, sid) || is_double128species(bt, sid) || is_double128mask(bt, sid); } @@ -214,6 +232,9 @@ static bool is_double256mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Double256Vector_Double256Mask); } +static bool is_double256shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Double256Vector_Double256Shuffle); +} static bool is_double256(BasicType bt, vmSymbols::SID sid) { return is_double256vector(bt, sid) || is_double256species(bt, sid) || is_double256mask(bt, sid); } @@ -226,6 +247,9 @@ static bool is_double512mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Double512Vector_Double512Mask); } +static bool is_double512shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Double512Vector_Double512Shuffle); +} static bool is_double512(BasicType bt, vmSymbols::SID sid) { return is_double512vector(bt, sid) || is_double512species(bt, sid) || is_double512mask(bt, sid); } @@ -241,6 +265,9 @@ static bool is_int64mask(BasicType 
bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Int64Vector_Int64Mask); } +static bool is_int64shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Int64Vector_Int64Shuffle); +} static bool is_int64(BasicType bt, vmSymbols::SID sid) { return is_int64vector(bt, sid) || is_int64species(bt, sid) || is_int64mask(bt, sid); } @@ -253,6 +280,9 @@ static bool is_int128mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Int128Vector_Int128Mask); } +static bool is_int128shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Int128Vector_Int128Shuffle); +} static bool is_int128(BasicType bt, vmSymbols::SID sid) { return is_int128vector(bt, sid) || is_int128species(bt, sid) || is_int128mask(bt, sid); } @@ -265,6 +295,9 @@ static bool is_int256mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Int256Vector_Int256Mask); } +static bool is_int256shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Int256Vector_Int256Shuffle); +} static bool is_int256(BasicType bt, vmSymbols::SID sid) { return is_int256vector(bt, sid) || is_int256species(bt, sid) || is_int256mask(bt, sid); } @@ -277,6 +310,9 @@ static bool is_int512mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Int512Vector_Int512Mask); } +static bool is_int512shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Int512Vector_Int512Shuffle); +} static bool is_int512(BasicType bt, vmSymbols::SID sid) { return is_int512vector(bt, sid) || 
is_int512species(bt, sid) || is_int512mask(bt, sid); } @@ -292,6 +328,9 @@ static bool is_long64mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Long64Vector_Long64Mask); } +static bool is_long64shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Long64Vector_Long64Shuffle); +} static bool is_long64(BasicType bt, vmSymbols::SID sid) { return is_long64vector(bt, sid) || is_long64species(bt, sid) || is_long64mask(bt, sid); } @@ -304,6 +343,9 @@ static bool is_long128mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Long128Vector_Long128Mask); } +static bool is_long128shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Long128Vector_Long128Shuffle); +} static bool is_long128(BasicType bt, vmSymbols::SID sid) { return is_long128vector(bt, sid) || is_long128species(bt, sid) || is_long128mask(bt, sid); } @@ -316,6 +358,9 @@ static bool is_long256mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Long256Vector_Long256Mask); } +static bool is_long256shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Long256Vector_Long256Shuffle); +} static bool is_long256(BasicType bt, vmSymbols::SID sid) { return is_long256vector(bt, sid) || is_long256species(bt, sid) || is_long256mask(bt, sid); } @@ -328,6 +373,9 @@ static bool is_long512mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Long512Vector_Long512Mask); } +static bool is_long512shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == 
vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Long512Vector_Long512Shuffle); +} static bool is_long512(BasicType bt, vmSymbols::SID sid) { return is_long512vector(bt, sid) || is_long512species(bt, sid) || is_long512mask(bt, sid); } @@ -343,6 +391,9 @@ static bool is_byte64mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Byte64Vector_Byte64Mask); } +static bool is_byte64shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Byte64Vector_Byte64Shuffle); +} static bool is_byte64(BasicType bt, vmSymbols::SID sid) { return is_byte64vector(bt, sid) || is_byte64species(bt, sid) || is_byte64mask(bt, sid); } @@ -355,6 +406,9 @@ static bool is_byte128mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Byte128Vector_Byte128Mask); } +static bool is_byte128shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Byte128Vector_Byte128Shuffle); +} static bool is_byte128(BasicType bt, vmSymbols::SID sid) { return is_byte128vector(bt, sid) || is_byte128species(bt, sid) || is_byte128mask(bt, sid); } @@ -367,6 +421,9 @@ static bool is_byte256mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Byte256Vector_Byte256Mask); } +static bool is_byte256shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Byte256Vector_Byte256Shuffle); +} static bool is_byte256(BasicType bt, vmSymbols::SID sid) { return is_byte256vector(bt, sid) || is_byte256species(bt, sid) || is_byte256mask(bt, sid); } @@ -379,6 +436,9 @@ static bool is_byte512mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == 
vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Byte512Vector_Byte512Mask); } +static bool is_byte512shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Byte512Vector_Byte512Shuffle); +} static bool is_byte512(BasicType bt, vmSymbols::SID sid) { return is_byte512vector(bt, sid) || is_byte512species(bt, sid) || is_byte512mask(bt, sid); } @@ -394,6 +454,9 @@ static bool is_short64mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Short64Vector_Short64Mask); } +static bool is_short64shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Short64Vector_Short64Shuffle); +} static bool is_short64(BasicType bt, vmSymbols::SID sid) { return is_short64vector(bt, sid) || is_short64species(bt, sid) || is_short64mask(bt, sid); } @@ -406,6 +469,9 @@ static bool is_short128mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Short128Vector_Short128Mask); } +static bool is_short128shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Short128Vector_Short128Shuffle); +} static bool is_short128(BasicType bt, vmSymbols::SID sid) { return is_short128vector(bt, sid) || is_short128species(bt, sid) || is_short128mask(bt, sid); } @@ -418,6 +484,9 @@ static bool is_short256mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Short256Vector_Short256Mask); } +static bool is_short256shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Short256Vector_Short256Shuffle); +} static bool is_short256(BasicType bt, vmSymbols::SID sid) { return is_short256vector(bt, sid) || 
is_short256species(bt, sid) || is_short256mask(bt, sid); } @@ -430,6 +499,9 @@ static bool is_short512mask(BasicType bt, vmSymbols::SID sid) { return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Short512Vector_Short512Mask); } +static bool is_short512shuffle(BasicType bt, vmSymbols::SID sid) { + return bt == T_OBJECT && sid == vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_incubator_vector_Short512Vector_Short512Shuffle); +} static bool is_short512(BasicType bt, vmSymbols::SID sid) { return is_short512vector(bt, sid) || is_short512species(bt, sid) || is_short512mask(bt, sid); } @@ -449,6 +521,16 @@ is_short64mask(__) || is_short128mask(__) || is_short256mask(__) || is_short512mask(__)); } +bool ciType::is_vectorshuffle() { + return basic_type() == T_OBJECT && + (is_float64shuffle(__) || is_float128shuffle(__) || is_float256shuffle(__) || is_float512shuffle(__) || + is_double64shuffle(__) || is_double128shuffle(__) || is_double256shuffle(__) || is_double512shuffle(__) || + is_int64shuffle(__) || is_int128shuffle(__) || is_int256shuffle(__) || is_int512shuffle(__) || + is_long64shuffle(__) || is_long128shuffle(__) || is_long256shuffle(__) || is_long512shuffle(__) || + is_byte64shuffle(__) || is_byte128shuffle(__) || is_byte256shuffle(__) || is_byte512shuffle(__) || + is_short64shuffle(__) || is_short128shuffle(__) || is_short256shuffle(__) || is_short512shuffle(__)); +} + bool ciType::is_vectorapi_vector() { return is_float_vec_or_mask(__) || is_double_vec_or_mask(__) || --- old/src/hotspot/share/ci/ciType.hpp 2018-09-06 16:45:34.201300800 -0700 +++ new/src/hotspot/share/ci/ciType.hpp 2018-09-06 16:45:33.671986000 -0700 @@ -74,6 +74,7 @@ bool is_two_word() const { return size() == 2; } bool is_vectormask(); + bool is_vectorshuffle(); bool is_vectorapi_vector(); int vectorapi_vector_size(); BasicType vectorapi_vector_bt(); --- old/src/hotspot/share/classfile/vmSymbols.hpp 2018-09-06 16:45:39.060530300 -0700 +++ 
new/src/hotspot/share/classfile/vmSymbols.hpp 2018-09-06 16:45:38.523877100 -0700 @@ -91,85 +91,109 @@ template(jdk_incubator_vector_Float64Vector, "jdk/incubator/vector/Float64Vector") \ template(jdk_incubator_vector_Float64Vector_Float64Species,"jdk/incubator/vector/Float64Vector$Float64Species") \ template(jdk_incubator_vector_Float64Vector_Float64Mask,"jdk/incubator/vector/Float64Vector$Float64Mask") \ + template(jdk_incubator_vector_Float64Vector_Float64Shuffle,"jdk/incubator/vector/Float64Vector$Float64Shuffle") \ template(jdk_incubator_vector_Float128Vector, "jdk/incubator/vector/Float128Vector") \ template(jdk_incubator_vector_Float128Vector_Float128Species,"jdk/incubator/vector/Float128Vector$Float128Species") \ template(jdk_incubator_vector_Float128Vector_Float128Mask,"jdk/incubator/vector/Float128Vector$Float128Mask") \ + template(jdk_incubator_vector_Float128Vector_Float128Shuffle,"jdk/incubator/vector/Float128Vector$Float128Shuffle") \ template(jdk_incubator_vector_Float256Vector, "jdk/incubator/vector/Float256Vector") \ template(jdk_incubator_vector_Float256Vector_Float256Species,"jdk/incubator/vector/Float256Vector$Float256Species") \ template(jdk_incubator_vector_Float256Vector_Float256Mask,"jdk/incubator/vector/Float256Vector$Float256Mask") \ + template(jdk_incubator_vector_Float256Vector_Float256Shuffle,"jdk/incubator/vector/Float256Vector$Float256Shuffle") \ template(jdk_incubator_vector_Float512Vector, "jdk/incubator/vector/Float512Vector") \ template(jdk_incubator_vector_Float512Vector_Float512Species,"jdk/incubator/vector/Float512Vector$Float512Species") \ template(jdk_incubator_vector_Float512Vector_Float512Mask,"jdk/incubator/vector/Float512Vector$Float512Mask") \ + template(jdk_incubator_vector_Float512Vector_Float512Shuffle,"jdk/incubator/vector/Float512Vector$Float512Shuffle") \ template(jdk_incubator_vector_DoubleVector, "jdk/incubator/vector/DoubleVector") \ 
template(jdk_incubator_vector_DoubleVector_DoubleSpecies,"jdk/incubator/vector/DoubleVector$DoubleSpecies") \ template(jdk_incubator_vector_Double64Vector, "jdk/incubator/vector/Double64Vector") \ template(jdk_incubator_vector_Double64Vector_Double64Species,"jdk/incubator/vector/Double64Vector$Double64Species") \ template(jdk_incubator_vector_Double64Vector_Double64Mask,"jdk/incubator/vector/Double64Vector$Double64Mask") \ + template(jdk_incubator_vector_Double64Vector_Double64Shuffle,"jdk/incubator/vector/Double64Vector$Double64Shuffle") \ template(jdk_incubator_vector_Double128Vector, "jdk/incubator/vector/Double128Vector") \ template(jdk_incubator_vector_Double128Vector_Double128Species,"jdk/incubator/vector/Double128Vector$Double128Species") \ template(jdk_incubator_vector_Double128Vector_Double128Mask,"jdk/incubator/vector/Double128Vector$Double128Mask") \ + template(jdk_incubator_vector_Double128Vector_Double128Shuffle,"jdk/incubator/vector/Double128Vector$Double128Shuffle") \ template(jdk_incubator_vector_Double256Vector, "jdk/incubator/vector/Double256Vector") \ template(jdk_incubator_vector_Double256Vector_Double256Species,"jdk/incubator/vector/Double256Vector$Double256Species") \ template(jdk_incubator_vector_Double256Vector_Double256Mask,"jdk/incubator/vector/Double256Vector$Double256Mask") \ + template(jdk_incubator_vector_Double256Vector_Double256Shuffle,"jdk/incubator/vector/Double256Vector$Double256Shuffle") \ template(jdk_incubator_vector_Double512Vector, "jdk/incubator/vector/Double512Vector") \ template(jdk_incubator_vector_Double512Vector_Double512Species,"jdk/incubator/vector/Double512Vector$Double512Species") \ template(jdk_incubator_vector_Double512Vector_Double512Mask,"jdk/incubator/vector/Double512Vector$Double512Mask") \ + template(jdk_incubator_vector_Double512Vector_Double512Shuffle,"jdk/incubator/vector/Double512Vector$Double512Shuffle") \ template(jdk_incubator_vector_LongVector, "jdk/incubator/vector/LongVector") \ 
template(jdk_incubator_vector_LongVector_LongSpecies, "jdk/incubator/vector/LongVector$LongSpecies") \ template(jdk_incubator_vector_Long64Vector, "jdk/incubator/vector/Long64Vector") \ template(jdk_incubator_vector_Long64Vector_Long64Species,"jdk/incubator/vector/Long64Vector$Long64Species") \ template(jdk_incubator_vector_Long64Vector_Long64Mask,"jdk/incubator/vector/Long64Vector$Long64Mask") \ + template(jdk_incubator_vector_Long64Vector_Long64Shuffle,"jdk/incubator/vector/Long64Vector$Long64Shuffle") \ template(jdk_incubator_vector_Long128Vector, "jdk/incubator/vector/Long128Vector") \ template(jdk_incubator_vector_Long128Vector_Long128Species,"jdk/incubator/vector/Long128Vector$Long128Species") \ template(jdk_incubator_vector_Long128Vector_Long128Mask,"jdk/incubator/vector/Long128Vector$Long128Mask") \ + template(jdk_incubator_vector_Long128Vector_Long128Shuffle,"jdk/incubator/vector/Long128Vector$Long128Shuffle") \ template(jdk_incubator_vector_Long256Vector, "jdk/incubator/vector/Long256Vector") \ template(jdk_incubator_vector_Long256Vector_Long256Species,"jdk/incubator/vector/Long256Vector$Long256Species") \ template(jdk_incubator_vector_Long256Vector_Long256Mask,"jdk/incubator/vector/Long256Vector$Long256Mask") \ + template(jdk_incubator_vector_Long256Vector_Long256Shuffle,"jdk/incubator/vector/Long256Vector$Long256Shuffle") \ template(jdk_incubator_vector_Long512Vector, "jdk/incubator/vector/Long512Vector") \ template(jdk_incubator_vector_Long512Vector_Long512Species,"jdk/incubator/vector/Long512Vector$Long512Species") \ template(jdk_incubator_vector_Long512Vector_Long512Mask,"jdk/incubator/vector/Long512Vector$Long512Mask") \ + template(jdk_incubator_vector_Long512Vector_Long512Shuffle,"jdk/incubator/vector/Long512Vector$Long512Shuffle") \ template(jdk_incubator_vector_IntVector, "jdk/incubator/vector/IntVector") \ template(jdk_incubator_vector_IntVector_IntSpecies, "jdk/incubator/vector/IntVector$IntSpecies") \ template(jdk_incubator_vector_Int64Vector, 
"jdk/incubator/vector/Int64Vector") \ template(jdk_incubator_vector_Int64Vector_Int64Species,"jdk/incubator/vector/Int64Vector$Int64Species") \ template(jdk_incubator_vector_Int64Vector_Int64Mask,"jdk/incubator/vector/Int64Vector$Int64Mask") \ + template(jdk_incubator_vector_Int64Vector_Int64Shuffle,"jdk/incubator/vector/Int64Vector$Int64Shuffle") \ template(jdk_incubator_vector_Int128Vector, "jdk/incubator/vector/Int128Vector") \ template(jdk_incubator_vector_Int128Vector_Int128Species,"jdk/incubator/vector/Int128Vector$Int128Species") \ template(jdk_incubator_vector_Int128Vector_Int128Mask,"jdk/incubator/vector/Int128Vector$Int128Mask") \ + template(jdk_incubator_vector_Int128Vector_Int128Shuffle,"jdk/incubator/vector/Int128Vector$Int128Shuffle") \ template(jdk_incubator_vector_Int256Vector, "jdk/incubator/vector/Int256Vector") \ template(jdk_incubator_vector_Int256Vector_Int256Species,"jdk/incubator/vector/Int256Vector$Int256Species") \ template(jdk_incubator_vector_Int256Vector_Int256Mask,"jdk/incubator/vector/Int256Vector$Int256Mask") \ + template(jdk_incubator_vector_Int256Vector_Int256Shuffle,"jdk/incubator/vector/Int256Vector$Int256Shuffle") \ template(jdk_incubator_vector_Int512Vector, "jdk/incubator/vector/Int512Vector") \ template(jdk_incubator_vector_Int512Vector_Int512Species,"jdk/incubator/vector/Int512Vector$Int512Species") \ template(jdk_incubator_vector_Int512Vector_Int512Mask,"jdk/incubator/vector/Int512Vector$Int512Mask") \ + template(jdk_incubator_vector_Int512Vector_Int512Shuffle,"jdk/incubator/vector/Int512Vector$Int512Shuffle") \ template(jdk_incubator_vector_ByteVector, "jdk/incubator/vector/ByteVector") \ template(jdk_incubator_vector_ByteVector_ByteSpecies, "jdk/incubator/vector/ByteVector$ByteSpecies") \ template(jdk_incubator_vector_Byte64Vector, "jdk/incubator/vector/Byte64Vector") \ template(jdk_incubator_vector_Byte64Vector_Byte64Species,"jdk/incubator/vector/Byte64Vector$Byte64Species") \ 
template(jdk_incubator_vector_Byte64Vector_Byte64Mask,"jdk/incubator/vector/Byte64Vector$Byte64Mask") \ + template(jdk_incubator_vector_Byte64Vector_Byte64Shuffle,"jdk/incubator/vector/Byte64Vector$Byte64Shuffle") \ template(jdk_incubator_vector_Byte128Vector, "jdk/incubator/vector/Byte128Vector") \ template(jdk_incubator_vector_Byte128Vector_Byte128Species,"jdk/incubator/vector/Byte128Vector$Byte128Species") \ template(jdk_incubator_vector_Byte128Vector_Byte128Mask,"jdk/incubator/vector/Byte128Vector$Byte128Mask") \ + template(jdk_incubator_vector_Byte128Vector_Byte128Shuffle,"jdk/incubator/vector/Byte128Vector$Byte128Shuffle") \ template(jdk_incubator_vector_Byte256Vector, "jdk/incubator/vector/Byte256Vector") \ template(jdk_incubator_vector_Byte256Vector_Byte256Species,"jdk/incubator/vector/Byte256Vector$Byte256Species") \ template(jdk_incubator_vector_Byte256Vector_Byte256Mask,"jdk/incubator/vector/Byte256Vector$Byte256Mask") \ + template(jdk_incubator_vector_Byte256Vector_Byte256Shuffle,"jdk/incubator/vector/Byte256Vector$Byte256Shuffle") \ template(jdk_incubator_vector_Byte512Vector, "jdk/incubator/vector/Byte512Vector") \ template(jdk_incubator_vector_Byte512Vector_Byte512Species,"jdk/incubator/vector/Byte512Vector$Byte512Species") \ template(jdk_incubator_vector_Byte512Vector_Byte512Mask,"jdk/incubator/vector/Byte512Vector$Byte512Mask") \ + template(jdk_incubator_vector_Byte512Vector_Byte512Shuffle,"jdk/incubator/vector/Byte512Vector$Byte512Shuffle") \ template(jdk_incubator_vector_ShortVector, "jdk/incubator/vector/ShortVector") \ template(jdk_incubator_vector_ShortVector_ShortSpecies, "jdk/incubator/vector/ShortVector$ShortSpecies") \ template(jdk_incubator_vector_Short64Vector, "jdk/incubator/vector/Short64Vector") \ template(jdk_incubator_vector_Short64Vector_Short64Species,"jdk/incubator/vector/Short64Vector$Short64Species") \ template(jdk_incubator_vector_Short64Vector_Short64Mask,"jdk/incubator/vector/Short64Vector$Short64Mask") \ + 
template(jdk_incubator_vector_Short64Vector_Short64Shuffle,"jdk/incubator/vector/Short64Vector$Short64Shuffle") \ template(jdk_incubator_vector_Short128Vector, "jdk/incubator/vector/Short128Vector") \ template(jdk_incubator_vector_Short128Vector_Short128Species,"jdk/incubator/vector/Short128Vector$Short128Species") \ template(jdk_incubator_vector_Short128Vector_Short128Mask,"jdk/incubator/vector/Short128Vector$Short128Mask") \ + template(jdk_incubator_vector_Short128Vector_Short128Shuffle,"jdk/incubator/vector/Short128Vector$Short128Shuffle") \ template(jdk_incubator_vector_Short256Vector, "jdk/incubator/vector/Short256Vector") \ template(jdk_incubator_vector_Short256Vector_Short256Species,"jdk/incubator/vector/Short256Vector$Short256Species") \ template(jdk_incubator_vector_Short256Vector_Short256Mask,"jdk/incubator/vector/Short256Vector$Short256Mask") \ + template(jdk_incubator_vector_Short256Vector_Short256Shuffle,"jdk/incubator/vector/Short256Vector$Short256Shuffle") \ template(jdk_incubator_vector_Short512Vector, "jdk/incubator/vector/Short512Vector") \ template(jdk_incubator_vector_Short512Vector_Short512Species,"jdk/incubator/vector/Short512Vector$Short512Species") \ template(jdk_incubator_vector_Short512Vector_Short512Mask,"jdk/incubator/vector/Short512Vector$Short512Mask") \ + template(jdk_incubator_vector_Short512Vector_Short512Shuffle,"jdk/incubator/vector/Short512Vector$Short512Shuffle") \ template(java_lang_Shutdown, "java/lang/Shutdown") \ template(java_lang_ref_Reference, "java/lang/ref/Reference") \ template(java_lang_ref_SoftReference, "java/lang/ref/SoftReference") \ @@ -1488,6 +1512,12 @@ "Ljdk/incubator/vector/VectorIntrinsics$VectorCompareOp;)Ljdk/incubator/vector/Vector$Mask;") \ do_name(vector_compare_name, "compare") \ \ + do_intrinsic(_VectorRearrange, jdk_incubator_vector_VectorIntrinsics, vector_rearrange_name, vector_rearrange_sig, F_S) \ + do_signature(vector_rearrange_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \ + 
"Ljdk/incubator/vector/Vector;Ljdk/incubator/vector/Vector$Shuffle;" \ + "Ljdk/incubator/vector/VectorIntrinsics$VectorRearrangeOp;)Ljdk/incubator/vector/Vector;") \ + do_name(vector_rearrange_name, "rearrangeOp") \ + \ do_intrinsic(_VectorExtract, jdk_incubator_vector_VectorIntrinsics, vector_extract_name, vector_extract_sig, F_S) \ do_signature(vector_extract_sig, "(Ljava/lang/Class;Ljava/lang/Class;I" \ "Ljdk/incubator/vector/Vector;I" \ --- old/src/hotspot/share/jvmci/vmStructs_jvmci.cpp 2018-09-06 16:45:44.350270400 -0700 +++ new/src/hotspot/share/jvmci/vmStructs_jvmci.cpp 2018-09-06 16:45:43.789761200 -0700 @@ -745,7 +745,8 @@ declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) \ declare_preprocessor_constant("VM_Version::CPU_SHA", CPU_SHA) \ declare_preprocessor_constant("VM_Version::CPU_FMA", CPU_FMA) \ - declare_preprocessor_constant("VM_Version::CPU_VZEROUPPER", CPU_VZEROUPPER) + declare_preprocessor_constant("VM_Version::CPU_VZEROUPPER", CPU_VZEROUPPER) \ + declare_preprocessor_constant("VM_Version::CPU_AVX512VBMI", CPU_AVX512VBMI) #endif --- old/src/hotspot/share/opto/classes.hpp 2018-09-06 16:45:49.003580000 -0700 +++ new/src/hotspot/share/opto/classes.hpp 2018-09-06 16:45:48.514104200 -0700 @@ -409,7 +409,9 @@ macro(VectorMaskCmp) macro(VectorTest) macro(VectorBlend) +macro(VectorRearrange) macro(VectorLoadMask) +macro(VectorLoadShuffle) macro(VectorStoreMask) macro(VectorReinterpret) macro(VectorCast) --- old/src/hotspot/share/opto/compile.cpp 2018-09-06 16:45:53.612018300 -0700 +++ new/src/hotspot/share/opto/compile.cpp 2018-09-06 16:45:53.098081000 -0700 @@ -2853,6 +2853,9 @@ if (from_kls->is_vectormask()) { field_name = "bits"; bt = T_BOOLEAN; + } else if (from_kls->is_vectorshuffle()) { + field_name = "reorder"; + bt = T_BYTE; } ciField* field = from_kls->get_field_by_name(ciSymbol::make(field_name), @@ -2888,6 +2891,9 @@ if (from_kls->is_vectormask() && masktype != T_BOOLEAN) { 
assert(vec_unbox->bottom_type()->is_vect()->element_basic_type() == masktype, "expect mask type consistency"); vec_val_load = gvn.transform(new VectorLoadMaskNode(vec_val_load, TypeVect::make(masktype, num_elem))); + } else if (from_kls->is_vectorshuffle()) { + assert(vec_unbox->bottom_type()->is_vect()->element_basic_type() == masktype, "expect shuffle type consistency"); + vec_val_load = gvn.transform(new VectorLoadShuffleNode(vec_val_load, TypeVect::make(masktype, num_elem))); } gvn.hash_delete(vec_unbox); --- old/src/hotspot/share/opto/library_call.cpp 2018-09-06 16:45:58.546644800 -0700 +++ new/src/hotspot/share/opto/library_call.cpp 2018-09-06 16:45:57.992021000 -0700 @@ -336,6 +336,7 @@ bool inline_vector_reduction(); bool inline_vector_test(); bool inline_vector_blend(); + bool inline_vector_rearrange(); bool inline_vector_compare(); bool inline_vector_broadcast_int(); bool inline_vector_cast_reinterpret(bool is_cast); @@ -908,6 +909,8 @@ return inline_vector_test(); case vmIntrinsics::_VectorBlend: return inline_vector_blend(); + case vmIntrinsics::_VectorRearrange: + return inline_vector_rearrange(); case vmIntrinsics::_VectorCompare: return inline_vector_compare(); case vmIntrinsics::_VectorBroadcastInt: @@ -7283,6 +7286,57 @@ C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt)))); return true; +} + +// static +// +// V rearrangeOp(Class vectorClass, Class shuffleClass, Class< ? > elementType, int vlen, +// V v1, Sh sh, +// VectorRearrangeOp defaultImpl) { ... 
+ +bool LibraryCallKit::inline_vector_rearrange() { + const TypeInstPtr* vector_klass = gvn().type(argument(0))->is_instptr(); + const TypeInstPtr* shuffle_klass = gvn().type(argument(1))->is_instptr(); + const TypeInstPtr* elem_klass = gvn().type(argument(2))->is_instptr(); + const TypeInt* vlen = gvn().type(argument(3))->is_int(); + + if (shuffle_klass->const_oop() == NULL || vector_klass->const_oop() == NULL || + elem_klass->const_oop() == NULL || !vlen->is_con()) { + return false; // not enough info for intrinsification + } + ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type(); + if (!elem_type->is_primitive_type()) { + return false; // should be primitive type + } + BasicType elem_bt = elem_type->basic_type(); + BasicType shuffle_bt = elem_bt; + int num_elem = vlen->get_con(); + + if (!arch_supports_vector(Op_VectorLoadShuffle, num_elem, elem_bt, VecMaskNotUsed)) { + return false; // not supported + } + if (!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, VecMaskNotUsed)) { + return false; // not supported + } + ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass(); + const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass); + + ciKlass* shbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass(); + const TypeInstPtr* shbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, shbox_klass); + + Node* v1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem); + Node* shuffle = unbox_vector(argument(5), shbox_type, shuffle_bt, num_elem); + + if (v1 == NULL || shuffle == NULL) { + return false; // operand unboxing failed + } + + Node* rearrange = _gvn.transform(new VectorRearrangeNode(v1, shuffle)); + Node* box = box_vector(rearrange, vbox_type, elem_bt, num_elem); + set_result(box); + + C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt)))); + return true; } Node* LibraryCallKit::shift_count(Node* cnt, 
int shift_op, BasicType bt, int num_elem) { --- old/src/hotspot/share/opto/vectornode.hpp 2018-09-06 16:46:03.376939500 -0700 +++ new/src/hotspot/share/opto/vectornode.hpp 2018-09-06 16:46:02.870743900 -0700 @@ -1266,6 +1266,19 @@ Node* vec_mask() const { return in(3); } }; +class VectorRearrangeNode : public VectorNode { +public: + VectorRearrangeNode(Node* vec1, Node* shuffle) + : VectorNode(vec1, shuffle, vec1->bottom_type()->is_vect()) { + // NOTE(review): 'shuffle' (input 2) is presumably a shuffle vector; nothing asserts this here. + } + + virtual int Opcode() const; + Node* vec1() const { return in(1); } + Node* vec_shuffle() const { return in(2); } +}; + + class VectorLoadMaskNode : public VectorNode { public: VectorLoadMaskNode(Node* in, const TypeVect* vt) @@ -1278,6 +1291,18 @@ virtual int Opcode() const; }; +class VectorLoadShuffleNode : public VectorNode { +public: + VectorLoadShuffleNode(Node* in, const TypeVect* vt) + : VectorNode(in, vt) { + assert(in->is_LoadVector(), "expected load vector"); + assert(in->as_LoadVector()->vect_type()->element_basic_type() == T_BYTE, "must be BYTE"); + } + + int GetOutShuffleSize() const { return type2aelembytes(vect_type()->element_basic_type()); } + virtual int Opcode() const; +}; + class VectorStoreMaskNode : public VectorNode { private: int _mask_size; --- old/src/hotspot/share/runtime/vmStructs.cpp 2018-09-06 16:46:08.026108100 -0700 +++ new/src/hotspot/share/runtime/vmStructs.cpp 2018-09-06 16:46:07.485077600 -0700 @@ -1862,8 +1862,10 @@ declare_c2_type(FmaDNode, Node) \ declare_c2_type(FmaFNode, Node) \ declare_c2_type(VectorLoadMaskNode, VectorNode) \ + declare_c2_type(VectorLoadShuffleNode, VectorNode) \ declare_c2_type(VectorStoreMaskNode, VectorNode) \ declare_c2_type(VectorBlendNode, VectorNode) \ + declare_c2_type(VectorRearrangeNode, VectorNode) \ declare_c2_type(VectorMaskWrapperNode, VectorNode) \ declare_c2_type(VectorMaskCmpNode, VectorNode) \ declare_c2_type(VectorCastB2XNode, VectorNode) \ --- 
old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java 2018-09-06 16:46:13.148824400 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java 2018-09-06 16:46:12.623223200 -0700 @@ -844,12 +844,19 @@ } @Override - public Byte128Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Byte128Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Byte128Shuffle s = (Byte128Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Byte128Vector.class, Byte128Shuffle.class, byte.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { byte[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java 2018-09-06 16:46:17.806371600 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java 2018-09-06 16:46:17.274602600 -0700 @@ -844,12 +844,19 @@ } @Override - public Byte256Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Byte256Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Byte256Shuffle s = (Byte256Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Byte256Vector.class, Byte256Shuffle.class, byte.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { byte[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java 2018-09-06 16:46:22.469716500 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java 2018-09-06 16:46:21.962304700 -0700 @@ -844,12 +844,19 @@ } @Override - public Byte512Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Byte512Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Byte512Shuffle s = 
(Byte512Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Byte512Vector.class, Byte512Shuffle.class, byte.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { byte[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java 2018-09-06 16:46:27.239274100 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java 2018-09-06 16:46:26.685935600 -0700 @@ -844,12 +844,19 @@ } @Override - public Byte64Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Byte64Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Byte64Shuffle s = (Byte64Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Byte64Vector.class, Byte64Shuffle.class, byte.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { byte[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java 2018-09-06 16:46:31.994313900 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java 2018-09-06 16:46:31.422202400 -0700 @@ -997,12 +997,19 @@ } @Override - public Double128Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Double128Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Double128Shuffle s = (Double128Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Double128Vector.class, Double128Shuffle.class, double.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { double[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java 2018-09-06 16:46:36.612131400 -0700 +++ 
new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java 2018-09-06 16:46:36.098230400 -0700 @@ -997,12 +997,19 @@ } @Override - public Double256Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Double256Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Double256Shuffle s = (Double256Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Double256Vector.class, Double256Shuffle.class, double.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { double[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java 2018-09-06 16:46:41.219659400 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java 2018-09-06 16:46:40.713745300 -0700 @@ -997,12 +997,19 @@ } @Override - public Double512Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Double512Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Double512Shuffle s = (Double512Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Double512Vector.class, Double512Shuffle.class, double.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { double[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java 2018-09-06 16:46:45.836917600 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java 2018-09-06 16:46:45.323148000 -0700 @@ -997,12 +997,19 @@ } @Override - public Double64Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Double64Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Double64Shuffle s = (Double64Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Double64Vector.class, 
Double64Shuffle.class, double.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { double[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java 2018-09-06 16:46:50.521332800 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java 2018-09-06 16:46:50.006198200 -0700 @@ -997,12 +997,19 @@ } @Override - public Float128Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Float128Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Float128Shuffle s = (Float128Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Float128Vector.class, Float128Shuffle.class, float.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { float[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java 2018-09-06 16:46:55.275495100 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java 2018-09-06 16:46:54.726525600 -0700 @@ -997,12 +997,19 @@ } @Override - public Float256Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Float256Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Float256Shuffle s = (Float256Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Float256Vector.class, Float256Shuffle.class, float.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { float[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java 2018-09-06 16:46:59.962819000 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java 2018-09-06 
16:46:59.341600400 -0700 @@ -997,12 +997,19 @@ } @Override - public Float512Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Float512Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Float512Shuffle s = (Float512Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Float512Vector.class, Float512Shuffle.class, float.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { float[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java 2018-09-06 16:47:04.490099500 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java 2018-09-06 16:47:03.979478000 -0700 @@ -997,12 +997,19 @@ } @Override - public Float64Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Float64Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Float64Shuffle s = (Float64Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Float64Vector.class, Float64Shuffle.class, float.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { float[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java 2018-09-06 16:47:09.204183200 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java 2018-09-06 16:47:08.675612700 -0700 @@ -915,12 +915,19 @@ } @Override - public Int128Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Int128Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Int128Shuffle s = (Int128Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Int128Vector.class, Int128Shuffle.class, int.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { int[] vec = this.getElements(); - int ei = 
s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java 2018-09-06 16:47:14.003951200 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java 2018-09-06 16:47:13.470313300 -0700 @@ -915,12 +915,19 @@ } @Override - public Int256Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Int256Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Int256Shuffle s = (Int256Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Int256Vector.class, Int256Shuffle.class, int.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { int[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java 2018-09-06 16:47:18.774287500 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java 2018-09-06 16:47:18.207794700 -0700 @@ -915,12 +915,19 @@ } @Override - public Int512Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Int512Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Int512Shuffle s = (Int512Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Int512Vector.class, Int512Shuffle.class, int.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { int[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java 2018-09-06 16:47:23.416498500 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java 2018-09-06 16:47:22.886263700 -0700 @@ -915,12 +915,19 @@ } @Override - public Int64Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Int64Vector rearrange(Shuffle o1) 
{ + Objects.requireNonNull(o1); + Int64Shuffle s = (Int64Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Int64Vector.class, Int64Shuffle.class, int.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { int[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java 2018-09-06 16:47:28.094051300 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java 2018-09-06 16:47:27.556470800 -0700 @@ -915,12 +915,19 @@ } @Override - public Long128Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Long128Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Long128Shuffle s = (Long128Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Long128Vector.class, Long128Shuffle.class, long.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { long[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java 2018-09-06 16:47:32.850960300 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java 2018-09-06 16:47:32.282831500 -0700 @@ -915,12 +915,19 @@ } @Override - public Long256Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Long256Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Long256Shuffle s = (Long256Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Long256Vector.class, Long256Shuffle.class, long.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { long[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java 2018-09-06 16:47:37.503501700 -0700 
+++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java 2018-09-06 16:47:36.973340900 -0700 @@ -915,12 +915,19 @@ } @Override - public Long512Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Long512Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Long512Shuffle s = (Long512Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Long512Vector.class, Long512Shuffle.class, long.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { long[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java 2018-09-06 16:47:42.390615800 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java 2018-09-06 16:47:41.810727400 -0700 @@ -915,12 +915,19 @@ } @Override - public Long64Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Long64Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Long64Shuffle s = (Long64Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Long64Vector.class, Long64Shuffle.class, long.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { long[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java 2018-09-06 16:47:46.979934500 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java 2018-09-06 16:47:46.437854900 -0700 @@ -845,12 +845,19 @@ } @Override - public Short128Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Short128Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Short128Shuffle s = (Short128Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Short128Vector.class, Short128Shuffle.class, short.class, LENGTH, + 
this, s, + (v1, s_) -> v1.uOp((i, a) -> { short[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java 2018-09-06 16:47:51.627311900 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java 2018-09-06 16:47:51.098008600 -0700 @@ -845,12 +845,19 @@ } @Override - public Short256Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Short256Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Short256Shuffle s = (Short256Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Short256Vector.class, Short256Shuffle.class, short.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { short[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java 2018-09-06 16:47:56.370383300 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java 2018-09-06 16:47:55.834950800 -0700 @@ -845,12 +845,19 @@ } @Override - public Short512Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Short512Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Short512Shuffle s = (Short512Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Short512Vector.class, Short512Shuffle.class, short.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { short[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java 2018-09-06 16:48:01.085965000 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java 2018-09-06 16:48:00.561839900 -0700 @@ -845,12 +845,19 @@ } 
@Override - public Short64Vector rearrange(Shuffle s) { - return uOp((i, a) -> { + @ForceInline + public Short64Vector rearrange(Shuffle o1) { + Objects.requireNonNull(o1); + Short64Shuffle s = (Short64Shuffle)o1; + + return VectorIntrinsics.rearrangeOp( + Short64Vector.class, Short64Shuffle.class, short.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { short[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorIntrinsics.java 2018-09-06 16:48:05.669050700 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorIntrinsics.java 2018-09-06 16:48:05.170829600 -0700 @@ -262,6 +262,25 @@ /* ============================================================================ */ + interface VectorRearrangeOp, + Sh extends Vector.Shuffle, + S extends Vector.Shape, E> { + V apply(V v1, Sh shuffle); + } + + @HotSpotIntrinsicCandidate + static + , + Sh extends Vector.Shuffle, + S extends Vector.Shape, E> + V rearrangeOp(Class vectorClass, Class shuffleClass, Class elementType, int vlen, + V v1, Sh sh, + VectorRearrangeOp defaultImpl) { + return defaultImpl.apply(v1, sh); + } + + /* ============================================================================ */ + interface VectorBlendOp, M extends Vector.Mask, S extends Vector.Shape, E> { --- old/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template 2018-09-06 16:48:10.473785400 -0700 +++ new/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template 2018-09-06 16:48:09.929749900 -0700 @@ -1286,12 +1286,19 @@ } @Override - public $vectortype$ rearrange(Shuffle<$Boxtype$, Shapes.$shape$> s) { - return uOp((i, a) -> { + @ForceInline + public $vectortype$ rearrange(Shuffle<$Boxtype$, Shapes.$shape$> o1) { + Objects.requireNonNull(o1); + $shuffletype$ s = ($shuffletype$)o1; + + return 
VectorIntrinsics.rearrangeOp( + $vectortype$.class, $shuffletype$.class, $type$.class, LENGTH, + this, s, + (v1, s_) -> v1.uOp((i, a) -> { $type$[] vec = this.getElements(); - int ei = s.getElement(i); + int ei = s_.getElement(i); return vec[ei]; - }); + })); } @Override