< prev index next >

src/cpu/x86/vm/x86.ad

Print this page

        

*** 2892,2948 **** __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! // Replicate byte scalar to be vector ! instruct Repl4B(vecS dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateB src)); ! format %{ "movd $dst,$src\n\t" ! "punpcklbw $dst,$dst\n\t" ! "pshuflw $dst,$dst,0x00\t! replicate4B" %} ! ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); ! __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); ! %} ! ins_pipe( pipe_slow ); ! %} ! instruct Repl8B(vecD dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t" "punpcklbw $dst,$dst\n\t" ! "pshuflw $dst,$dst,0x00\t! replicate8B" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} ! instruct Repl16B(vecX dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 16); ! match(Set dst (ReplicateB src)); ! format %{ "movd $dst,$src\n\t" ! "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\t! replicate16B" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl32B(vecY dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 32); match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t" "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" --- 2892,2935 ---- __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! // ====================LEGACY REPLICATE======================================= ! instruct Repl16B(vecX dst, rRegI src) %{ ! 
predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl() && !VM_Version::supports_avx512bw()); match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t" "punpcklbw $dst,$dst\n\t" ! "pshuflw $dst,$dst,0x00\n\t" ! "punpcklqdq $dst,$dst\t! replicate16B" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl16B_mem(vecX dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl() && !VM_Version::supports_avx512bw()); ! match(Set dst (ReplicateB (LoadB mem))); ! format %{ "punpcklbw $dst,$mem\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\t! replicate16B" %} ins_encode %{ ! __ punpcklbw($dst$$XMMRegister, $mem$$Address); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl32B(vecY dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vl() && !VM_Version::supports_avx512bw()); match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t" "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t"
*** 2955,3007 **** __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl64B(vecZ dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 64); ! match(Set dst (ReplicateB src)); ! format %{ "movd $dst,$src\n\t" ! "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t" ! "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate632B" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); - %} - - // Replicate byte scalar immediate to be vector by loading from const table. - instruct Repl4B_imm(vecS dst, immI con) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateB con)); - format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); - %} - ins_pipe( pipe_slow ); - %} - - instruct Repl8B_imm(vecD dst, immI con) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateB con)); - format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); %} ins_pipe( pipe_slow ); %} instruct Repl16B_imm(vecX dst, immI con) %{ ! predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\t! 
replicate16B($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); --- 2942,2969 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl32B_mem(vecY dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vl() && !VM_Version::supports_avx512bw()); ! match(Set dst (ReplicateB (LoadB mem))); ! format %{ "punpcklbw $dst,$mem\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate32B" %} ins_encode %{ ! __ punpcklbw($dst$$XMMRegister, $mem$$Address); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl16B_imm(vecX dst, immI con) %{ ! predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl() && !VM_Version::supports_avx512bw()); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\t! replicate16B($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
*** 3009,3019 **** %} ins_pipe( pipe_slow ); %} instruct Repl32B_imm(vecY dst, immI con) %{ ! predicate(n->as_Vector()->length() == 32); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} ins_encode %{ --- 2971,2981 ---- %} ins_pipe( pipe_slow ); %} instruct Repl32B_imm(vecY dst, immI con) %{ ! predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vl() && !VM_Version::supports_avx512bw()); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} ins_encode %{
*** 3022,3129 **** __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} - instruct Repl64B_imm(vecZ dst, immI con) %{ - predicate(n->as_Vector()->length() == 64); - match(Set dst (ReplicateB con)); - format %{ "movq $dst,[$constantaddress]\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); - %} - - // Replicate byte scalar zero to be vector - instruct Repl4B_zero(vecS dst, immI0 zero) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateB zero)); - format %{ "pxor $dst,$dst\t! replicate4B zero" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); - %} - - instruct Repl8B_zero(vecD dst, immI0 zero) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateB zero)); - format %{ "pxor $dst,$dst\t! replicate8B zero" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); - %} - instruct Repl16B_zero(vecX dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateB zero)); format %{ "pxor $dst,$dst\t! replicate16B zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl32B_zero(vecY dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 32); match(Set dst (ReplicateB zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} - instruct Repl64B_zero(vecZ dst, immI0 zero) %{ - predicate(n->as_Vector()->length() == 64); - match(Set dst (ReplicateB zero)); - format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} - ins_encode %{ - // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). - int vector_len = 2; - __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); - %} - - // Replicate char/short (2 byte) scalar to be vector - instruct Repl2S(vecS dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (ReplicateS src)); - format %{ "movd $dst,$src\n\t" - "pshuflw $dst,$dst,0x00\t! replicate2S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); - %} - - instruct Repl4S(vecD dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateS src)); - format %{ "movd $dst,$src\n\t" - "pshuflw $dst,$dst,0x00\t! replicate4S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); - %} - instruct Repl8S(vecX dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateS src)); format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\t! replicate8S" %} ins_encode %{ --- 2984,3017 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl16B_zero(vecX dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && UseAVX < 3); match(Set dst (ReplicateB zero)); format %{ "pxor $dst,$dst\t! 
replicate16B zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl32B_zero(vecY dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && UseAVX < 3); match(Set dst (ReplicateB zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct Repl8S(vecX dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl() && !VM_Version::supports_avx512bw()); match(Set dst (ReplicateS src)); format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\t! replicate8S" %} ins_encode %{
*** 3133,3143 **** %} ins_pipe( pipe_slow ); %} instruct Repl16S(vecY dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateS src)); format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128h $dst,$dst,$dst\t! replicate16S" %} --- 3021,3031 ---- %} ins_pipe( pipe_slow ); %} instruct Repl16S(vecY dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl() && !VM_Version::supports_avx512bw()); match(Set dst (ReplicateS src)); format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
*** 3148,3198 **** __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} - instruct Repl32S(vecZ dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 32); - match(Set dst (ReplicateS src)); - format %{ "movd $dst,$src\n\t" - "pshuflw $dst,$dst,0x00\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); - %} - - // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. - instruct Repl2S_imm(vecS dst, immI con) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (ReplicateS con)); - format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); - %} - ins_pipe( fpu_reg_reg ); - %} - - instruct Repl4S_imm(vecD dst, immI con) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateS con)); - format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); - %} - ins_pipe( fpu_reg_reg ); - %} - instruct Repl8S_imm(vecX dst, immI con) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\t! 
replicate8S($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); --- 3036,3047 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8S_imm(vecX dst, immI con) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl() && !VM_Version::supports_avx512bw()); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\t! replicate8S($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
*** 3200,3210 **** %} ins_pipe( pipe_slow ); %} instruct Repl16S_imm(vecY dst, immI con) %{ ! predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} ins_encode %{ --- 3049,3059 ---- %} ins_pipe( pipe_slow ); %} instruct Repl16S_imm(vecY dst, immI con) %{ ! predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl() && !VM_Version::supports_avx512bw()); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} ins_encode %{
*** 3213,3320 **** __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} - instruct Repl32S_imm(vecZ dst, immI con) %{ - predicate(n->as_Vector()->length() == 32); - match(Set dst (ReplicateS con)); - format %{ "movq $dst,[$constantaddress]\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); - %} - - // Replicate char/short (2 byte) scalar zero to be vector - instruct Repl2S_zero(vecS dst, immI0 zero) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (ReplicateS zero)); - format %{ "pxor $dst,$dst\t! replicate2S zero" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); - %} - - instruct Repl4S_zero(vecD dst, immI0 zero) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateS zero)); - format %{ "pxor $dst,$dst\t! replicate4S zero" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); - %} - instruct Repl8S_zero(vecX dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateS zero)); format %{ "pxor $dst,$dst\t! replicate8S zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl16S_zero(vecY dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateS zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} ! instruct Repl32S_zero(vecZ dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 32); ! match(Set dst (ReplicateS zero)); ! format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} ! ins_encode %{ ! // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). ! int vector_len = 2; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! // Replicate integer (4 byte) scalar to be vector ! instruct Repl2I(vecD dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" ! "pshufd $dst,$dst,0x00\t! replicate2I" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ! ins_pipe( fpu_reg_reg ); %} ! instruct Repl4I(vecX dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateI src)); ! format %{ "movd $dst,$src\n\t" ! "pshufd $dst,$dst,0x00\t! replicate4I" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl8I(vecY dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" "pshufd $dst,$dst,0x00\n\t" "vinserti128h $dst,$dst,$dst\t! replicate8I" %} ins_encode %{ --- 3062,3117 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8S_zero(vecX dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3); match(Set dst (ReplicateS zero)); format %{ "pxor $dst,$dst\t! replicate8S zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl16S_zero(vecY dst, immI0 zero) %{ ! 
predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && UseAVX < 3); match(Set dst (ReplicateS zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} ! instruct Repl4I(vecX dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" ! "pshufd $dst,$dst,0x00\t! replicate4I" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ! ins_pipe( pipe_slow ); %} ! instruct Repl4I_mem(vecX dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateI (LoadI mem))); ! format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} ins_encode %{ ! __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl8I(vecY dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" "pshufd $dst,$dst,0x00\n\t" "vinserti128h $dst,$dst,$dst\t! replicate8I" %} ins_encode %{
*** 3323,3361 **** __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl16I(vecZ dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 16); ! match(Set dst (ReplicateI src)); ! format %{ "movd $dst,$src\n\t" ! "pshufd $dst,$dst,0x00\n\t" ! "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t" ! "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} - // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. - instruct Repl2I_imm(vecD dst, immI con) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (ReplicateI con)); - format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); - %} - ins_pipe( fpu_reg_reg ); - %} - instruct Repl4I_imm(vecX dst, immI con) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" "punpcklqdq $dst,$dst" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); --- 3120,3143 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl8I_mem(vecY dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateI (LoadI mem))); ! format %{ "pshufd $dst,$mem,0x00\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate8I" %} ins_encode %{ ! 
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl4I_imm(vecX dst, immI con) %{ ! predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" "punpcklqdq $dst,$dst" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
*** 3363,3373 **** %} ins_pipe( pipe_slow ); %} instruct Repl8I_imm(vecY dst, immI con) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128h $dst,$dst,$dst" %} ins_encode %{ --- 3145,3155 ---- %} ins_pipe( pipe_slow ); %} instruct Repl8I_imm(vecY dst, immI con) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128h $dst,$dst,$dst" %} ins_encode %{
*** 3376,3491 **** __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl16I_imm(vecZ dst, immI con) %{ ! predicate(n->as_Vector()->length() == 16); ! match(Set dst (ReplicateI con)); ! format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\n\t" ! "vinserti64x4h $dst k0,$dst,$dst" %} ! ins_encode %{ ! __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! // Integer could be loaded into xmm register directly from memory. ! instruct Repl2I_mem(vecD dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateI (LoadI mem))); ! format %{ "movd $dst,$mem\n\t" ! "pshufd $dst,$dst,0x00\t! replicate2I" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $mem$$Address); ! __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ! ins_pipe( fpu_reg_reg ); %} ! instruct Repl4I_mem(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateI (LoadI mem))); ! format %{ "movd $dst,$mem\n\t" ! "pshufd $dst,$dst,0x00\t! replicate4I" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $mem$$Address); ! __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} ! instruct Repl8I_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 8); ! match(Set dst (ReplicateI (LoadI mem))); ! format %{ "movd $dst,$mem\n\t" ! "pshufd $dst,$dst,0x00\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate8I" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $mem$$Address); ! __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); ! 
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl16I_mem(vecZ dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 16); ! match(Set dst (ReplicateI (LoadI mem))); ! format %{ "movd $dst,$mem\n\t" ! "pshufd $dst,$dst,0x00\n\t" ! "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t" ! "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $mem$$Address); ! __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! // Replicate integer (4 byte) scalar zero to be vector ! instruct Repl2I_zero(vecD dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateI zero)); ! format %{ "pxor $dst,$dst\t! replicate2I" %} ins_encode %{ ! __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ! ins_pipe( fpu_reg_reg ); %} ! instruct Repl4I_zero(vecX dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateI zero)); ! format %{ "pxor $dst,$dst\t! replicate4I zero)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} ! instruct Repl8I_zero(vecY dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 8); ! match(Set dst (ReplicateI zero)); ! format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} ins_encode %{ ! // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! int vector_len = 1; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} ! instruct Repl16I_zero(vecZ dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateI zero)); format %{ "vpxor $dst k0,$dst,$dst\t! 
replicate16I zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). int vector_len = 2; --- 3158,4082 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl4I_zero(vecX dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && UseAVX < 3); ! match(Set dst (ReplicateI zero)); ! format %{ "pxor $dst,$dst\t! replicate4I zero)" %} ! ins_encode %{ ! __ pxor($dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl8I_zero(vecY dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3); ! match(Set dst (ReplicateI zero)); ! format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} ! ins_encode %{ ! // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! int vector_len = 1; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! // Replicate long (8 byte) scalar to be vector ! #ifdef _LP64 ! instruct Repl4L(vecY dst, rRegL src) %{ ! predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateL src)); ! format %{ "movdq $dst,$src\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L" %} ! ins_encode %{ ! __ movdq($dst$$XMMRegister, $src$$Register); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! #else // _LP64 ! instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ ! predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateL src)); ! effect(TEMP dst, USE src, TEMP tmp); ! format %{ "movdl $dst,$src.lo\n\t" ! "movdl $tmp,$src.hi\n\t" ! "punpckldq $dst,$tmp\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! 
replicate4L" %} ! ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); ! __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! #endif // _LP64 ! ! instruct Repl4L_imm(vecY dst, immL con) %{ ! predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateL con)); ! format %{ "movq $dst,[$constantaddress]\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} ! ins_encode %{ ! __ movq($dst$$XMMRegister, $constantaddress($con)); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl4L_mem(vecY dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateL (LoadL mem))); ! format %{ "movq $dst,$mem\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L" %} ! ins_encode %{ ! __ movq($dst$$XMMRegister, $mem$$Address); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl8L_mem(vecZ dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3); ! match(Set dst (ReplicateL (LoadL mem))); ! format %{ "movq $dst,$mem\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" ! "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} ! ins_encode %{ ! 
__ movq($dst$$XMMRegister, $mem$$Address); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl8F(vecY dst, regF src) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateF src)); ! format %{ "pshufd $dst,$src,0x00\n\t" ! "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} ins_encode %{ ! __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ! ins_pipe( pipe_slow ); %} ! instruct Repl8F_mem(vecY dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateF (LoadF mem))); ! format %{ "pshufd $dst,$mem,0x00\n\t" ! "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} ! ins_encode %{ ! __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl4D(vecY dst, regD src) %{ ! predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateD src)); ! format %{ "pshufd $dst,$src,0x44\n\t" ! "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} ! ins_encode %{ ! __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl4D_mem(vecY dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateD (LoadD mem))); ! format %{ "pshufd $dst,$mem,0x44\n\t" ! "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} ! ins_encode %{ ! __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); ! 
__ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // ====================GENERIC REPLICATE========================================== ! ! // Replicate byte scalar to be vector ! instruct Repl4B(vecS dst, rRegI src) %{ predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateB src)); ! format %{ "movd $dst,$src\n\t" ! "punpcklbw $dst,$dst\n\t" ! "pshuflw $dst,$dst,0x00\t! replicate4B" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); ! __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} ! instruct Repl4B_mem(vecS dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateB (LoadB mem))); ! format %{ "punpcklbw $dst,$mem\n\t" ! "pshuflw $dst,$dst,0x00\t! replicate4B" %} ! ins_encode %{ ! __ punpcklbw($dst$$XMMRegister, $mem$$Address); ! __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl8B(vecD dst, rRegI src) %{ predicate(n->as_Vector()->length() == 8); ! match(Set dst (ReplicateB src)); ! format %{ "movd $dst,$src\n\t" ! "punpcklbw $dst,$dst\n\t" ! "pshuflw $dst,$dst,0x00\t! replicate8B" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); ! __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} ! instruct Repl8B_mem(vecD dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (ReplicateB (LoadB mem))); ! format %{ "punpcklbw $dst,$mem\n\t" ! "pshuflw $dst,$dst,0x00\t! replicate8B" %} ins_encode %{ ! __ punpcklbw($dst$$XMMRegister, $mem$$Address); ! __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} ! // Replicate byte scalar immediate to be vector by loading from const table. ! instruct Repl4B_imm(vecS dst, immI con) %{ ! 
predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateB con)); ! format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); %} ! ins_pipe( pipe_slow ); %} ! instruct Repl8B_imm(vecD dst, immI con) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (ReplicateB con)); ! format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} ! ins_encode %{ ! __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // Replicate byte scalar zero to be vector ! instruct Repl4B_zero(vecS dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateB zero)); ! format %{ "pxor $dst,$dst\t! replicate4B zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} ! instruct Repl8B_zero(vecD dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 8); ! match(Set dst (ReplicateB zero)); ! format %{ "pxor $dst,$dst\t! replicate8B zero" %} ins_encode %{ ! __ pxor($dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! // Replicate char/short (2 byte) scalar to be vector ! instruct Repl2S(vecS dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateS src)); ! format %{ "movd $dst,$src\n\t" ! "pshuflw $dst,$dst,0x00\t! replicate2S" %} ! ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl4S(vecD dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateS src)); ! format %{ "movd $dst,$src\n\t" ! "pshuflw $dst,$dst,0x00\t! replicate4S" %} ! ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! 
// Generic (non-EVEX) replicate rules for short, int and long vectors.
// Each rule is selected purely on the vector length reported by the node;
// the operand class (vecS/vecD/vecX/vecY) fixes the register width.

// Four shorts broadcast from a short in memory: pshuflw with selector 0x00
// reads its source operand directly from $mem.
instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
// replicate4_imm / replicate8_imm are defined outside this section; presumably
// they pre-expand the immediate into a 4- or 8-byte constant whose element
// width is the second argument -- NOTE(review): confirm against the helper.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector.
// A zero vector needs no source operand: the register is xor-ed with itself.
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector.
// The GPR value is first moved into the XMM register, then shuffled in place.
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd    $dst,$src\n\t"
            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd    $dst,$mem\n\t"
            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector.
// The annotation carries the "zero" tag like every other *_zero rule, so the
// printed listing distinguishes it from the register form.
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
// 64-bit VM: the long lives in a single GPR.
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq   $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// 32-bit VM: the long is a register pair. The low half goes to $dst, the high
// half (HIGH_FROM_LOW) to $tmp, and punpckldq joins them before the 64-bit
// value is duplicated. $dst is declared TEMP because it is written before
// $src has been fully read.
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl   $dst,$src.lo\n\t"
            "movdl   $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq    $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // This rule emits through the assembler's vpxor entry point. Plain AVX
    // has no 256-bit vpxor (AVX2 adds it), so any substitution of vxorpd on
    // such hardware would have to happen inside vpxor itself.
    // NOTE(review): confirm that fallback exists in the MacroAssembler.
    // vector_len 1 is the value paired with vecY operands in this file.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector.
// The source already lives in an XMM register, so a single pshufd that reads
// $src and writes $dst is enough; the format string names $src accordingly.
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd  $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd  $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // vector_len 1 is the value paired with vecY operands in this file.
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate double (8 bytes) scalar to be vector.
// Selector 0x44 picks dwords {0,1,0,1}, i.e. the low 64-bit lane twice.
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd  $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================EVEX REPLICATE=============================================

// Note: some of the legacy forms are applicable to EVEX
//
// Every rule below issues one broadcast instruction. The vector_len local is
// 0 for vecX, 1 for vecY and 2 for vecZ destinations. The 128/256-bit
// byte and short forms are gated on both supports_avx512vl() and
// supports_avx512bw(); most 512-bit forms are gated on UseAVX > 2 only.

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// One broadcast fills the whole 512-bit register, so the annotation reads
// "replicate64B" (the former "upper" prefix described a two-step fill that
// this rule does not perform).
instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): this is the only 512-bit rule in the section gated on
// supports_avx512vl() && supports_avx512bw() instead of UseAVX > 2; the
// predicate is kept as found -- confirm which gating is intended.
instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Immediate forms: load a pre-replicated 8-byte constant-table entry into
// $dst, then broadcast from $dst into itself at the target width.
instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // The predicate requires UseAVX > 2, so the 512-bit vpxor form is
    // requested directly through vector_len = 2.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vl() && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 32 && UseAVX > 2); ! match(Set dst (ReplicateS zero)); ! format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} ! ins_encode %{ ! // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). ! int vector_len = 2; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl4I_evex(vecX dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateI src)); ! format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} ! ins_encode %{ ! int vector_len = 0; ! __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl4I_mem_evex(vecX dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateI (LoadI mem))); ! format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} ! ins_encode %{ ! int vector_len = 0; ! __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl8I_evex(vecY dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateI src)); ! format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} ! ins_encode %{ ! int vector_len = 1; ! __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl8I_mem_evex(vecY dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateI (LoadI mem))); ! format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} ! ins_encode %{ ! int vector_len = 1; ! __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl16I_evex(vecZ dst, rRegI src) %{ ! 
predicate(n->as_Vector()->length() == 16 && UseAVX > 2); ! match(Set dst (ReplicateI src)); ! format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 16 && UseAVX > 2); ! match(Set dst (ReplicateI (LoadI mem))); ! format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl4I_imm_evex(vecX dst, immI con) %{ ! predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateI con)); ! format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" ! "vpbroadcastd $dst,$dst\t! replicate4I" %} ! ins_encode %{ ! int vector_len = 0; ! __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); ! __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl8I_imm_evex(vecY dst, immI con) %{ ! predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateI con)); ! format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" ! "vpbroadcastd $dst,$dst\t! replicate8I" %} ! ins_encode %{ ! int vector_len = 1; ! __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); ! __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl16I_imm_evex(vecZ dst, immI con) %{ ! predicate(n->as_Vector()->length() == 16 && UseAVX > 2); ! match(Set dst (ReplicateI con)); ! format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" ! "vpbroadcastd $dst,$dst\t! replicate16I" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); ! __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateI zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). int vector_len = 2;
*** 3494,3874 **** ins_pipe( fpu_reg_reg ); %} // Replicate long (8 byte) scalar to be vector #ifdef _LP64 ! instruct Repl2L(vecX dst, rRegL src) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateL src)); ! format %{ "movdq $dst,$src\n\t" ! "punpcklqdq $dst,$dst\t! replicate2L" %} ! ins_encode %{ ! __ movdq($dst$$XMMRegister, $src$$Register); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl4L(vecY dst, rRegL src) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateL src)); ! format %{ "movdq $dst,$src\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L" %} ins_encode %{ ! __ movdq($dst$$XMMRegister, $src$$Register); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl8L(vecZ dst, rRegL src) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateL src)); ! format %{ "movdq $dst,$src\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" ! "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} ins_encode %{ ! __ movdq($dst$$XMMRegister, $src$$Register); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} #else // _LP64 ! instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateL src)); ! effect(TEMP dst, USE src, TEMP tmp); ! format %{ "movdl $dst,$src.lo\n\t" ! "movdl $tmp,$src.hi\n\t" ! "punpckldq $dst,$tmp\n\t" ! "punpcklqdq $dst,$dst\t! replicate2L"%} ! ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); ! 
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "movdl $dst,$src.lo\n\t" "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "movdl $dst,$src.lo\n\t" "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" ! "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" ! "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} #endif // _LP64 ! // Replicate long (8 byte) scalar immediate to be vector by loading from const table. ! instruct Repl2L_imm(vecX dst, immL con) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateL con)); ! format %{ "movq $dst,[$constantaddress]\n\t" ! "punpcklqdq $dst,$dst\t! replicate2L($con)" %} ! ins_encode %{ ! __ movq($dst$$XMMRegister, $constantaddress($con)); ! 
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct Repl4L_imm(vecY dst, immL con) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress($con)); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl8L_imm(vecZ dst, immL con) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t" ! "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress($con)); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // Long could be loaded into xmm register directly from memory. ! instruct Repl2L_mem(vecX dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateL (LoadL mem))); ! format %{ "movq $dst,$mem\n\t" ! "punpcklqdq $dst,$dst\t! replicate2L" %} ! ins_encode %{ ! __ movq($dst$$XMMRegister, $mem$$Address); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl4L_mem(vecY dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateL (LoadL mem))); ! format %{ "movq $dst,$mem\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L" %} ins_encode %{ ! __ movq($dst$$XMMRegister, $mem$$Address); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! 
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl8L_mem(vecZ dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateL (LoadL mem))); ! format %{ "movq $dst,$mem\n\t" ! "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" ! "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} ins_encode %{ ! __ movq($dst$$XMMRegister, $mem$$Address); ! __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! // Replicate long (8 byte) scalar zero to be vector ! instruct Repl2L_zero(vecX dst, immL0 zero) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateL zero)); ! format %{ "pxor $dst,$dst\t! replicate2L zero" %} ! ins_encode %{ ! __ pxor($dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl4L_zero(vecY dst, immL0 zero) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateL zero)); ! format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} ! ins_encode %{ ! // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! int vector_len = 1; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl8L_zero(vecZ dst, immL0 zero) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateL zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). int vector_len = 2; __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} ! // Replicate float (4 byte) scalar to be vector ! instruct Repl2F(vecD dst, regF src) %{ ! 
predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateF src)); ! format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} ! ins_encode %{ ! __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl4F(vecX dst, regF src) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateF src)); ! format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} ins_encode %{ ! __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} ! instruct Repl8F(vecY dst, regF src) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (ReplicateF src)); ! format %{ "pshufd $dst,$src,0x00\n\t" ! "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} ins_encode %{ ! __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl16F(vecZ dst, regF src) %{ ! predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateF src)); ! format %{ "pshufd $dst,$src,0x00\n\t" ! "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t" ! "vinsertf64x4h $dst k0,$dst,$dst\t! lower replicate8F" %} ins_encode %{ ! __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! // Replicate float (4 byte) scalar zero to be vector ! instruct Repl2F_zero(vecD dst, immF0 zero) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateF zero)); ! format %{ "xorps $dst,$dst\t! replicate2F zero" %} ! ins_encode %{ ! __ xorps($dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl4F_zero(vecX dst, immF0 zero) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateF zero)); ! format %{ "xorps $dst,$dst\t! replicate4F zero" %} ! ins_encode %{ ! 
__ xorps($dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl8F_zero(vecY dst, immF0 zero) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (ReplicateF zero)); ! format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} ins_encode %{ ! int vector_len = 1; ! __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ! ins_pipe( fpu_reg_reg ); %} ! instruct Repl16F_zero(vecZ dst, immF0 zero) %{ ! predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateF zero)); format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} ins_encode %{ int vector_len = 2; __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} ! // Replicate double (8 bytes) scalar to be vector ! instruct Repl2D(vecX dst, regD src) %{ ! predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateD src)); ! format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} ins_encode %{ ! __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); %} ins_pipe( pipe_slow ); %} ! instruct Repl4D(vecY dst, regD src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateD src)); ! format %{ "pshufd $dst,$src,0x44\n\t" ! "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} ins_encode %{ ! __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl8D(vecZ dst, regD src) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateD src)); ! format %{ "pshufd $dst,$src,0x44\n\t" ! "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t" ! "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate4D" %} ins_encode %{ ! __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! 
// Replicate double (8 byte) scalar zero to be vector ! instruct Repl2D_zero(vecX dst, immD0 zero) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateD zero)); ! format %{ "xorpd $dst,$dst\t! replicate2D zero" %} ! ins_encode %{ ! __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl4D_zero(vecY dst, immD0 zero) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (ReplicateD zero)); ! format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} ins_encode %{ ! int vector_len = 1; ! __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ! ins_pipe( fpu_reg_reg ); %} ! instruct Repl8D_zero(vecZ dst, immD0 zero) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateD zero)); format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} ins_encode %{ int vector_len = 2; __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); --- 4085,4314 ---- ins_pipe( fpu_reg_reg ); %} // Replicate long (8 byte) scalar to be vector #ifdef _LP64 ! instruct Repl4L_evex(vecY dst, rRegL src) %{ ! predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateL src)); ! format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} ins_encode %{ ! int vector_len = 1; ! __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct Repl8L_evex(vecZ dst, rRegL src) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateL src)); ! format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} ins_encode %{ ! int vector_len = 2; ! __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); %} ins_pipe( pipe_slow ); %} #else // _LP64 ! instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ ! 
predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "movdl $dst,$src.lo\n\t" "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" ! "vpbroadcastq $dst,$dst\t! replicate4L" %} ins_encode %{ + int vector_len = 1; __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); ! __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "movdl $dst,$src.lo\n\t" "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" ! "vpbroadcastq $dst,$dst\t! replicate8L" %} ins_encode %{ + int vector_len = 2; __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); ! __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} #endif // _LP64 ! instruct Repl4L_imm_evex(vecY dst, immL con) %{ ! predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" ! "vpbroadcastq $dst,$dst\t! replicate4L" %} ins_encode %{ + int vector_len = 1; __ movq($dst$$XMMRegister, $constantaddress($con)); ! __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct Repl8L_imm_evex(vecZ dst, immL con) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" ! "vpbroadcastq $dst,$dst\t! replicate8L" %} ins_encode %{ + int vector_len = 2; __ movq($dst$$XMMRegister, $constantaddress($con)); ! 
__ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! // Replicate a long loaded from memory into a 4-element vecY; guarded by
! // VM_Version::supports_avx512vl(). The format string names vpbroadcastq so the
! // printed mnemonic agrees with the emitted evpbroadcastq (it used to print vpbroadcastd).
! instruct Repl4L_mem_evex(vecY dst, memory mem) %{
!   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
    match(Set dst (ReplicateL (LoadL mem)));
!   format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
    ins_encode %{
!     int vector_len = 1;
!     __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! // Replicate a long loaded from memory into an 8-element vecZ; guarded by UseAVX > 2.
! // Format string corrected from vpbroadcastd to vpbroadcastq to match the encoder.
! instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
!   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
    match(Set dst (ReplicateL (LoadL mem)));
!   format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
    ins_encode %{
!     int vector_len = 2;
!     __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! // Zero an 8-element long vecZ by xor-ing dst with itself; guarded by UseAVX > 2.
! // NOTE(review): the encoder comment below mentions vxorpd, but the code emits vpxor.
! instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
!   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
    match(Set dst (ReplicateL zero));
    format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
    ins_encode %{
      // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
      int vector_len = 2;
      __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    %}
    ins_pipe( fpu_reg_reg );
  %}

! // Replicate a float register into an 8-element vecY; guarded by VM_Version::supports_avx512vl().
! instruct Repl8F_evex(vecY dst, regF src) %{
!   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
    match(Set dst (ReplicateF src));
!   format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
    ins_encode %{
!     int vector_len = 1;
!     __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! // Same as Repl8F_evex, but the float comes from a LoadF folded into the broadcast.
! instruct Repl8F_mem_evex(vecY dst, memory mem) %{
!   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
!   match(Set dst (ReplicateF (LoadF mem)));
!   format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
    ins_encode %{
!     int vector_len = 1;
!     __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! instruct Repl16F_evex(vecZ dst, regF src) %{
!
predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateF src)); ! format %{ "vbroadcastss $dst,$src\t! replicate16F" %} ins_encode %{ ! int vector_len = 2; ! __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 16 && UseAVX > 2); ! match(Set dst (ReplicateF (LoadF mem))); ! format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} ins_encode %{ ! int vector_len = 2; ! __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); %} ! ins_pipe( pipe_slow ); %} ! instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ ! predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateF zero)); format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} ins_encode %{ int vector_len = 2; __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} ! instruct Repl4D_evex(vecY dst, regD src) %{ ! predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateD src)); ! format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} ins_encode %{ ! int vector_len = 1; ! __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct Repl4D_mem_evex(vecY dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); ! match(Set dst (ReplicateD (LoadD mem))); ! format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} ins_encode %{ ! int vector_len = 1; ! __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct Repl8D_evex(vecZ dst, regD src) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateD src)); ! format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} ins_encode %{ ! int vector_len = 2; ! 
__ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 2); ! match(Set dst (ReplicateD (LoadD mem))); ! format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} ins_encode %{ ! int vector_len = 2; ! __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); %} ! ins_pipe( pipe_slow ); %} ! instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ ! predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateD zero)); format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} ins_encode %{ int vector_len = 2; __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
*** 4961,4975 ****
    ins_pipe( pipe_slow );
  %}

  instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
!   match(Set dst (AddVB src1 src2));
!   format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
    ins_encode %{
      int vector_len = 0;
!     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vadd8B(vecD dst, vecD src) %{
--- 5401,5426 ----
    ins_pipe( pipe_slow );
  %}

  instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
!   match(Set dst (AddVB src1 src2));
!   format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
!   ins_encode %{
!     int vector_len = 0;
!     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
!   %}
!   ins_pipe( pipe_slow );
! %}
!
! // AVX form (UseAVX > 0) of the 4-element AddVB: the second input is a LoadVector
! // that is folded into vpaddb as a memory operand.
! instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
!   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
!   match(Set dst (AddVB src (LoadVector mem)));
!   format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
    ins_encode %{
      int vector_len = 0;
!     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vadd8B(vecD dst, vecD src) %{
*** 4991,5000 ****
--- 5442,5462 ----
      __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 8-element AddVB: the second input is a LoadVector
+ // that is folded into vpaddb as a memory operand.
+ instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+   match(Set dst (AddVB src (LoadVector mem)));
+   format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vadd16B(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 16);
    match(Set dst (AddVB dst src));
    format %{ "paddb $dst,$src\t! add packed16B" %}
    ins_encode %{
*** 5089,5098 ****
--- 5551,5571 ----
      __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 2-element AddVS: the second input is a LoadVector
+ // that is folded into vpaddw as a memory operand.
+ instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+   match(Set dst (AddVS src (LoadVector mem)));
+   format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vadd4S(vecD dst, vecD src) %{
    predicate(n->as_Vector()->length() == 4);
    match(Set dst (AddVS dst src));
    format %{ "paddw $dst,$src\t! add packed4S" %}
    ins_encode %{
*** 5110,5119 ****
--- 5583,5603 ----
      __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 4-element AddVS: the second input is a LoadVector
+ // that is folded into vpaddw as a memory operand.
+ instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+   match(Set dst (AddVS src (LoadVector mem)));
+   format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vadd8S(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 8);
    match(Set dst (AddVS dst src));
    format %{ "paddw $dst,$src\t! add packed8S" %}
    ins_encode %{
*** 5208,5217 ****
--- 5692,5712 ----
      __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 2-element AddVI: the second input is a LoadVector
+ // that is folded into vpaddd as a memory operand.
+ instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+   match(Set dst (AddVI src (LoadVector mem)));
+   format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vadd4I(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 4);
    match(Set dst (AddVI dst src));
    format %{ "paddd $dst,$src\t! add packed4I" %}
    ins_encode %{
*** 5383,5392 ****
--- 5878,5898 ----
      __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 2-element AddVF: the second input is a LoadVector
+ // that is folded into vaddps as a memory operand.
+ instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+   match(Set dst (AddVF src (LoadVector mem)));
+   format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vadd4F(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 4);
    match(Set dst (AddVF dst src));
    format %{ "addps $dst,$src\t! add packed4F" %}
    ins_encode %{
*** 5560,5569 ****
--- 6066,6086 ----
      __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 4-element SubVB: the subtrahend is a LoadVector
+ // that is folded into vpsubb as a memory operand.
+ instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+   match(Set dst (SubVB src (LoadVector mem)));
+   format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vsub8B(vecD dst, vecD src) %{
    predicate(n->as_Vector()->length() == 8);
    match(Set dst (SubVB dst src));
    format %{ "psubb $dst,$src\t! sub packed8B" %}
    ins_encode %{
*** 5581,5590 ****
--- 6098,6118 ----
      __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 8-element SubVB: the subtrahend is a LoadVector
+ // that is folded into vpsubb as a memory operand.
+ instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+   match(Set dst (SubVB src (LoadVector mem)));
+   format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vsub16B(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 16);
    match(Set dst (SubVB dst src));
    format %{ "psubb $dst,$src\t! sub packed16B" %}
    ins_encode %{
*** 5679,5688 ****
--- 6207,6227 ----
      __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 2-element SubVS: the subtrahend is a LoadVector
+ // that is folded into vpsubw as a memory operand.
+ instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+   match(Set dst (SubVS src (LoadVector mem)));
+   format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vsub4S(vecD dst, vecD src) %{
    predicate(n->as_Vector()->length() == 4);
    match(Set dst (SubVS dst src));
    format %{ "psubw $dst,$src\t! sub packed4S" %}
    ins_encode %{
*** 5700,5709 ****
--- 6239,6259 ----
      __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 4-element SubVS: the subtrahend is a LoadVector
+ // that is folded into vpsubw as a memory operand.
+ instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+   match(Set dst (SubVS src (LoadVector mem)));
+   format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vsub8S(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 8);
    match(Set dst (SubVS dst src));
    format %{ "psubw $dst,$src\t! sub packed8S" %}
    ins_encode %{
*** 5798,5807 ****
--- 6348,6368 ----
      __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 2-element SubVI: the subtrahend is a LoadVector
+ // that is folded into vpsubd as a memory operand.
+ instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+   match(Set dst (SubVI src (LoadVector mem)));
+   format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vsub4I(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 4);
    match(Set dst (SubVI dst src));
    format %{ "psubd $dst,$src\t! sub packed4I" %}
    ins_encode %{
*** 5973,5982 ****
--- 6534,6554 ----
      __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 2-element SubVF: the subtrahend is a LoadVector
+ // that is folded into vsubps as a memory operand.
+ instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+   match(Set dst (SubVF src (LoadVector mem)));
+   format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vsub4F(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 4);
    match(Set dst (SubVF dst src));
    format %{ "subps $dst,$src\t! sub packed4F" %}
    ins_encode %{
*** 6150,6159 ****
--- 6722,6742 ----
      __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 2-element MulVS: the second input is a LoadVector
+ // that is folded into vpmullw as a memory operand.
+ instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+   match(Set dst (MulVS src (LoadVector mem)));
+   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vmul4S(vecD dst, vecD src) %{
    predicate(n->as_Vector()->length() == 4);
    match(Set dst (MulVS dst src));
    format %{ "pmullw $dst,$src\t! mul packed4S" %}
    ins_encode %{
*** 6171,6180 ****
--- 6754,6774 ----
      __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 4-element MulVS: the second input is a LoadVector
+ // that is folded into vpmullw as a memory operand.
+ instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+   match(Set dst (MulVS src (LoadVector mem)));
+   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vmul8S(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 8);
    match(Set dst (MulVS dst src));
    format %{ "pmullw $dst,$src\t! mul packed8S" %}
    ins_encode %{
*** 6269,6285 ****
      __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
!   match(Set dst (MulVL src1 src2));
!   format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
    ins_encode %{
      int vector_len = 0;
!     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vmul4I(vecX dst, vecX src) %{
--- 6863,6879 ----
      __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! // AVX form (UseAVX > 0) of the 2-element MulVI: the second input is a LoadVector
! // that is folded into vpmulld as a memory operand.
! instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
!   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
!   match(Set dst (MulVI src (LoadVector mem)));
!   format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
    ins_encode %{
      int vector_len = 0;
!     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vmul4I(vecX dst, vecX src) %{
*** 6312,6321 ****
--- 6906,6937 ----
      __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // 2-element MulVL on registers via vpmullq; the predicate requires UseAVX > 2
+ // and VM_Version::supports_avx512dq().
+ instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
+   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
+   match(Set dst (MulVL src1 src2));
+   format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
+ // Same predicate as vmul2L_reg, with the second input taken from a LoadVector
+ // folded into vpmullq as a memory operand.
+ instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
+   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
+   match(Set dst (MulVL src (LoadVector mem)));
+   format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
    predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
    match(Set dst (MulVL src1 src2));
    format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
    ins_encode %{
*** 6334,6371 ****
      __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
!   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
!   match(Set dst (MulVI src1 src2));
!   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
    ins_encode %{
!     int vector_len = 1;
!     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
    predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
!   match(Set dst (MulVL src1 src2));
!   format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
    ins_encode %{
      int vector_len = 2;
!     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
    match(Set dst (MulVI src1 src2));
!   format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
    ins_encode %{
!     int vector_len = 2;
      __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

--- 6950,6987 ----
      __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! // 8-element MulVL on registers via vpmullq; the predicate requires UseAVX > 2
! // and VM_Version::supports_avx512dq().
! instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
!   match(Set dst (MulVL src1 src2));
!   format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
    ins_encode %{
!     int vector_len = 2;
!     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! // Same predicate as vmul8L_reg, with the second input taken from a LoadVector
! // folded into vpmullq as a memory operand.
! instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
    predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
!   match(Set dst (MulVL src (LoadVector mem)));
!   format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
    ins_encode %{
      int vector_len = 2;
!     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! // 8-element MulVI on registers via vpmulld; the predicate requires UseAVX > 1.
! instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
!   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
    match(Set dst (MulVI src1 src2));
!   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
    ins_encode %{
!     int vector_len = 1;
      __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}
*** 6378,6394 ****
      __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
!   match(Set dst (MulVL src (LoadVector mem)));
!   format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
    ins_encode %{
      int vector_len = 2;
!     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
--- 6994,7010 ----
      __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

! // 16-element MulVI on registers via vpmulld; the predicate requires UseAVX > 2.
! instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
!   match(Set dst (MulVI src1 src2));
!   format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
    ins_encode %{
      int vector_len = 2;
!     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
*** 6422,6431 ****
--- 7038,7058 ----
      __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 2-element MulVF: the second input is a LoadVector
+ // that is folded into vmulps as a memory operand.
+ instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+   match(Set dst (MulVF src (LoadVector mem)));
+   format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vmul4F(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 4);
    match(Set dst (MulVF dst src));
    format %{ "mulps $dst,$src\t! mul packed4F" %}
    ins_encode %{
*** 6599,6608 ****
--- 7226,7246 ----
      __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of the 2-element DivVF: the divisor is a LoadVector
+ // that is folded into vdivps as a memory operand.
+ instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+   match(Set dst (DivVF src (LoadVector mem)));
+   format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vdiv4F(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length() == 4);
    match(Set dst (DivVF dst src));
    format %{ "divps $dst,$src\t! div packed4F" %}
    ins_encode %{
*** 7876,7885 ****
--- 8514,8534 ----
      __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ // AVX form (UseAVX > 0) of AndV on 4-byte vectors: the second input is a
+ // LoadVector that is folded into vpand as a memory operand.
+ instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
+   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+   match(Set dst (AndV src (LoadVector mem)));
+   format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
+   ins_encode %{
+     int vector_len = 0;
+     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vand8B(vecD dst, vecD src) %{
    predicate(n->as_Vector()->length_in_bytes() == 8);
    match(Set dst (AndV dst src));
    format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
    ins_encode %{
*** 7897,7906 ****
--- 8546,8566 ----
      __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ // AVX bitwise AND of an 8-byte vector with a vector operand taken from memory
+   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); // rule applies only when UseAVX > 0 and the vector is 8 bytes wide
+   match(Set dst (AndV src (LoadVector mem))); // the LoadVector input is folded into the AND as its memory operand
+   format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
+   ins_encode %{
+     int vector_len = 0; // NOTE(review): 0 is presumably the 128-bit vector-length encoding - confirm against the assembler
+     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); // three-operand form: dst is written, src is not overwritten
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vand16B(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (AndV dst src));
    format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
    ins_encode %{
*** 7996,8005 ****
--- 8656,8676 ----
      __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ // AVX bitwise OR of a 4-byte vector with a vector operand taken from memory
+   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); // rule applies only when UseAVX > 0 and the vector is 4 bytes wide
+   match(Set dst (OrV src (LoadVector mem))); // the LoadVector input is folded into the OR as its memory operand
+   format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
+   ins_encode %{
+     int vector_len = 0; // NOTE(review): 0 is presumably the 128-bit vector-length encoding - confirm against the assembler
+     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); // three-operand form: dst is written, src is not overwritten
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vor8B(vecD dst, vecD src) %{
    predicate(n->as_Vector()->length_in_bytes() == 8);
    match(Set dst (OrV dst src));
    format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
    ins_encode %{
*** 8017,8026 ****
--- 8688,8708 ----
      __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ // AVX bitwise OR of an 8-byte vector with a vector operand taken from memory
+   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); // must be 8 to agree with the vecD operands and the "(8 bytes)" format text; a 4-byte test would duplicate vor4B_mem and never select this rule for 8-byte OrV
+   match(Set dst (OrV src (LoadVector mem))); // the LoadVector input is folded into the OR as its memory operand
+   format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
+   ins_encode %{
+     int vector_len = 0; // NOTE(review): 0 is presumably the 128-bit vector-length encoding - confirm against the assembler
+     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); // three-operand form: dst is written, src is not overwritten
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vor16B(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (OrV dst src));
    format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
    ins_encode %{
*** 8116,8125 ****
--- 8798,8818 ----
      __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ // AVX bitwise XOR of a 4-byte vector with a vector operand taken from memory
+   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); // rule applies only when UseAVX > 0 and the vector is 4 bytes wide
+   match(Set dst (XorV src (LoadVector mem))); // the LoadVector input is folded into the XOR as its memory operand
+   format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
+   ins_encode %{
+     int vector_len = 0; // NOTE(review): 0 is presumably the 128-bit vector-length encoding - confirm against the assembler
+     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); // three-operand form: dst is written, src is not overwritten
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vxor8B(vecD dst, vecD src) %{
    predicate(n->as_Vector()->length_in_bytes() == 8);
    match(Set dst (XorV dst src));
    format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
    ins_encode %{
*** 8137,8146 ****
--- 8830,8850 ----
      __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

+ instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ // AVX bitwise XOR of an 8-byte vector with a vector operand taken from memory
+   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); // rule applies only when UseAVX > 0 and the vector is 8 bytes wide
+   match(Set dst (XorV src (LoadVector mem))); // the LoadVector input is folded into the XOR as its memory operand
+   format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
+   ins_encode %{
+     int vector_len = 0; // NOTE(review): 0 is presumably the 128-bit vector-length encoding - confirm against the assembler
+     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); // three-operand form: dst is written, src is not overwritten
+   %}
+   ins_pipe( pipe_slow );
+ %}
+
  instruct vxor16B(vecX dst, vecX src) %{
    predicate(n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (XorV dst src));
    format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
    ins_encode %{
< prev index next >