src/cpu/x86/vm/x86.ad
Index
Unified diffs
Context diffs
Sdiffs
Patch
New
Old
Previous File
Next File
hotspot Cdiff src/cpu/x86/vm/x86.ad
src/cpu/x86/vm/x86.ad
Print this page
rev 10354 : imported patch vextrinscleanup2
*** 3177,3209 ****
match(Set dst (ReplicateB src));
format %{ "movd $dst,$src\n\t"
"punpcklbw $dst,$dst\n\t"
"pshuflw $dst,$dst,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Repl32B_mem: rule for ReplicateB of a byte loaded from memory (LoadB mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 32
// and to CPUs where VM_Version::supports_avx512vlbw() is false.
instruct Repl32B_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB (LoadB mem)));
format %{ "punpcklbw $dst,$mem\n\t"
"pshuflw $dst,$dst,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
ins_encode %{
// Sequence: punpcklbw with the memory operand, pshuflw (imm 0x00), punpcklqdq,
// then vinserti128h with $dst as all three register operands.
// NOTE(review): punpcklbw is the first instruction and takes $dst as an operand
// before anything in this rule has written it - confirm the resulting bytes are
// what ReplicateB expects.
// NOTE(review): the "h" suffix presumably means the high 128-bit half of the vecY
// is the insertion target - confirm against the assembler.
__ punpcklbw($dst$$XMMRegister, $mem$$Address);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16B_imm(vecX dst, immI con) %{
--- 3177,3209 ----
match(Set dst (ReplicateB src));
format %{ "movd $dst,$src\n\t"
"punpcklbw $dst,$dst\n\t"
"pshuflw $dst,$dst,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate32B" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
// Repl32B_mem: rule for ReplicateB of a byte loaded from memory (LoadB mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 32
// and to CPUs where VM_Version::supports_avx512vlbw() is false.
instruct Repl32B_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB (LoadB mem)));
format %{ "punpcklbw $dst,$mem\n\t"
"pshuflw $dst,$dst,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate32B" %}
ins_encode %{
// Sequence: punpcklbw with the memory operand, pshuflw (imm 0x00), punpcklqdq,
// then vinserti128 with $dst as all three register operands and immediate 0x1.
// NOTE(review): punpcklbw is the first instruction and takes $dst as an operand
// before anything in this rule has written it - confirm the resulting bytes are
// what ReplicateB expects.
// NOTE(review): immediate 0x1 presumably selects the upper 128-bit half of the
// vecY as the insertion target - confirm against the assembler's vinserti128.
__ punpcklbw($dst$$XMMRegister, $mem$$Address);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16B_imm(vecX dst, immI con) %{
*** 3221,3235 ****
// Repl32B_imm: rule for ReplicateB of an immI constant into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 32 and to CPUs
// where VM_Version::supports_avx512vlbw() is false.
instruct Repl32B_imm(vecY dst, immI con) %{
predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
ins_encode %{
// movq loads from the constant-table address of replicate8_imm($con$$constant, 1).
// replicate8_imm is defined outside this view; its second argument is presumably
// the element width in bytes (TODO confirm).
// Then punpcklqdq and vinserti128h, each with $dst as every register operand.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4S(vecD dst, rRegI src) %{
--- 3221,3235 ----
// Repl32B_imm: rule for ReplicateB of an immI constant into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 32 and to CPUs
// where VM_Version::supports_avx512vlbw() is false.
instruct Repl32B_imm(vecY dst, immI con) %{
predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! lreplicate32B($con)" %}
ins_encode %{
// movq loads from the constant-table address of replicate8_imm($con$$constant, 1).
// replicate8_imm is defined outside this view; its second argument is presumably
// the element width in bytes (TODO confirm).
// Then punpcklqdq and vinserti128 (imm 0x1), each with $dst as every register operand.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4S(vecD dst, rRegI src) %{
*** 3296,3339 ****
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS src));
format %{ "movd $dst,$src\n\t"
"pshuflw $dst,$dst,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Repl16S_mem: rule for ReplicateS of a short loaded from memory (LoadS mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 16
// and to CPUs where VM_Version::supports_avx512vlbw() is false.
instruct Repl16S_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS (LoadS mem)));
format %{ "pshuflw $dst,$mem,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
ins_encode %{
// Sequence: pshuflw straight from the memory operand (imm 0x00), punpcklqdq,
// then vinserti128h with $dst as all three register operands.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Repl16S_imm: rule for ReplicateS of an immI constant into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 16 and to CPUs
// where VM_Version::supports_avx512vlbw() is false.
instruct Repl16S_imm(vecY dst, immI con) %{
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS con));
format %{ "movq $dst,[$constantaddress]\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
ins_encode %{
// movq loads from the constant-table address of replicate8_imm($con$$constant, 2).
// replicate8_imm is defined outside this view; its second argument is presumably
// the element width in bytes (TODO confirm).
// Then punpcklqdq and vinserti128h, each with $dst as every register operand.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4I(vecX dst, rRegI src) %{
--- 3296,3339 ----
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS src));
format %{ "movd $dst,$src\n\t"
"pshuflw $dst,$dst,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
// Repl16S_mem: rule for ReplicateS of a short loaded from memory (LoadS mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 16
// and to CPUs where VM_Version::supports_avx512vlbw() is false.
instruct Repl16S_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS (LoadS mem)));
format %{ "pshuflw $dst,$mem,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S" %}
ins_encode %{
// Sequence: pshuflw straight from the memory operand (imm 0x00), punpcklqdq,
// then vinserti128 with $dst as all three register operands and immediate 0x1.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
// Repl16S_imm: rule for ReplicateS of an immI constant into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 16 and to CPUs
// where VM_Version::supports_avx512vlbw() is false.
instruct Repl16S_imm(vecY dst, immI con) %{
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS con));
format %{ "movq $dst,[$constantaddress]\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S($con)" %}
ins_encode %{
// movq loads from the constant-table address of replicate8_imm($con$$constant, 2).
// replicate8_imm is defined outside this view; its second argument is presumably
// the element width in bytes (TODO confirm).
// Then punpcklqdq and vinserti128 (imm 0x1), each with $dst as every register operand.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4I(vecX dst, rRegI src) %{
*** 3361,3387 ****
// Repl8I: rule for ReplicateI of an int register (rRegI src) into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 8 and to CPUs where
// VM_Version::supports_avx512vl() is false.
instruct Repl8I(vecY dst, rRegI src) %{
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateI src));
format %{ "movd $dst,$src\n\t"
"pshufd $dst,$dst,0x00\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
ins_encode %{
// Sequence: movdl from the general register into $dst, pshufd (imm 0x00) on $dst,
// then vinserti128h with $dst as all three register operands.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Repl8I_mem: rule for ReplicateI of an int loaded from memory (LoadI mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 8
// and to CPUs where VM_Version::supports_avx512vl() is false.
instruct Repl8I_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateI (LoadI mem)));
format %{ "pshufd $dst,$mem,0x00\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
ins_encode %{
// Sequence: pshufd straight from the memory operand (imm 0x00), then
// vinserti128h with $dst as all three register operands.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4I_imm(vecX dst, immI con) %{
--- 3361,3387 ----
// Repl8I: rule for ReplicateI of an int register (rRegI src) into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 8 and to CPUs where
// VM_Version::supports_avx512vl() is false.
instruct Repl8I(vecY dst, rRegI src) %{
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateI src));
format %{ "movd $dst,$src\n\t"
"pshufd $dst,$dst,0x00\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate8I" %}
ins_encode %{
// Sequence: movdl from the general register into $dst, pshufd (imm 0x00) on $dst,
// then vinserti128 with $dst as all three register operands and immediate 0x1.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
// Repl8I_mem: rule for ReplicateI of an int loaded from memory (LoadI mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 8
// and to CPUs where VM_Version::supports_avx512vl() is false.
instruct Repl8I_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateI (LoadI mem)));
format %{ "pshufd $dst,$mem,0x00\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate8I" %}
ins_encode %{
// Sequence: pshufd straight from the memory operand (imm 0x00), then
// vinserti128 with $dst as all three register operands and immediate 0x1.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4I_imm(vecX dst, immI con) %{
*** 3399,3413 ****
// Repl8I_imm: rule for ReplicateI of an immI constant into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 8 and to CPUs where
// VM_Version::supports_avx512vl() is false.
instruct Repl8I_imm(vecY dst, immI con) %{
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateI con));
format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst" %}
ins_encode %{
// movq loads from the constant-table address of replicate8_imm($con$$constant, 4).
// replicate8_imm is defined outside this view; its second argument is presumably
// the element width in bytes (TODO confirm).
// Then punpcklqdq and vinserti128h, each with $dst as every register operand.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Long could be loaded into xmm register directly from memory.
--- 3399,3413 ----
// Repl8I_imm: rule for ReplicateI of an immI constant into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 8 and to CPUs where
// VM_Version::supports_avx512vl() is false.
instruct Repl8I_imm(vecY dst, immI con) %{
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateI con));
format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1" %}
ins_encode %{
// movq loads from the constant-table address of replicate8_imm($con$$constant, 4).
// replicate8_imm is defined outside this view; its second argument is presumably
// the element width in bytes (TODO confirm).
// Then punpcklqdq and vinserti128 (imm 0x1), each with $dst as every register operand.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
// Long could be loaded into xmm register directly from memory.
*** 3428,3442 ****
// Repl4L (rRegL source variant): rule for ReplicateL of a long register into a vecY
// destination. The predicate restricts it to vector nodes whose length() is 4 and
// to CPUs where VM_Version::supports_avx512vl() is false.
instruct Repl4L(vecY dst, rRegL src) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL src));
format %{ "movdq $dst,$src\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
ins_encode %{
// Sequence: movdq from the general register into $dst, punpcklqdq on $dst,
// then vinserti128h with $dst as all three register operands.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ movdq($dst$$XMMRegister, $src$$Register);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
--- 3428,3442 ----
// Repl4L (rRegL source variant): rule for ReplicateL of a long register into a vecY
// destination. The predicate restricts it to vector nodes whose length() is 4 and
// to CPUs where VM_Version::supports_avx512vl() is false.
instruct Repl4L(vecY dst, rRegL src) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL src));
format %{ "movdq $dst,$src\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %}
ins_encode %{
// Sequence: movdq from the general register into $dst, punpcklqdq on $dst,
// then vinserti128 with $dst as all three register operands and immediate 0x1.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ movdq($dst$$XMMRegister, $src$$Register);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
*** 3445,3490 ****
effect(TEMP dst, USE src, TEMP tmp);
format %{ "movdl $dst,$src.lo\n\t"
"movdl $tmp,$src.hi\n\t"
"punpckldq $dst,$tmp\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
#endif // _LP64
// Repl4L_imm: rule for ReplicateL of an immL constant into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 4 and to CPUs where
// VM_Version::supports_avx512vl() is false.
instruct Repl4L_imm(vecY dst, immL con) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL con));
format %{ "movq $dst,[$constantaddress]\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
ins_encode %{
// Sequence: movq from the constant-table address of $con, punpcklqdq on $dst,
// then vinserti128h with $dst as all three register operands.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ movq($dst$$XMMRegister, $constantaddress($con));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Repl4L_mem: rule for ReplicateL of a long loaded from memory (LoadL mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 4
// and to CPUs where VM_Version::supports_avx512vl() is false.
instruct Repl4L_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL (LoadL mem)));
format %{ "movq $dst,$mem\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
ins_encode %{
// Sequence: movq from the memory operand into $dst, punpcklqdq on $dst,
// then vinserti128h with $dst as all three register operands.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ movq($dst$$XMMRegister, $mem$$Address);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct Repl2F_mem(vecD dst, memory mem) %{
--- 3445,3490 ----
effect(TEMP dst, USE src, TEMP tmp);
format %{ "movdl $dst,$src.lo\n\t"
"movdl $tmp,$src.hi\n\t"
"punpckldq $dst,$tmp\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
#endif // _LP64
// Repl4L_imm: rule for ReplicateL of an immL constant into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 4 and to CPUs where
// VM_Version::supports_avx512vl() is false.
instruct Repl4L_imm(vecY dst, immL con) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL con));
format %{ "movq $dst,[$constantaddress]\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L($con)" %}
ins_encode %{
// Sequence: movq from the constant-table address of $con, punpcklqdq on $dst,
// then vinserti128 with $dst as all three register operands and immediate 0x1.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ movq($dst$$XMMRegister, $constantaddress($con));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
// Repl4L_mem: rule for ReplicateL of a long loaded from memory (LoadL mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 4
// and to CPUs where VM_Version::supports_avx512vl() is false.
instruct Repl4L_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL (LoadL mem)));
format %{ "movq $dst,$mem\n\t"
"punpcklqdq $dst,$dst\n\t"
! "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %}
ins_encode %{
// Sequence: movq from the memory operand into $dst, punpcklqdq on $dst,
// then vinserti128 with $dst as all three register operands and immediate 0x1.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ movq($dst$$XMMRegister, $mem$$Address);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
instruct Repl2F_mem(vecD dst, memory mem) %{
*** 3509,3534 ****
// Repl8F: rule for ReplicateF of a float register (regF src) into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 8 and to CPUs where
// VM_Version::supports_avx512vl() is false.
instruct Repl8F(vecY dst, regF src) %{
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateF src));
format %{ "pshufd $dst,$src,0x00\n\t"
! "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
ins_encode %{
// Sequence: pshufd from $src into $dst (imm 0x00), then vinsertf128h with $dst
// as all three register operands.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Repl8F_mem: rule for ReplicateF of a float loaded from memory (LoadF mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 8
// and to CPUs where VM_Version::supports_avx512vl() is false.
instruct Repl8F_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateF (LoadF mem)));
format %{ "pshufd $dst,$mem,0x00\n\t"
! "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
ins_encode %{
// Sequence: pshufd straight from the memory operand (imm 0x00), then
// vinsertf128h with $dst as all three register operands.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct Repl2F_zero(vecD dst, immF0 zero) %{
--- 3509,3534 ----
// Repl8F: rule for ReplicateF of a float register (regF src) into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 8 and to CPUs where
// VM_Version::supports_avx512vl() is false.
instruct Repl8F(vecY dst, regF src) %{
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateF src));
format %{ "pshufd $dst,$src,0x00\n\t"
! "vinsertf128 $dst,$dst,$dst,0x1\t! replicate8F" %}
ins_encode %{
// Sequence: pshufd from $src into $dst (imm 0x00), then vinsertf128 with $dst
// as all three register operands and immediate 0x1.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
! __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
// Repl8F_mem: rule for ReplicateF of a float loaded from memory (LoadF mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 8
// and to CPUs where VM_Version::supports_avx512vl() is false.
instruct Repl8F_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateF (LoadF mem)));
format %{ "pshufd $dst,$mem,0x00\n\t"
! "vinsertf128 $dst,$dst,$dst,0x1\t! replicate8F" %}
ins_encode %{
// Sequence: pshufd straight from the memory operand (imm 0x00), then
// vinsertf128 with $dst as all three register operands and immediate 0x1.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
! __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
instruct Repl2F_zero(vecD dst, immF0 zero) %{
*** 3574,3599 ****
// Repl4D: rule for ReplicateD of a double register (regD src) into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 4 and to CPUs where
// VM_Version::supports_avx512vl() is false.
instruct Repl4D(vecY dst, regD src) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateD src));
format %{ "pshufd $dst,$src,0x44\n\t"
! "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
ins_encode %{
// Sequence: pshufd from $src into $dst (imm 0x44), then vinsertf128h with $dst
// as all three register operands.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Repl4D_mem: rule for ReplicateD of a double loaded from memory (LoadD mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 4
// and to CPUs where VM_Version::supports_avx512vl() is false.
instruct Repl4D_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateD (LoadD mem)));
format %{ "pshufd $dst,$mem,0x44\n\t"
! "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
ins_encode %{
// Sequence: pshufd straight from the memory operand (imm 0x44), then
// vinsertf128h with $dst as all three register operands.
// NOTE(review): the "h" suffix presumably targets the high 128-bit half - confirm.
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Replicate double (8 byte) scalar zero to be vector
--- 3574,3599 ----
// Repl4D: rule for ReplicateD of a double register (regD src) into a vecY destination.
// The predicate restricts it to vector nodes whose length() is 4 and to CPUs where
// VM_Version::supports_avx512vl() is false.
instruct Repl4D(vecY dst, regD src) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateD src));
format %{ "pshufd $dst,$src,0x44\n\t"
! "vinsertf128 $dst,$dst,$dst,0x1\t! replicate4D" %}
ins_encode %{
// Sequence: pshufd from $src into $dst (imm 0x44), then vinsertf128 with $dst
// as all three register operands and immediate 0x1.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
! __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
// Repl4D_mem: rule for ReplicateD of a double loaded from memory (LoadD mem) into a
// vecY destination. The predicate restricts it to vector nodes whose length() is 4
// and to CPUs where VM_Version::supports_avx512vl() is false.
instruct Repl4D_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateD (LoadD mem)));
format %{ "pshufd $dst,$mem,0x44\n\t"
! "vinsertf128 $dst,$dst,$dst,0x1\t! replicate4D" %}
ins_encode %{
// Sequence: pshufd straight from the memory operand (imm 0x44), then
// vinsertf128 with $dst as all three register operands and immediate 0x1.
// NOTE(review): 0x1 presumably selects the upper 128-bit half - confirm.
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
! __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
// Replicate double (8 byte) scalar zero to be vector
*** 4789,4808 ****
predicate(VM_Version::supports_avxonly());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"vphaddd $tmp,$tmp,$tmp2\n\t"
! "vextracti128 $tmp2,$tmp\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp2,$tmp\n\t"
"movd $dst,$tmp2\t! add reduction8I" %}
ins_encode %{
int vector_len = 1;
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
! __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
--- 4789,4808 ----
predicate(VM_Version::supports_avxonly());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"vphaddd $tmp,$tmp,$tmp2\n\t"
! "vextracti128 $tmp2,$tmp,0x1\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp2,$tmp\n\t"
"movd $dst,$tmp2\t! add reduction8I" %}
ins_encode %{
int vector_len = 1;
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
! __ vextracti128($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
*** 4811,4832 ****
instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
! format %{ "vextracti128 $tmp,$src2\n\t"
"vpaddd $tmp,$tmp,$src2\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"pshufd $tmp2,$tmp,0x1\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! add reduction8I" %}
ins_encode %{
int vector_len = 0;
! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
--- 4811,4832 ----
instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
! format %{ "vextracti128 $tmp,$src2,0x1\n\t"
"vpaddd $tmp,$tmp,$src2\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"pshufd $tmp2,$tmp,0x1\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! add reduction8I" %}
ins_encode %{
int vector_len = 0;
! __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
*** 4841,4863 ****
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
"vpaddd $tmp3,$tmp3,$src2\n\t"
! "vextracti128 $tmp,$tmp3\n\t"
"vpaddd $tmp,$tmp,$tmp3\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"pshufd $tmp2,$tmp,0x1\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction16I" %}
ins_encode %{
! __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
__ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
! __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
--- 4841,4863 ----
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
"vpaddd $tmp3,$tmp3,$src2\n\t"
! "vextracti128 $tmp,$tmp3,0x1\n\t"
"vpaddd $tmp,$tmp,$tmp3\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"pshufd $tmp2,$tmp,0x1\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction16I" %}
ins_encode %{
! __ vextracti64x4($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
! __ vextracti128($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x1);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
*** 4890,4908 ****
instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
! format %{ "vextracti128 $tmp,$src2\n\t"
"vpaddq $tmp2,$tmp,$src2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! add reduction4L" %}
ins_encode %{
! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdq($tmp$$XMMRegister, $src1$$Register);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
--- 4890,4908 ----
instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
! format %{ "vextracti128 $tmp,$src2,0x1\n\t"
"vpaddq $tmp2,$tmp,$src2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! add reduction4L" %}
ins_encode %{
! __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdq($tmp$$XMMRegister, $src1$$Register);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
*** 4915,4935 ****
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
"vpaddq $tmp2,$tmp2,$src2\n\t"
! "vextracti128 $tmp,$tmp2\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! add reduction8L" %}
ins_encode %{
! __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
! __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdq($tmp$$XMMRegister, $src1$$Register);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
--- 4915,4935 ----
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
"vpaddq $tmp2,$tmp2,$src2\n\t"
! "vextracti128 $tmp,$tmp2,0x1\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! add reduction8L" %}
ins_encode %{
! __ vextracti64x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
! __ vextracti128($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdq($tmp$$XMMRegister, $src1$$Register);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
*** 5024,5034 ****
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
! "vextractf128 $tmp2,$src2\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
--- 5024,5034 ----
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
! "vextractf128 $tmp2,$src2,0x1\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
*** 5040,5050 ****
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
--- 5040,5050 ----
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
*** 5063,5089 ****
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x1\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x2\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x3\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
--- 5063,5089 ----
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x2\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x3\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vaddss $dst,$dst,$tmp\n\t"
*** 5095,5121 ****
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
--- 5095,5121 ----
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
*** 5160,5178 ****
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
! "vextractf32x4h $tmp2,$src2, 0x1\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
ins_encode %{
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
--- 5160,5178 ----
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
ins_encode %{
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
*** 5183,5217 ****
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x1\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x2\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x3\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
ins_encode %{
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
--- 5183,5217 ----
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x2\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x3\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
ins_encode %{
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
*** 5305,5326 ****
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
! format %{ "vextracti128 $tmp,$src2\n\t"
"vpmulld $tmp,$tmp,$src2\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"pshufd $tmp2,$tmp,0x1\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpmulld $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction8I" %}
ins_encode %{
int vector_len = 0;
! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
--- 5305,5326 ----
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
! format %{ "vextracti128 $tmp,$src2,0x1\n\t"
"vpmulld $tmp,$tmp,$src2\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"pshufd $tmp2,$tmp,0x1\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpmulld $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction8I" %}
ins_encode %{
int vector_len = 0;
! __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
*** 5335,5357 ****
predicate(UseAVX > 2);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
// Listing for the 16I multiply reduction; keep it in step with ins_encode below.
// After the 128-bit extract the multiply combines $tmp with $tmp3 (not $src2),
// which is what the encoding emits.
format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
"vpmulld $tmp3,$tmp3,$src2\n\t"
! "vextracti128 $tmp,$tmp3\n\t"
"vpmulld $tmp,$tmp,$tmp3\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"pshufd $tmp2,$tmp,0x1\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpmulld $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction16I" %}
ins_encode %{
! __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
__ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
! __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
--- 5335,5357 ----
predicate(UseAVX > 2);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
// Listing for the 16I multiply reduction; keep it in step with ins_encode below.
// After the 128-bit extract the multiply combines $tmp with $tmp3 (not $src2),
// which is what the encoding emits.
format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
"vpmulld $tmp3,$tmp3,$src2\n\t"
! "vextracti128 $tmp,$tmp3,0x1\n\t"
"vpmulld $tmp,$tmp,$tmp3\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"pshufd $tmp2,$tmp,0x1\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpmulld $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction16I" %}
ins_encode %{
! __ vextracti64x4($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
! __ vextracti128($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x1);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
*** 5384,5402 ****
instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
! format %{ "vextracti128 $tmp,$src2\n\t"
"vpmullq $tmp2,$tmp,$src2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! mul reduction4L" %}
ins_encode %{
! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdq($tmp$$XMMRegister, $src1$$Register);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
--- 5384,5402 ----
instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
! format %{ "vextracti128 $tmp,$src2,0x1\n\t"
"vpmullq $tmp2,$tmp,$src2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! mul reduction4L" %}
ins_encode %{
! __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdq($tmp$$XMMRegister, $src1$$Register);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
*** 5409,5429 ****
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
"vpmullq $tmp2,$tmp2,$src2\n\t"
! "vextracti128 $tmp,$tmp2\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! mul reduction8L" %}
ins_encode %{
! __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
! __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdq($tmp$$XMMRegister, $src1$$Register);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
--- 5409,5429 ----
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
"vpmullq $tmp2,$tmp2,$src2\n\t"
! "vextracti128 $tmp,$tmp2,0x1\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! mul reduction8L" %}
ins_encode %{
! __ vextracti64x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
! __ vextracti128($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdq($tmp$$XMMRegister, $src1$$Register);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
*** 5518,5528 ****
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
! "vextractf128 $tmp2,$src2\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
--- 5518,5528 ----
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
! "vextractf128 $tmp2,$src2,0x1\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
*** 5534,5544 ****
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
--- 5534,5544 ----
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
*** 5557,5583 ****
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x1\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x2\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x3\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
--- 5557,5583 ----
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x2\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x3\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x02\n\t"
"vmulss $dst,$dst,$tmp\n\t"
*** 5589,5615 ****
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
--- 5589,5615 ----
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
*** 5654,5672 ****
match(Set dst (MulReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
! "vextractf128 $tmp2,$src2\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
ins_encode %{
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
--- 5654,5672 ----
match(Set dst (MulReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
! "vextractf128 $tmp2,$src2,0x1\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
ins_encode %{
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
*** 5677,5711 ****
match(Set dst (MulReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
// Listing for the 8D multiply reduction; keep it in step with ins_encode below.
// Every pshufd that follows an extract shuffles $tmp2 (the lane just extracted),
// matching the encoding; only the very first pshufd reads $src2.
format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x1\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x2\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2, 0x3\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
ins_encode %{
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
--- 5677,5711 ----
match(Set dst (MulReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
// Listing for the 8D multiply reduction; keep it in step with ins_encode below.
// Every pshufd that follows an extract shuffles $tmp2 (the lane just extracted),
// matching the encoding; only the very first pshufd reads $src2.
format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x2\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
! "vextractf32x4 $tmp2,$src2,0x3\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
ins_encode %{
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
! __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
src/cpu/x86/vm/x86.ad
Index
Unified diffs
Context diffs
Sdiffs
Patch
New
Old
Previous File
Next File