src/cpu/x86/vm/x86.ad
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File
*** old/src/cpu/x86/vm/x86.ad	Wed Mar  2 14:58:33 2016
--- new/src/cpu/x86/vm/x86.ad	Wed Mar  2 14:58:33 2016

*** 3177,3209 **** --- 3177,3209 ---- match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t" "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate32B" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate32B" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl32B_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "punpcklbw $dst,$mem\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate32B" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate32B" %} ins_encode %{ __ punpcklbw($dst$$XMMRegister, $mem$$Address); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl16B_imm(vecX dst, immI con) %{
*** 3221,3235 **** --- 3221,3235 ---- instruct Repl32B_imm(vecY dst, immI con) %{ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! lreplicate32B($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl4S(vecD dst, rRegI src) %{
*** 3296,3339 **** --- 3296,3339 ---- predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS src)); format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate16S" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl16S_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS (LoadS mem))); format %{ "pshuflw $dst,$mem,0x00\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate16S" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S" %} ins_encode %{ __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl16S_imm(vecY dst, immI con) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl4I(vecX dst, rRegI src) %{
*** 3361,3387 **** --- 3361,3387 ---- instruct Repl8I(vecY dst, rRegI src) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" "pshufd $dst,$dst,0x00\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate8I" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate8I" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl8I_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI (LoadI mem))); format %{ "pshufd $dst,$mem,0x00\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate8I" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate8I" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl4I_imm(vecX dst, immI con) %{
*** 3399,3413 **** --- 3399,3413 ---- instruct Repl8I_imm(vecY dst, immI con) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst" %} ! "vinserti128 $dst,$dst,$dst,0x1" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} // Long could be loaded into xmm register directly from memory.
*** 3428,3442 **** --- 3428,3442 ---- instruct Repl4L(vecY dst, rRegL src) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL src)); format %{ "movdq $dst,$src\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %} ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} #else // _LP64 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
*** 3445,3490 **** --- 3445,3490 ---- effect(TEMP dst, USE src, TEMP tmp); format %{ "movdl $dst,$src.lo\n\t" "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} #endif // _LP64 instruct Repl4L_imm(vecY dst, immL con) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress($con)); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl4L_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL (LoadL mem))); format %{ "movq $dst,$mem\n\t" "punpcklqdq $dst,$dst\n\t" ! "vinserti128h $dst,$dst,$dst\t! replicate4L" %} ! "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %} ins_encode %{ __ movq($dst$$XMMRegister, $mem$$Address); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl2F_mem(vecD dst, memory mem) %{
*** 3509,3534 **** --- 3509,3534 ---- instruct Repl8F(vecY dst, regF src) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF src)); format %{ "pshufd $dst,$src,0x00\n\t" ! "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} ! "vinsertf128 $dst,$dst,$dst,0x1\t! replicate8F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl8F_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF (LoadF mem))); format %{ "pshufd $dst,$mem,0x00\n\t" ! "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} ! "vinsertf128 $dst,$dst,$dst,0x1\t! replicate8F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl2F_zero(vecD dst, immF0 zero) %{
*** 3574,3599 **** --- 3574,3599 ---- instruct Repl4D(vecY dst, regD src) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD src)); format %{ "pshufd $dst,$src,0x44\n\t" ! "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} ! "vinsertf128 $dst,$dst,$dst,0x1\t! replicate4D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl4D_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD (LoadD mem))); format %{ "pshufd $dst,$mem,0x44\n\t" ! "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} ! "vinsertf128 $dst,$dst,$dst,0x1\t! replicate4D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); ! __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} // Replicate double (8 byte) scalar zero to be vector
*** 4789,4808 **** --- 4789,4808 ---- predicate(VM_Version::supports_avxonly()); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" ! "vextracti128 $tmp2,$tmp,0x1\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! add reduction8I" %} ins_encode %{ int vector_len = 1; __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); ! __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextracti128($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdl($dst$$Register, $tmp2$$XMMRegister); %}
*** 4811,4832 **** --- 4811,4832 ---- instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti128 $tmp,$src2,0x1\n\t" "vpaddd $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! add reduction8I" %} ins_encode %{ int vector_len = 0; ! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); ! __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
*** 4841,4863 **** --- 4841,4863 ---- predicate(UseAVX > 2); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" "vpaddd $tmp3,$tmp3,$src2\n\t" ! "vextracti128 $tmp,$tmp3,0x1\n\t" "vpaddd $tmp,$tmp,$tmp3\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ ! __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); ! __ vextracti64x4($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); ! __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); ! __ vextracti128($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
*** 4890,4908 **** --- 4890,4908 ---- instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti128 $tmp,$src2,0x1\n\t" "vpaddq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction4L" %} ins_encode %{ ! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); ! __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
*** 4915,4935 **** --- 4915,4935 ---- predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" "vpaddq $tmp2,$tmp2,$src2\n\t" ! "vextracti128 $tmp,$tmp2,0x1\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction8L" %} ins_encode %{ ! __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); ! __ vextracti64x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); ! __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ vextracti128($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
*** 5024,5034 **** --- 5024,5034 ---- "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" ! "vextractf128 $tmp2,$src2,0x1\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t"
*** 5040,5050 **** --- 5040,5050 ---- __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); ! __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
*** 5063,5089 **** --- 5063,5089 ---- "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t"
*** 5095,5121 **** --- 5095,5121 ---- __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
*** 5160,5178 **** --- 5160,5178 ---- match(Set dst (AddReductionVD dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" ! "vextractf32x4h $tmp2,$src2, 0x1\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} ins_encode %{ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow );
*** 5183,5217 **** --- 5183,5217 ---- match(Set dst (AddReductionVD dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} ins_encode %{ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow );
*** 5305,5326 **** --- 5305,5326 ---- instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti128 $tmp,$src2,0x1\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction8I" %} ins_encode %{ int vector_len = 0; ! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); ! __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
*** 5335,5357 **** --- 5335,5357 ---- predicate(UseAVX > 2); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" "vpmulld $tmp3,$tmp3,$src2\n\t" ! "vextracti128 $tmp,$tmp3,0x1\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ ! __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); ! __ vextracti64x4($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); ! __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); ! __ vextracti128($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x1); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
*** 5384,5402 **** --- 5384,5402 ---- instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti128 $tmp,$src2,0x1\n\t" "vpmullq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction4L" %} ins_encode %{ ! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); ! __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
*** 5409,5429 **** --- 5409,5429 ---- predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" "vpmullq $tmp2,$tmp2,$src2\n\t" ! "vextracti128 $tmp,$tmp2,0x1\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction8L" %} ins_encode %{ ! __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); ! __ vextracti64x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); ! __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ vextracti128($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
*** 5518,5528 **** --- 5518,5528 ---- "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" ! "vextractf128 $tmp2,$src2,0x1\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t"
*** 5534,5544 **** --- 5534,5544 ---- __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); ! __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
*** 5557,5583 **** --- 5557,5583 ---- "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t"
*** 5589,5615 **** --- 5589,5615 ---- __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
*** 5654,5672 **** --- 5654,5672 ---- match(Set dst (MulReductionVD dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" ! "vextractf128 $tmp2,$src2,0x1\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} ins_encode %{ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); ! __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow );
*** 5677,5711 **** --- 5677,5711 ---- match(Set dst (MulReductionVD dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x1\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x2\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" - "vextractf32x4 $tmp2,$src2, 0x3\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} ins_encode %{ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow );

src/cpu/x86/vm/x86.ad
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File