src/cpu/x86/vm/x86.ad
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File
*** old/src/cpu/x86/vm/x86.ad	Mon Mar 30 19:42:55 2015
--- new/src/cpu/x86/vm/x86.ad	Mon Mar 30 19:42:55 2015

*** 621,630 ****
--- 621,649 ----
        break;
      case Op_MulVI:
        if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
          return false;
        break;
+     case Op_AddReductionVL:
+       if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
+         return false;
+       break; // each reduction op is gated only by its own minimum ISA level
+     case Op_AddReductionVI:
+       if (UseSSE < 3) // requires at least SSE3
+         return false;
+       break; // without this break the SSE4 test below also applied here and rejected UseSSE == 3
+     case Op_MulReductionVI:
+       if (UseSSE < 4) // requires at least SSE4
+         return false;
+       break;
+     case Op_AddReductionVF:
+     case Op_AddReductionVD:
+     case Op_MulReductionVF:
+     case Op_MulReductionVD:
+       if (UseSSE < 1) // requires at least SSE
+         return false;
+       break;
      case Op_CompareAndSwapL:
  #ifdef _LP64
      case Op_CompareAndSwapP:
  #endif
        if (!VM_Version::supports_cx8())
*** 2530,2539 ****
--- 2546,3149 ----
      __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
    %}
    ins_pipe( fpu_reg_reg );
  %}
  
+ // ====================REDUCTION ARITHMETIC=======================================
+ 
+ // AddReductionVI, vecD source (format tag "add reduction2I"); SSE variant, predicate UseSSE > 2 && UseAVX == 0.
+ instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+   predicate(UseSSE > 2 && UseAVX == 0);
+   match(Set dst (AddReductionVI src1 src2));
+   effect(TEMP tmp2, TEMP tmp);
+   format %{ "movdqu $tmp2,$src2\n\t"
+             "phaddd $tmp2,$tmp2\n\t"
+             "movd $tmp,$src1\n\t"
+             "paddd $tmp,$tmp2\n\t"
+             "movd $dst,$tmp\t! add reduction2I" %}
+   ins_encode %{
+     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
+     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
+     __ movdl($tmp$$XMMRegister, $src1$$Register);
+     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
+     __ movdl($dst$$Register, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVI, vecD source (format tag "add reduction2I"); AVX variant, predicate UseAVX > 0.
+ instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (AddReductionVI src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "vphaddd $tmp,$src2,$src2\n\t"
+             "movd $tmp2,$src1\n\t"
+             "vpaddd $tmp2,$tmp2,$tmp\n\t"
+             "movd $dst,$tmp2\t! add reduction2I" %}
+   ins_encode %{
+     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
+     __ movdl($tmp2$$XMMRegister, $src1$$Register);
+     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
+     __ movdl($dst$$Register, $tmp2$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVI, vecX source (format tag "add reduction4I"); SSE variant, predicate UseSSE > 2 && UseAVX == 0.
+ instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+   predicate(UseSSE > 2 && UseAVX == 0);
+   match(Set dst (AddReductionVI src1 src2));
+   effect(TEMP tmp2, TEMP tmp);
+   format %{ "movdqu $tmp2,$src2\n\t"
+             "phaddd $tmp2,$tmp2\n\t"
+             "phaddd $tmp2,$tmp2\n\t"
+             "movd $tmp,$src1\n\t"
+             "paddd $tmp,$tmp2\n\t"
+             "movd $dst,$tmp\t! add reduction4I" %}
+   ins_encode %{
+     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
+     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
+     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
+     __ movdl($tmp$$XMMRegister, $src1$$Register);
+     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
+     __ movdl($dst$$Register, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVI, vecX source (format tag "add reduction4I"); AVX variant, predicate UseAVX > 0.
+ instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (AddReductionVI src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "vphaddd $tmp,$src2,$src2\n\t"
+             "vphaddd $tmp,$tmp,$tmp2\n\t"
+             "movd $tmp2,$src1\n\t"
+             "vpaddd $tmp2,$tmp2,$tmp\n\t"
+             "movd $dst,$tmp2\t! add reduction4I" %}
+   ins_encode %{
+     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
+     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); // NOTE(review): $tmp2 is read here before any instruction in this block writes it - confirm intent
+     __ movdl($tmp2$$XMMRegister, $src1$$Register);
+     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
+     __ movdl($dst$$Register, $tmp2$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVI, vecY source (format tag "add reduction8I"); AVX variant, predicate UseAVX > 0.
+ instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (AddReductionVI src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "vphaddd $tmp,$src2,$src2\n\t"
+             "vphaddd $tmp,$tmp,$tmp2\n\t"
+             "vextractf128 $tmp2,$tmp\n\t"
+             "vpaddd $tmp,$tmp,$tmp2\n\t"
+             "movd $tmp2,$src1\n\t"
+             "vpaddd $tmp2,$tmp2,$tmp\n\t"
+             "movd $dst,$tmp2\t! add reduction8I" %}
+   ins_encode %{
+     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true);
+     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true); // NOTE(review): $tmp2 is read here before any instruction in this block writes it - confirm intent
+     __ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+     __ movdl($tmp2$$XMMRegister, $src1$$Register);
+     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
+     __ movdl($dst$$Register, $tmp2$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVF, vecD source (format tag "add reduction2F"); SSE variant, predicate UseSSE >= 1 && UseAVX == 0.
+ instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+   predicate(UseSSE >= 1 && UseAVX == 0);
+   match(Set dst (AddReductionVF src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "movdqu $tmp,$src1\n\t"
+             "addss $tmp,$src2\n\t"
+             "pshufd $tmp2,$src2,0x01\n\t"
+             "addss $tmp,$tmp2\n\t"
+             "movdqu $dst,$tmp\t! add reduction2F" %}
+   ins_encode %{
+     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVF, vecD source (format tag "add reduction2F"); AVX variant, predicate UseAVX > 0.
+ instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (AddReductionVF src1 src2));
+   effect(TEMP tmp2, TEMP tmp);
+   format %{ "vaddss $tmp2,$src1,$src2\n\t"
+             "pshufd $tmp,$src2,0x01\n\t"
+             "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %}
+   ins_encode %{
+     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVF, vecX source (format tag "add reduction4F"); SSE variant, predicate UseSSE >= 1 && UseAVX == 0.
+ instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+   predicate(UseSSE >= 1 && UseAVX == 0);
+   match(Set dst (AddReductionVF src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "movdqu $tmp,$src1\n\t"
+             "addss $tmp,$src2\n\t"
+             "pshufd $tmp2,$src2,0x01\n\t"
+             "addss $tmp,$tmp2\n\t"
+             "pshufd $tmp2,$src2,0x02\n\t"
+             "addss $tmp,$tmp2\n\t"
+             "pshufd $tmp2,$src2,0x03\n\t"
+             "addss $tmp,$tmp2\n\t"
+             "movdqu $dst,$tmp\t! add reduction4F" %}
+   ins_encode %{
+     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
+     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
+     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVF, vecX source (format tag "add reduction4F"); AVX variant, predicate UseAVX > 0.
+ instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (AddReductionVF src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "vaddss $tmp2,$src1,$src2\n\t"
+             "pshufd $tmp,$src2,0x01\n\t"
+             "vaddss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$src2,0x02\n\t"
+             "vaddss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$src2,0x03\n\t"
+             "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %}
+   ins_encode %{
+     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVF, vecY source (format tag "add reduction8F"); AVX variant, predicate UseAVX > 0.
+ instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+   predicate(UseAVX > 0);
+   match(Set dst (AddReductionVF src1 src2));
+   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+   format %{ "vaddss $tmp2,$src1,$src2\n\t"
+             "pshufd $tmp,$src2,0x01\n\t"
+             "vaddss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$src2,0x02\n\t"
+             "vaddss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$src2,0x03\n\t"
+             "vaddss $tmp2,$tmp2,$tmp\n\t"
+             "vextractf128 $tmp3,$src2\n\t"
+             "vaddss $tmp2,$tmp2,$tmp3\n\t"
+             "pshufd $tmp,$tmp3,0x01\n\t"
+             "vaddss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$tmp3,0x02\n\t"
+             "vaddss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$tmp3,0x03\n\t"
+             "vaddss $dst,$tmp2,$tmp\t! add reduction8F" %}
+   ins_encode %{
+     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVD, vecX source (format tag "add reduction2D"); SSE variant, predicate UseSSE >= 1 && UseAVX == 0; dst is also declared TEMP.
+ instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+   predicate(UseSSE >= 1 && UseAVX == 0);
+   match(Set dst (AddReductionVD src1 src2));
+   effect(TEMP tmp, TEMP dst);
+   format %{ "movdqu $tmp,$src1\n\t"
+             "addsd $tmp,$src2\n\t"
+             "pshufd $dst,$src2,0xE\n\t"
+             "addsd $dst,$tmp\t! add reduction2D" %}
+   ins_encode %{
+     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+     __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVD, vecX source (format tag "add reduction2D"); AVX variant, predicate UseAVX > 0.
+ instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (AddReductionVD src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "vaddsd $tmp2,$src1,$src2\n\t"
+             "pshufd $tmp,$src2,0xE\n\t"
+             "vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %}
+   ins_encode %{
+     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // AddReductionVD, vecY source (format tag "add reduction4D"); AVX variant, predicate UseAVX > 0.
+ instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+   predicate(UseAVX > 0);
+   match(Set dst (AddReductionVD src1 src2));
+   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+   format %{ "vaddsd $tmp2,$src1,$src2\n\t"
+             "pshufd $tmp,$src2,0xE\n\t"
+             "vaddsd $tmp2,$tmp2,$tmp\n\t"
+             "vextractf128 $tmp3,$src2\n\t"
+             "vaddsd $tmp2,$tmp2,$tmp3\n\t"
+             "pshufd $tmp,$tmp3,0xE\n\t"
+             "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %}
+   ins_encode %{
+     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVI, vecD source (format tag "mul reduction2I"); SSE variant, predicate UseSSE > 3 && UseAVX == 0.
+ instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+   predicate(UseSSE > 3 && UseAVX == 0);
+   match(Set dst (MulReductionVI src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "pshufd $tmp2,$src2,0x1\n\t"
+             "pmulld $tmp2,$src2\n\t"
+             "movd $tmp,$src1\n\t"
+             "pmulld $tmp2,$tmp\n\t"
+             "movd $dst,$tmp2\t! mul reduction2I" %}
+   ins_encode %{
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
+     __ movdl($tmp$$XMMRegister, $src1$$Register);
+     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ movdl($dst$$Register, $tmp2$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVI, vecD source (format tag "mul reduction2I"); AVX variant, predicate UseAVX > 0.
+ instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (MulReductionVI src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "pshufd $tmp2,$src2,0x1\n\t"
+             "vpmulld $tmp,$src2,$tmp2\n\t"
+             "movd $tmp2,$src1\n\t"
+             "vpmulld $tmp2,$tmp,$tmp2\n\t"
+             "movd $dst,$tmp2\t! mul reduction2I" %}
+   ins_encode %{
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
+     __ movdl($tmp2$$XMMRegister, $src1$$Register);
+     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+     __ movdl($dst$$Register, $tmp2$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVI, vecX source (format tag "mul reduction4I"); SSE variant, predicate UseSSE > 3 && UseAVX == 0.
+ instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+   predicate(UseSSE > 3 && UseAVX == 0);
+   match(Set dst (MulReductionVI src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "pshufd $tmp2,$src2,0xE\n\t"
+             "pmulld $tmp2,$src2\n\t"
+             "pshufd $tmp,$tmp2,0x1\n\t"
+             "pmulld $tmp2,$tmp\n\t"
+             "movd $tmp,$src1\n\t"
+             "pmulld $tmp2,$tmp\n\t"
+             "movd $dst,$tmp2\t! mul reduction4I" %}
+   ins_encode %{
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
+     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
+     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ movdl($tmp$$XMMRegister, $src1$$Register);
+     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ movdl($dst$$Register, $tmp2$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVI, vecX source (format tag "mul reduction4I"); AVX variant, predicate UseAVX > 0.
+ instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (MulReductionVI src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "pshufd $tmp2,$src2,0xE\n\t"
+             "vpmulld $tmp,$src2,$tmp2\n\t"
+             "pshufd $tmp2,$tmp,0x1\n\t"
+             "vpmulld $tmp,$tmp,$tmp2\n\t"
+             "movd $tmp2,$src1\n\t"
+             "vpmulld $tmp2,$tmp,$tmp2\n\t"
+             "movd $dst,$tmp2\t! mul reduction4I" %}
+   ins_encode %{
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
+     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
+     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
+     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+     __ movdl($tmp2$$XMMRegister, $src1$$Register);
+     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+     __ movdl($dst$$Register, $tmp2$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVI, vecY source (format tag "mul reduction8I"); AVX variant, predicate UseAVX > 0.
+ instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (MulReductionVI src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "vextractf128 $tmp,$src2\n\t"
+             "vpmulld $tmp,$tmp,$src2\n\t"
+             "pshufd $tmp2,$tmp,0xE\n\t"
+             "vpmulld $tmp,$tmp,$tmp2\n\t"
+             "pshufd $tmp2,$tmp,0x1\n\t"
+             "vpmulld $tmp,$tmp,$tmp2\n\t"
+             "movd $tmp2,$src1\n\t"
+             "vpmulld $tmp2,$tmp,$tmp2\n\t"
+             "movd $dst,$tmp2\t! mul reduction8I" %}
+   ins_encode %{
+     __ vextractf128h($tmp$$XMMRegister, $src2$$XMMRegister);
+     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, false);
+     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
+     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
+     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+     __ movdl($tmp2$$XMMRegister, $src1$$Register);
+     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+     __ movdl($dst$$Register, $tmp2$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVF, vecD source (format tag "mul reduction2F"); SSE variant, predicate UseSSE >= 1 && UseAVX == 0.
+ instruct rsmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+   predicate(UseSSE >= 1 && UseAVX == 0);
+   match(Set dst (MulReductionVF src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "movdqu $tmp,$src1\n\t"
+             "mulss $tmp,$src2\n\t"
+             "pshufd $tmp2,$src2,0x01\n\t"
+             "mulss $tmp,$tmp2\n\t"
+             "movdqu $dst,$tmp\t! mul reduction2F" %}
+   ins_encode %{
+     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVF, vecD source (format tag "mul reduction2F"); AVX variant, predicate UseAVX > 0.
+ instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (MulReductionVF src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "vmulss $tmp2,$src1,$src2\n\t"
+             "pshufd $tmp,$src2,0x01\n\t"
+             "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %}
+   ins_encode %{
+     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVF, vecX source (format tag "mul reduction4F"); SSE variant, predicate UseSSE >= 1 && UseAVX == 0.
+ instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+   predicate(UseSSE >= 1 && UseAVX == 0);
+   match(Set dst (MulReductionVF src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "movdqu $tmp,$src1\n\t"
+             "mulss $tmp,$src2\n\t"
+             "pshufd $tmp2,$src2,0x01\n\t"
+             "mulss $tmp,$tmp2\n\t"
+             "pshufd $tmp2,$src2,0x02\n\t"
+             "mulss $tmp,$tmp2\n\t"
+             "pshufd $tmp2,$src2,0x03\n\t"
+             "mulss $tmp,$tmp2\n\t"
+             "movdqu $dst,$tmp\t! mul reduction4F" %}
+   ins_encode %{
+     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
+     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
+     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
+     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVF, vecX source (format tag "mul reduction4F"); AVX variant, predicate UseAVX > 0.
+ instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (MulReductionVF src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "vmulss $tmp2,$src1,$src2\n\t"
+             "pshufd $tmp,$src2,0x01\n\t"
+             "vmulss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$src2,0x02\n\t"
+             "vmulss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$src2,0x03\n\t"
+             "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %}
+   ins_encode %{
+     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVF, vecY source (format tag "mul reduction8F"); AVX variant, predicate UseAVX > 0.
+ instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+   predicate(UseAVX > 0);
+   match(Set dst (MulReductionVF src1 src2));
+   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+   format %{ "vmulss $tmp2,$src1,$src2\n\t"
+             "pshufd $tmp,$src2,0x01\n\t"
+             "vmulss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$src2,0x02\n\t"
+             "vmulss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$src2,0x03\n\t"
+             "vmulss $tmp2,$tmp2,$tmp\n\t"
+             "vextractf128 $tmp3,$src2\n\t"
+             "vmulss $tmp2,$tmp2,$tmp3\n\t"
+             "pshufd $tmp,$tmp3,0x01\n\t"
+             "vmulss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$tmp3,0x02\n\t"
+             "vmulss $tmp2,$tmp2,$tmp\n\t"
+             "pshufd $tmp,$tmp3,0x03\n\t"
+             "vmulss $dst,$tmp2,$tmp\t! mul reduction8F" %}
+   ins_encode %{
+     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVD, vecX source (format tag "mul reduction2D"); SSE variant, predicate UseSSE >= 1 && UseAVX == 0; dst is also declared TEMP.
+ instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+   predicate(UseSSE >= 1 && UseAVX == 0);
+   match(Set dst (MulReductionVD src1 src2));
+   effect(TEMP tmp, TEMP dst);
+   format %{ "movdqu $tmp,$src1\n\t"
+             "mulsd $tmp,$src2\n\t"
+             "pshufd $dst,$src2,0xE\n\t"
+             "mulsd $dst,$tmp\t! mul reduction2D" %}
+   ins_encode %{
+     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
+     __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVD, vecX source (format tag "mul reduction2D"); AVX variant, predicate UseAVX > 0.
+ instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+   predicate(UseAVX > 0);
+   match(Set dst (MulReductionVD src1 src2));
+   effect(TEMP tmp, TEMP tmp2);
+   format %{ "vmulsd $tmp2,$src1,$src2\n\t"
+             "pshufd $tmp,$src2,0xE\n\t"
+             "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %}
+   ins_encode %{
+     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
+ // MulReductionVD, vecY source (format tag "mul reduction4D"); AVX variant, predicate UseAVX > 0.
+ instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+   predicate(UseAVX > 0);
+   match(Set dst (MulReductionVD src1 src2));
+   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+   format %{ "vmulsd $tmp2,$src1,$src2\n\t"
+             "pshufd $tmp,$src2,0xE\n\t"
+             "vmulsd $tmp2,$tmp2,$tmp\n\t"
+             "vextractf128 $tmp3,$src2\n\t"
+             "vmulsd $tmp2,$tmp2,$tmp3\n\t"
+             "pshufd $tmp,$tmp3,0xE\n\t"
+             "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %}
+   ins_encode %{
+     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
  // ====================VECTOR ARITHMETIC=======================================
  
  // --------------------------------- ADD --------------------------------------
  
  // Bytes vector add

src/cpu/x86/vm/x86.ad
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File