
src/cpu/x86/vm/x86.ad


        

*** 1714,1723 **** --- 1714,1753 ----
      }
      return ret_value;  // Per default match rules are supported.
    }
  
+   const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+     // identify extra cases that we might want to provide match rules for
+     // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+     bool ret_value = match_rule_supported(opcode);
+     if (ret_value) {
+       switch (opcode) {
+         case Op_AddVB:
+         case Op_SubVB:
+           if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
+             ret_value = false;
+           break;
+         case Op_URShiftVS:
+         case Op_RShiftVS:
+         case Op_LShiftVS:
+         case Op_MulVS:
+         case Op_AddVS:
+         case Op_SubVS:
+           if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
+             ret_value = false;
+           break;
+         case Op_CMoveVD:
+           if (vlen != 4)
+             ret_value = false;
+           break;
+       }
+     }
+ 
+     return ret_value;  // Per default match rules are supported.
+   }
+ 
    const int Matcher::float_pressure(int default_pressure_threshold) {
      int float_pressure_threshold = default_pressure_threshold;
  #ifdef _LP64
    if (UseAVX > 2) {
      // Increase pressure threshold on machines with AVX3 which have
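
The new Matcher::match_rule_supported_vector above gates a few vector opcodes on both the vector length and the available CPU features. As a minimal standalone C++ sketch of the effect for byte addition (not HotSpot code; has_avx512bw is a stand-in for VM_Version::supports_avx512bw()):

    // A 64-lane (512-bit) byte add needs AVX512BW; narrower byte vectors only
    // need the baseline AVX/AVX2 support already checked by match_rule_supported.
    bool add_vb_supported(int vlen, bool has_avx512bw) {
      if (vlen == 64 && !has_avx512bw) return false;
      return true;
    }
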
*** 1757,1771 ****
      case T_CHAR:
        if (size < 4) return 0;
        break;
      case T_BYTE:
        if (size < 4) return 0;
-       if ((size > 32) && !VM_Version::supports_avx512bw()) return 0;
        break;
      case T_SHORT:
        if (size < 4) return 0;
-       if ((size > 16) && !VM_Version::supports_avx512bw()) return 0;
        break;
      default:
        ShouldNotReachHere();
    }
    return size;
--- 1787,1799 ----
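
The caps removed above duplicated what the per-opcode check now enforces. For reference, the lane counts involved are plain arithmetic (sketch, not HotSpot code):

    // 512-bit (zmm) vectors hold 64 byte lanes or 32 short lanes, which is why
    // the AVX512BW guard keys on vlen == 64 for T_BYTE and vlen == 32 for T_SHORT.
    int max_lanes(int vector_width_bytes, int element_size_bytes) {
      return vector_width_bytes / element_size_bytes;  // 64/1 = 64, 64/2 = 32
    }
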
*** 1965,1995 ****
  #endif
    }
    bool is_single_byte = false;
    int vec_len = 0;
    if ((UseAVX > 2) && (stack_offset != 0)) {
      switch (ireg) {
      case Op_VecS:
      case Op_VecD:
      case Op_VecX:
        break;
      case Op_VecY:
        vec_len = 1;
        break;
      case Op_VecZ:
        vec_len = 2;
        break;
      }
!     is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0);
    }
    int offset_size = 0;
    int size = 5;
    if (UseAVX > 2 ) {
!     if ((VM_Version::supports_avx512vl() == false) && (vec_len == 2)) {
        offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
        size += 2; // Need an additional two bytes for EVEX encoding
!     } else if ((VM_Version::supports_avx512vl() == false) && (vec_len < 2)) {
        offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
      } else {
        offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
        size += 2; // Need an additional two bytes for EVEX encodding
      }
--- 1993,2030 ----
  #endif
    }
    bool is_single_byte = false;
    int vec_len = 0;
    if ((UseAVX > 2) && (stack_offset != 0)) {
+     int tuple_type = Assembler::EVEX_FVM;
+     int input_size = Assembler::EVEX_32bit;
      switch (ireg) {
      case Op_VecS:
+       tuple_type = Assembler::EVEX_T1S;
+       break;
      case Op_VecD:
+       tuple_type = Assembler::EVEX_T1S;
+       input_size = Assembler::EVEX_64bit;
+       break;
      case Op_VecX:
        break;
      case Op_VecY:
        vec_len = 1;
        break;
      case Op_VecZ:
        vec_len = 2;
        break;
      }
!     is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
    }
    int offset_size = 0;
    int size = 5;
    if (UseAVX > 2 ) {
!     if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
        offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
        size += 2; // Need an additional two bytes for EVEX encoding
!     } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
        offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
      } else {
        offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
        size += 2; // Need an additional two bytes for EVEX encodding
      }
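
query_compressed_disp_byte above decides whether the spill offset fits the EVEX compressed (disp8*N) displacement, which now also depends on the tuple type (EVEX_T1S for the single-element VecS/VecD spills) and the input size. A hedged sketch of the disp8*N rule itself; the example N values are illustrative and not read from the Assembler:

    #include <cstdint>

    // With EVEX, an 8-bit displacement is implicitly scaled by N, where N is
    // derived from the tuple type and operand width (e.g. 4 or 8 for a T1S
    // scalar access, the full vector width in bytes for FVM).
    bool fits_in_compressed_disp8(int32_t offset, int32_t n) {
      return (offset % n == 0) && (offset / n >= -128) && (offset / n <= 127);
    }
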
*** 2709,2729 **** %} ins_pipe(pipe_slow); %} instruct absF_reg_reg(regF dst, regF src) %{ ! predicate(UseAVX > 0); match(Set dst (AbsF src)); ins_cost(150); format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} ins_encode %{ int vector_len = 0; __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(float_signmask()), vector_len); %} ins_pipe(pipe_slow); %} instruct absD_reg(regD dst) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (AbsD dst)); ins_cost(150); --- 2744,2806 ---- %} ins_pipe(pipe_slow); %} instruct absF_reg_reg(regF dst, regF src) %{ ! predicate(VM_Version::supports_avx256only()); ! match(Set dst (AbsF src)); ! ins_cost(150); ! format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} ! ins_encode %{ ! int vector_len = 0; ! __ vandps($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(float_signmask()), vector_len); ! %} ! ins_pipe(pipe_slow); ! %} ! ! #ifdef _LP64 ! instruct absF_reg_reg_evex(regF dst, regF src) %{ ! predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); ! match(Set dst (AbsF src)); ! ins_cost(150); ! format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} ! ins_encode %{ ! int vector_len = 0; ! __ vandps($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(float_signmask()), vector_len); ! %} ! ins_pipe(pipe_slow); ! %} ! ! instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{ ! predicate(VM_Version::supports_avx512novl()); ! match(Set dst (AbsF src1)); ! effect(TEMP src2); ! ins_cost(150); ! format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %} ! ins_encode %{ ! int vector_len = 0; ! __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, ! ExternalAddress(float_signmask()), vector_len); ! %} ! ins_pipe(pipe_slow); ! %} ! #else // _LP64 ! instruct absF_reg_reg_evex(regF dst, regF src) %{ ! predicate(UseAVX > 2); match(Set dst (AbsF src)); ins_cost(150); format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} ins_encode %{ int vector_len = 0; __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(float_signmask()), vector_len); %} ins_pipe(pipe_slow); %} + #endif instruct absD_reg(regD dst) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (AbsD dst)); ins_cost(150);
*** 2734,2744 **** %} ins_pipe(pipe_slow); %} instruct absD_reg_reg(regD dst, regD src) %{ ! predicate(UseAVX > 0); match(Set dst (AbsD src)); ins_cost(150); format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" "# abs double by sign masking" %} ins_encode %{ --- 2811,2864 ---- %} ins_pipe(pipe_slow); %} instruct absD_reg_reg(regD dst, regD src) %{ ! predicate(VM_Version::supports_avx256only()); ! match(Set dst (AbsD src)); ! ins_cost(150); ! format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" ! "# abs double by sign masking" %} ! ins_encode %{ ! int vector_len = 0; ! __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(double_signmask()), vector_len); ! %} ! ins_pipe(pipe_slow); ! %} ! ! #ifdef _LP64 ! instruct absD_reg_reg_evex(regD dst, regD src) %{ ! predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); ! match(Set dst (AbsD src)); ! ins_cost(150); ! format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" ! "# abs double by sign masking" %} ! ins_encode %{ ! int vector_len = 0; ! __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(double_signmask()), vector_len); ! %} ! ins_pipe(pipe_slow); ! %} ! ! instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{ ! predicate(VM_Version::supports_avx512novl()); ! match(Set dst (AbsD src1)); ! effect(TEMP src2); ! ins_cost(150); ! format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %} ! ins_encode %{ ! int vector_len = 0; ! __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, ! ExternalAddress(double_signmask()), vector_len); ! %} ! ins_pipe(pipe_slow); ! %} ! #else // _LP64 ! instruct absD_reg_reg_evex(regD dst, regD src) %{ ! predicate(UseAVX > 2); match(Set dst (AbsD src)); ins_cost(150); format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" "# abs double by sign masking" %} ins_encode %{
*** 2746,2755 **** --- 2866,2876 ---- __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(double_signmask()), vector_len); %} ins_pipe(pipe_slow); %} + #endif instruct negF_reg(regF dst) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (NegF dst)); ins_cost(150);
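
Every absF/absD variant above, whether AVX or EVEX encoded, implements the idea noted in the format strings: abs by sign masking with 0x7fffffff / 0x7fffffffffffffff. A small scalar sketch of that idiom (illustrative only, not the generated code):

    #include <cstdint>
    #include <cstring>

    // Clearing the IEEE-754 sign bit yields the absolute value, which is what
    // vandps/vandpd against float_signmask()/double_signmask() does per lane.
    float abs_by_sign_mask(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof bits);
      bits &= 0x7fffffffu;               // clear the sign bit
      std::memcpy(&x, &bits, sizeof x);
      return x;
    }
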
*** 4552,4562 **** %} ins_pipe( pipe_slow ); %} instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 0 && UseAVX < 3); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" --- 4673,4683 ---- %} ins_pipe( pipe_slow ); %} instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ ! predicate(VM_Version::supports_avx256only()); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t"
*** 4611,4621 **** %} ins_pipe( pipe_slow ); %} instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 0 && UseAVX < 3); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" --- 4732,4742 ---- %} ins_pipe( pipe_slow ); %} instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ ! predicate(VM_Version::supports_avx256only()); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t"
*** 4655,4665 **** %} ins_pipe( pipe_slow ); %} instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 0 && UseAVX < 3); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" "vextracti128 $tmp2,$tmp\n\t" --- 4776,4786 ---- %} ins_pipe( pipe_slow ); %} instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ ! predicate(VM_Version::supports_avx256only()); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" "vextracti128 $tmp2,$tmp\n\t"
*** 4710,4720 **** instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vextracti64x4 $tmp3,$src2\n\t" "vpaddd $tmp3,$tmp3,$src2\n\t" "vextracti128 $tmp,$tmp3\n\t" "vpaddd $tmp,$tmp,$tmp3\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" --- 4831,4841 ---- instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" "vpaddd $tmp3,$tmp3,$src2\n\t" "vextracti128 $tmp,$tmp3\n\t" "vpaddd $tmp,$tmp,$tmp3\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t"
*** 4722,4732 **** "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ ! __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); --- 4843,4853 ---- "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ ! __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
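
The corrected vextracti64x4 usage above (now passing the 0x1 immediate to select the upper 256-bit half) is the first step of the usual halve-and-combine reduction: extract the upper half, add it onto the lower half, and repeat at the next narrower width. A schematic sketch of one such step (not the generated code):

    // One folding step of a power-of-two reduction: after adding the upper half
    // onto the lower half, only lanes / 2 elements remain live.
    void fold_upper_half(int* v, int lanes) {
      for (int i = 0; i < lanes / 2; i++) {
        v[i] += v[i + lanes / 2];
      }
    }
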
*** 4761,4779 **** instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" "vpaddq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction4L" %} ins_encode %{ ! __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); --- 4882,4900 ---- instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti128 $tmp,$src2\n\t" "vpaddq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction4L" %} ins_encode %{ ! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
*** 4784,4804 **** instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti64x4 $tmp2,$src2\n\t" "vpaddq $tmp2,$tmp2,$src2\n\t" "vextracti128 $tmp,$tmp2\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction8L" %} ins_encode %{ ! __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); --- 4905,4925 ---- instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" "vpaddq $tmp2,$tmp2,$src2\n\t" "vextracti128 $tmp,$tmp2\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction8L" %} ins_encode %{ ! __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
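
For reference, the scalar meaning of the AddReductionVL patterns above, i.e. what the extract/pshufd/vpaddq sequences compute (sketch, not HotSpot code):

    // dst = src1 + (sum of all lanes of src2); integer addition is associative,
    // so the halve-and-combine order used by the instructions is equivalent.
    long long add_reduction_vl(long long src1, const long long* src2, int lanes) {
      long long acc = src1;
      for (int i = 0; i < lanes; i++) {
        acc += src2[i];
      }
      return acc;
    }
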
*** 4808,5101 **** %} ins_pipe( pipe_slow ); %} #endif ! instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (AddReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "movdqu $tmp,$src1\n\t" ! "addss $tmp,$src2\n\t" ! "pshufd $tmp2,$src2,0x01\n\t" ! "addss $tmp,$tmp2\n\t" ! "movdqu $dst,$tmp\t! add reduction2F" %} ins_encode %{ ! __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); ! __ addss($tmp$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); ! match(Set dst (AddReductionVF src1 src2)); ! effect(TEMP tmp2, TEMP tmp); ! format %{ "vaddss $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %} ins_encode %{ ! __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (AddReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "movdqu $tmp,$src1\n\t" ! "addss $tmp,$src2\n\t" ! "pshufd $tmp2,$src2,0x01\n\t" ! "addss $tmp,$tmp2\n\t" ! "pshufd $tmp2,$src2,0x02\n\t" ! "addss $tmp,$tmp2\n\t" ! "pshufd $tmp2,$src2,0x03\n\t" ! "addss $tmp,$tmp2\n\t" ! "movdqu $dst,$tmp\t! add reduction4F" %} ! ins_encode %{ ! __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); ! __ addss($tmp$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); ! match(Set dst (AddReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "vaddss $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %} ins_encode %{ ! __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 0); ! match(Set dst (AddReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vaddss $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! 
"vaddss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "vextractf128 $tmp3,$src2\n\t" ! "vaddss $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0x01\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x02\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x03\n\t" ! "vaddss $dst,$tmp2,$tmp\t! add reduction8F" %} ins_encode %{ ! __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); ! __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2); ! match(Set dst (AddReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vaddss $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x1\n\t" ! "vaddss $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0x01\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x02\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x03\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x2\n\t" ! "vaddss $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0x01\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x02\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x03\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x3\n\t" ! "vaddss $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0x01\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x02\n\t" ! "vaddss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x03\n\t" ! "vaddss $dst,$tmp2,$tmp\t! add reduction16F" %} ins_encode %{ ! __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! 
__ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); ! __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); ! __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (AddReductionVD src1 src2)); effect(TEMP tmp, TEMP dst); ! format %{ "movdqu $tmp,$src1\n\t" ! "addsd $tmp,$src2\n\t" ! "pshufd $dst,$src2,0xE\n\t" "addsd $dst,$tmp\t! add reduction2D" %} ins_encode %{ ! __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); ! __ addsd($tmp$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 0); ! match(Set dst (AddReductionVD src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "vaddsd $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %} ins_encode %{ ! __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ predicate(UseAVX > 0); ! match(Set dst (AddReductionVD src1 src2)); ! effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vaddsd $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vaddsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf128 $tmp3,$src2\n\t" ! "vaddsd $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0xE\n\t" ! "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} ins_encode %{ ! __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ predicate(UseAVX > 2); ! match(Set dst (AddReductionVD src1 src2)); ! effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! 
format %{ "vaddsd $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vaddsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x1\n\t" ! "vaddsd $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0xE\n\t" ! "vaddsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x2\n\t" ! "vaddsd $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0xE\n\t" ! "vaddsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x3\n\t" ! "vaddsd $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0xE\n\t" ! "vaddsd $dst,$tmp2,$tmp\t! add reduction8D" %} ins_encode %{ ! __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ --- 4929,5212 ---- %} ins_pipe( pipe_slow ); %} #endif ! instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (AddReductionVF dst src2)); ! effect(TEMP dst, TEMP tmp); ! format %{ "addss $dst,$src2\n\t" ! "pshufd $tmp,$src2,0x01\n\t" ! "addss $dst,$tmp\t! add reduction2F" %} ins_encode %{ ! __ addss($dst$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ addss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseAVX > 0); ! match(Set dst (AddReductionVF dst src2)); ! effect(TEMP dst, TEMP tmp); ! format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vaddss $dst,$dst,$tmp\t! add reduction2F" %} ins_encode %{ ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (AddReductionVF dst src2)); ! effect(TEMP dst, TEMP tmp); ! format %{ "addss $dst,$src2\n\t" ! "pshufd $tmp,$src2,0x01\n\t" ! "addss $dst,$tmp\n\t" ! "pshufd $tmp,$src2,0x02\n\t" ! "addss $dst,$tmp\n\t" ! "pshufd $tmp,$src2,0x03\n\t" ! "addss $dst,$tmp\t! add reduction4F" %} ! ins_encode %{ ! __ addss($dst$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ addss($dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ addss($dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ addss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! 
instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseAVX > 0); ! match(Set dst (AddReductionVF dst src2)); ! effect(TEMP tmp, TEMP dst); ! format %{ "vaddss $dst,dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vaddss $dst,$dst,$tmp\t! add reduction4F" %} ins_encode %{ ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); ! match(Set dst (AddReductionVF dst src2)); ! effect(TEMP tmp, TEMP dst, TEMP tmp2); ! format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "vextractf128 $tmp2,$src2\n\t" ! "vaddss $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0x01\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x02\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x03\n\t" ! "vaddss $dst,$dst,$tmp\t! add reduction8F" %} ins_encode %{ ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); ! match(Set dst (AddReductionVF dst src2)); ! effect(TEMP tmp, TEMP dst, TEMP tmp2); ! format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x1\n\t" ! "vaddss $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0x01\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x02\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x03\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x2\n\t" ! "vaddss $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0x01\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x02\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x03\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x3\n\t" ! "vaddss $dst,$dst,$tmp2\n\t" ! 
"pshufd $tmp,$tmp2,0x01\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x02\n\t" ! "vaddss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x03\n\t" ! "vaddss $dst,$dst,$tmp\t! add reduction16F" %} ins_encode %{ ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); ! __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (AddReductionVD dst src2)); effect(TEMP tmp, TEMP dst); ! format %{ "addsd $dst,$src2\n\t" ! "pshufd $tmp,$src2,0xE\n\t" "addsd $dst,$tmp\t! add reduction2D" %} ins_encode %{ ! __ addsd($dst$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseAVX > 0); ! match(Set dst (AddReductionVD dst src2)); ! effect(TEMP tmp, TEMP dst); ! format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vaddsd $dst,$dst,$tmp\t! add reduction2D" %} ins_encode %{ ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 0); ! match(Set dst (AddReductionVD dst src2)); ! effect(TEMP tmp, TEMP dst, TEMP tmp2); ! format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vaddsd $dst,$dst,$tmp\n\t" ! 
"vextractf32x4h $tmp2,$src2, 0x1\n\t" ! "vaddsd $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0xE\n\t" ! "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} ins_encode %{ ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 2); ! match(Set dst (AddReductionVD dst src2)); ! effect(TEMP tmp, TEMP dst, TEMP tmp2); ! format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vaddsd $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x1\n\t" ! "vaddsd $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0xE\n\t" ! "vaddsd $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x2\n\t" ! "vaddsd $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0xE\n\t" ! "vaddsd $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x3\n\t" ! "vaddsd $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0xE\n\t" ! "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} ins_encode %{ ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
*** 5214,5224 **** instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vextracti64x4 $tmp3,$src2\n\t" "vpmulld $tmp3,$tmp3,$src2\n\t" "vextracti128 $tmp,$tmp3\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" --- 5325,5335 ---- instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" "vpmulld $tmp3,$tmp3,$src2\n\t" "vextracti128 $tmp,$tmp3\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t"
*** 5226,5236 **** "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ ! __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); --- 5337,5347 ---- "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ ! __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
*** 5265,5283 **** instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" "vpmullq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction4L" %} ins_encode %{ ! __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); --- 5376,5394 ---- instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti128 $tmp,$src2\n\t" "vpmullq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction4L" %} ins_encode %{ ! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
*** 5288,5308 **** instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti64x4 $tmp2,$src2\n\t" "vpmullq $tmp2,$tmp2,$src2\n\t" "vextracti128 $tmp,$tmp2\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction8L" %} ins_encode %{ ! __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); --- 5399,5419 ---- instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" "vpmullq $tmp2,$tmp2,$src2\n\t" "vextracti128 $tmp,$tmp2\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction8L" %} ins_encode %{ ! __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
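
The long multiply reductions above are guarded by VM_Version::supports_avx512dq() because they rely on vpmullq, the packed 64x64-to-64-bit multiply. Their scalar meaning, for reference (sketch):

    // dst = src1 * (product of all lanes of src2)
    long long mul_reduction_vl(long long src1, const long long* src2, int lanes) {
      long long acc = src1;
      for (int i = 0; i < lanes; i++) {
        acc *= src2[i];
      }
      return acc;
    }
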
*** 5312,6542 **** %} ins_pipe( pipe_slow ); %} #endif ! instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (MulReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "movdqu $tmp,$src1\n\t" ! "mulss $tmp,$src2\n\t" ! "pshufd $tmp2,$src2,0x01\n\t" ! "mulss $tmp,$tmp2\n\t" ! "movdqu $dst,$tmp\t! mul reduction2F" %} ins_encode %{ ! __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); ! __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); ! match(Set dst (MulReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "vmulss $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %} ins_encode %{ ! __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (MulReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "movdqu $tmp,$src1\n\t" ! "mulss $tmp,$src2\n\t" ! "pshufd $tmp2,$src2,0x01\n\t" ! "mulss $tmp,$tmp2\n\t" ! "pshufd $tmp2,$src2,0x02\n\t" ! "mulss $tmp,$tmp2\n\t" ! "pshufd $tmp2,$src2,0x03\n\t" ! "mulss $tmp,$tmp2\n\t" ! "movdqu $dst,$tmp\t! mul reduction4F" %} ! ins_encode %{ ! __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); ! __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); ! match(Set dst (MulReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "vmulss $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %} ins_encode %{ ! __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 0); ! match(Set dst (MulReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vmulss $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! 
"vmulss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "vextractf128 $tmp3,$src2\n\t" ! "vmulss $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0x01\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x02\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x03\n\t" ! "vmulss $dst,$tmp2,$tmp\t! mul reduction8F" %} ins_encode %{ ! __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); ! __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2); ! match(Set dst (MulReductionVF src1 src2)); ! effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vmulss $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "vextractf32x4 $tmp3,$src2, 0x1\n\t" ! "vmulss $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0x01\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x02\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x03\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "vextractf32x4 $tmp3,$src2, 0x2\n\t" ! "vmulss $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0x01\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x02\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x03\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "vextractf32x4 $tmp3,$src2, 0x3\n\t" ! "vmulss $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0x01\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x02\n\t" ! "vmulss $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp3,0x03\n\t" ! "vmulss $dst,$tmp2,$tmp\t! mul reduction16F" %} ins_encode %{ ! __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); ! 
__ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); ! __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); ! __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (MulReductionVD src1 src2)); ! effect(TEMP tmp, TEMP dst); ! format %{ "movdqu $tmp,$src1\n\t" ! "mulsd $tmp,$src2\n\t" ! "pshufd $dst,$src2,0xE\n\t" "mulsd $dst,$tmp\t! mul reduction2D" %} ins_encode %{ ! __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); ! __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 0); ! match(Set dst (MulReductionVD src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "vmulsd $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %} ins_encode %{ ! __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ predicate(UseAVX > 0); ! match(Set dst (MulReductionVD src1 src2)); ! effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vmulsd $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vmulsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf128 $tmp3,$src2\n\t" ! "vmulsd $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0xE\n\t" ! "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %} ins_encode %{ ! __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); ! __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ predicate(UseAVX > 2); ! match(Set dst (MulReductionVD src1 src2)); ! 
effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vmulsd $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vmulsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x1\n\t" ! "vmulsd $tmp2,$tmp2,$tmp3\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vmulsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x2\n\t" ! "vmulsd $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0xE\n\t" ! "vmulsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x3\n\t" ! "vmulsd $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0xE\n\t" ! "vmulsd $dst,$tmp2,$tmp\t! mul reduction8D" %} ins_encode %{ ! __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); ! __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); ! __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} // ====================VECTOR ARITHMETIC======================================= // --------------------------------- ADD -------------------------------------- // Bytes vector add instruct vadd4B(vecS dst, vecS src) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (AddVB dst src)); format %{ "paddb $dst,$src\t! add packed4B" %} ins_encode %{ __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} ins_encode %{ int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8B(vecD dst, vecD src) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (AddVB dst src)); ! format %{ "paddb $dst,$src\t! add packed8B" %} ins_encode %{ ! __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} ins_encode %{ int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVB src (LoadVector mem))); ! 
format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16B(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 16); match(Set dst (AddVB dst src)); ! format %{ "paddb $dst,$src\t! add packed16B" %} ins_encode %{ __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 16); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 32); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} ins_encode %{ ! int vector_len = 1; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ins_encode %{ ! int vector_len = 1; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 64); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} ins_encode %{ ! int vector_len = 2; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 64); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} ins_encode %{ ! int vector_len = 2; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // Shorts/Chars vector add ! instruct vadd2S(vecS dst, vecS src) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (AddVS dst src)); ! format %{ "paddw $dst,$src\t! add packed2S" %} ins_encode %{ ! __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} ins_encode %{ int vector_len = 0; ! 
__ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4S(vecD dst, vecD src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (AddVS dst src)); ! format %{ "paddw $dst,$src\t! add packed4S" %} ins_encode %{ ! __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8S(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (AddVS dst src)); ! format %{ "paddw $dst,$src\t! add packed8S" %} ins_encode %{ ! __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} ins_encode %{ int vector_len = 1; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} ins_encode %{ int vector_len = 1; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} ins_encode %{ ! int vector_len = 2; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} ins_encode %{ int vector_len = 2; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // Integers vector add ! 
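// Illustrative note, not part of this change: each vector-add width above is
// covered by three rule shapes. A scalar sketch of what they compute (helper
// names are assumptions for illustration only):
static void addvs_in_place(short* dst, const short* src, int n) {
  for (int i = 0; i < n; i++) dst[i] = (short)(dst[i] + src[i]);     // paddw rules: dst is also the first input
}
static void addvs_three_operand(short* dst, const short* src1, const short* src2, int n) {
  for (int i = 0; i < n; i++) dst[i] = (short)(src1[i] + src2[i]);   // vpaddw register rules
}
static void addvs_folded_load(short* dst, const short* src, const short* mem, int n) {
  for (int i = 0; i < n; i++) dst[i] = (short)(src[i] + mem[i]);     // vpaddw memory rules fold the LoadVector
}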
instruct vadd2I(vecD dst, vecD src) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (AddVI dst src)); ! format %{ "paddd $dst,$src\t! add packed2I" %} ins_encode %{ ! __ paddd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVI src1 src2)); ! format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVI src (LoadVector mem))); ! format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} ins_encode %{ int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4I(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (AddVI dst src)); ! format %{ "paddd $dst,$src\t! add packed4I" %} ins_encode %{ ! __ paddd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVI src1 src2)); ! format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} ins_encode %{ int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVI src (LoadVector mem))); ! format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} ins_encode %{ int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (AddVI src1 src2)); ! format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (AddVI src (LoadVector mem))); ! format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVI src1 src2)); ! format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} ins_encode %{ ! int vector_len = 2; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVI src (LoadVector mem))); ! format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} ins_encode %{ ! int vector_len = 2; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // Longs vector add ! instruct vadd2L(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (AddVL dst src)); ! 
format %{ "paddq $dst,$src\t! add packed2L" %} ins_encode %{ ! __ paddq($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVL src1 src2)); ! format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} ins_encode %{ int vector_len = 0; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVL src (LoadVector mem))); ! format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} ins_encode %{ int vector_len = 0; ! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 4); ! match(Set dst (AddVL src1 src2)); ! format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 4); ! match(Set dst (AddVL src (LoadVector mem))); ! format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVL src1 src2)); ! format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} ins_encode %{ ! int vector_len = 2; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVL src (LoadVector mem))); ! format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} ins_encode %{ ! int vector_len = 2; ! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // Floats vector add ! instruct vadd2F(vecD dst, vecD src) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (AddVF dst src)); ! format %{ "addps $dst,$src\t! add packed2F" %} ins_encode %{ ! __ addps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVF src1 src2)); ! format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} ins_encode %{ int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVF src (LoadVector mem))); ! format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} ins_encode %{ int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4F(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (AddVF dst src)); ! format %{ "addps $dst,$src\t! add packed4F" %} ins_encode %{ ! __ addps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! 
instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVF src1 src2)); ! format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} ins_encode %{ int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVF src (LoadVector mem))); ! format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} ins_encode %{ ! int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (AddVF src1 src2)); ! format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} ins_encode %{ int vector_len = 1; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (AddVF src (LoadVector mem))); ! format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} ins_encode %{ int vector_len = 1; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVF src1 src2)); ! format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} ins_encode %{ int vector_len = 2; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVF src (LoadVector mem))); ! format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} ins_encode %{ int vector_len = 2; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // Doubles vector add ! instruct vadd2D(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 2); ! match(Set dst (AddVD dst src)); ! format %{ "addpd $dst,$src\t! add packed2D" %} ins_encode %{ ! __ addpd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVD src1 src2)); ! format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} ins_encode %{ int vector_len = 0; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVD src (LoadVector mem))); ! format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} ins_encode %{ int vector_len = 0; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVD src1 src2)); ! format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! 
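// Illustrative note, not part of this change: unlike the reduction rules,
// which chain scalar mulss/mulsd over one extracted lane at a time, the
// packed float/double adds touch every lane with a single instruction.
// Scalar equivalent (helper name is an assumption for illustration):
static void vaddpd_model(double* dst, const double* src1, const double* src2, int n) {
  for (int i = 0; i < n; i++) {
    dst[i] = src1[i] + src2[i];   // one addpd/vaddpd covers all n lanes
  }
}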
instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVD src (LoadVector mem))); ! format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} ins_encode %{ int vector_len = 1; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVD src1 src2)); ! format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} ins_encode %{ ! int vector_len = 2; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVD src (LoadVector mem))); ! format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} ins_encode %{ int vector_len = 2; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // --------------------------------- SUB -------------------------------------- ! // Bytes vector sub ! instruct vsub4B(vecS dst, vecS src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (SubVB dst src)); ! format %{ "psubb $dst,$src\t! sub packed4B" %} ins_encode %{ ! __ psubb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} ins_encode %{ int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} ins_encode %{ int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8B(vecD dst, vecD src) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (SubVB dst src)); ! format %{ "psubb $dst,$src\t! sub packed8B" %} ins_encode %{ ! __ psubb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub16B(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 16); ! match(Set dst (SubVB dst src)); ! format %{ "psubb $dst,$src\t! sub packed16B" %} ins_encode %{ ! __ psubb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 16); ! match(Set dst (SubVB src1 src2)); ! 
format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} ins_encode %{ int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 16); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} ins_encode %{ int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 32); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} ins_encode %{ ! int vector_len = 1; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 32); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} ins_encode %{ ! int vector_len = 1; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 64); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} ins_encode %{ ! int vector_len = 2; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 64); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} ins_encode %{ ! int vector_len = 2; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // Shorts/Chars vector sub ! instruct vsub2S(vecS dst, vecS src) %{ ! predicate(n->as_Vector()->length() == 2); match(Set dst (SubVS dst src)); ! format %{ "psubw $dst,$src\t! sub packed2S" %} ins_encode %{ __ psubw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} ins_encode %{ int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub4S(vecD dst, vecD src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (SubVS dst src)); ! format %{ "psubw $dst,$src\t! sub packed4S" %} ins_encode %{ ! __ psubw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} ins_encode %{ int vector_len = 0; ! 
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8S(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (SubVS dst src)); ! format %{ "psubw $dst,$src\t! sub packed8S" %} ins_encode %{ ! __ psubw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ ! int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} ins_encode %{ int vector_len = 1; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} ins_encode %{ int vector_len = 1; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} ins_encode %{ int vector_len = 2; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} ins_encode %{ int vector_len = 2; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); --- 5423,7379 ---- %} ins_pipe( pipe_slow ); %} #endif ! instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (MulReductionVF dst src2)); ! effect(TEMP dst, TEMP tmp); ! format %{ "mulss $dst,$src2\n\t" ! "pshufd $tmp,$src2,0x01\n\t" ! "mulss $dst,$tmp\t! mul reduction2F" %} ins_encode %{ ! __ mulss($dst$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseAVX > 0); ! match(Set dst (MulReductionVF dst src2)); ! effect(TEMP tmp, TEMP dst); ! 
format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} ins_encode %{ ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (MulReductionVF dst src2)); ! effect(TEMP dst, TEMP tmp); ! format %{ "mulss $dst,$src2\n\t" ! "pshufd $tmp,$src2,0x01\n\t" ! "mulss $dst,$tmp\n\t" ! "pshufd $tmp,$src2,0x02\n\t" ! "mulss $dst,$tmp\n\t" ! "pshufd $tmp,$src2,0x03\n\t" ! "mulss $dst,$tmp\t! mul reduction4F" %} ! ins_encode %{ ! __ mulss($dst$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseAVX > 0); ! match(Set dst (MulReductionVF dst src2)); ! effect(TEMP tmp, TEMP dst); ! format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vmulss $dst,$dst,$tmp\t! mul reduction4F" %} ins_encode %{ ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); ! match(Set dst (MulReductionVF dst src2)); ! effect(TEMP tmp, TEMP dst, TEMP tmp2); ! format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "vextractf128 $tmp2,$src2\n\t" ! "vmulss $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0x01\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x02\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x03\n\t" ! "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} ins_encode %{ ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); ! 
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); ! match(Set dst (MulReductionVF dst src2)); ! effect(TEMP tmp, TEMP dst, TEMP tmp2); ! format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" ! "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x1\n\t" ! "vmulss $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0x01\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x02\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x03\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x2\n\t" ! "vmulss $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0x01\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x02\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x03\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x3\n\t" ! "vmulss $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0x01\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x02\n\t" ! "vmulss $dst,$dst,$tmp\n\t" ! "pshufd $tmp,$tmp2,0x03\n\t" ! "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} ins_encode %{ ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); ! __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! 
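// Illustrative note, not part of this change: the MulReductionVF rules above
// compute the ordered product of the incoming scalar in dst and every float
// lane of src2; each vmulss plus pshufd (and, for the 256/512-bit forms,
// vextractf*) pair corresponds to one step of the loop below. Helper and
// parameter names are assumptions for illustration only:
static float mul_reduction_float(float acc, const float* src2, int vlen) {
  for (int i = 0; i < vlen; i++) {
    acc *= src2[i];   // one vmulss per lane, lanes visited in order
  }
  return acc;
}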
instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); ! match(Set dst (MulReductionVD dst src2)); ! effect(TEMP dst, TEMP tmp); ! format %{ "mulsd $dst,$src2\n\t" ! "pshufd $tmp,$src2,0xE\n\t" "mulsd $dst,$tmp\t! mul reduction2D" %} ins_encode %{ ! __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseAVX > 0); ! match(Set dst (MulReductionVD dst src2)); ! effect(TEMP tmp, TEMP dst); ! format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %} ins_encode %{ ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 0); ! match(Set dst (MulReductionVD dst src2)); ! effect(TEMP tmp, TEMP dst, TEMP tmp2); ! format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vmulsd $dst,$dst,$tmp\n\t" ! "vextractf128 $tmp2,$src2\n\t" ! "vmulsd $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0xE\n\t" ! "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} ins_encode %{ ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 2); ! match(Set dst (MulReductionVD dst src2)); ! effect(TEMP tmp, TEMP dst, TEMP tmp2); ! format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vmulsd $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x1\n\t" ! "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$src2,0xE\n\t" ! "vmulsd $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x2\n\t" ! "vmulsd $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0xE\n\t" ! "vmulsd $dst,$dst,$tmp\n\t" ! "vextractf32x4 $tmp2,$src2, 0x3\n\t" ! "vmulsd $dst,$dst,$tmp2\n\t" ! "pshufd $tmp,$tmp2,0xE\n\t" ! "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} ins_encode %{ ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); ! __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); ! 
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} // ====================VECTOR ARITHMETIC======================================= // --------------------------------- ADD -------------------------------------- // Bytes vector add instruct vadd4B(vecS dst, vecS src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (AddVB dst src)); format %{ "paddb $dst,$src\t! add packed4B" %} ins_encode %{ __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} ins_encode %{ int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (AddVB dst src2)); ! effect(TEMP src1); ! format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} ins_encode %{ int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (AddVB dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd8B(vecD dst, vecD src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (AddVB dst src)); ! format %{ "paddb $dst,$src\t! add packed8B" %} ins_encode %{ __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd8B_reg_avx(vecD dst, vecD src1, vecS src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! 
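// Illustrative note, not part of this change: the new byte and short add rules
// are selected by CPU capability rather than by UseAVX level alone. The helper
// below only summarizes how the VM_Version predicates used above partition
// machines; it is an assumption for illustration and not part of the source.
enum AddVBFlavor { ADDVB_SSE, ADDVB_AVX, ADDVB_EVEX, ADDVB_EVEX_SPECIAL };

static AddVBFlavor pick_addvb_flavor(bool use_avx, bool use_avx512, bool has_avx512bw) {
  if (!use_avx)      return ADDVB_SSE;       // vadd*B: legacy paddb, dst updated in place
  if (!use_avx512)   return ADDVB_AVX;       // *_reg_avx / *_mem_avx: VEX three-operand form
  if (has_avx512bw)  return ADDVB_EVEX;      // *_reg_evex / *_mem_evex: EVEX byte/word ops available
  return ADDVB_EVEX_SPECIAL;                 // *_evex_special: AVX-512 without BW, dst tied to the first input
}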
instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} ins_encode %{ int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (AddVB dst src2)); ! effect(TEMP src1); ! format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %} ins_encode %{ ! int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} ins_encode %{ ! int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (AddVB dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} ins_encode %{ ! int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16B(vecX dst, vecX src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 16); ! match(Set dst (AddVB dst src)); ! format %{ "paddb $dst,$src\t! add packed16B" %} ins_encode %{ ! __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} ins_encode %{ int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} ins_encode %{ int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! match(Set dst (AddVB dst src2)); ! effect(TEMP src1); ! format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{ ! 
predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (AddVB dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); ! match(Set dst (AddVB dst src2)); ! effect(TEMP src1); ! format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %} ins_encode %{ int vector_len = 1; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ins_encode %{ int vector_len = 1; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (AddVB dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! 
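// Illustrative note, not part of this change: the *_reg_evex_special rules
// above (guarded by VM_Version::supports_avx512nobw()) match (AddVB dst src2),
// so the destination is also the first input and the add is effectively a
// two-operand update, as the "vpaddb $dst,$dst,$src2" format strings show.
// Scalar sketch (helper name is an assumption for illustration):
static void addvb_two_operand(signed char* dst_and_first_src, const signed char* src2, int n) {
  for (int i = 0; i < n; i++) {
    dst_and_first_src[i] = (signed char)(dst_and_first_src[i] + src2[i]);   // dst doubles as the first source
  }
}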
instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} ins_encode %{ int vector_len = 2; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} ins_encode %{ ! int vector_len = 2; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // Shorts/Chars vector add ! instruct vadd2S(vecS dst, vecS src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVS dst src)); ! format %{ "paddw $dst,$src\t! add packed2S" %} ins_encode %{ ! __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); ! match(Set dst (AddVS dst src2)); ! effect(TEMP src1); ! format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (AddVS dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4S(vecD dst, vecD src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! 
match(Set dst (AddVS dst src)); ! format %{ "paddw $dst,$src\t! add packed4S" %} ins_encode %{ ! __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd4S_reg_avx(vecD dst, vecD src1, vecS src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (AddVS dst src2)); ! effect(TEMP src1); ! format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (AddVS dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8S(vecX dst, vecX src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); ! match(Set dst (AddVS dst src)); ! format %{ "paddw $dst,$src\t! add packed8S" %} ins_encode %{ ! __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} ins_encode %{ ! int vector_len = 0; ! 
__ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (AddVS dst src2)); ! effect(TEMP src1); ! format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (AddVS dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} ins_encode %{ int vector_len = 1; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! match(Set dst (AddVS dst src2)); ! effect(TEMP src1); ! format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %} ins_encode %{ int vector_len = 1; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} ! ins_encode %{ ! 
int vector_len = 1; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (AddVS dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} ins_encode %{ int vector_len = 2; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} ins_encode %{ int vector_len = 2; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // Integers vector add ! instruct vadd2I(vecD dst, vecD src) %{ predicate(n->as_Vector()->length() == 2); ! match(Set dst (AddVI dst src)); ! format %{ "paddd $dst,$src\t! add packed2I" %} ins_encode %{ ! __ paddd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVI src1 src2)); ! format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} ins_encode %{ int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVI src (LoadVector mem))); ! format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} ins_encode %{ int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4I(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (AddVI dst src)); ! format %{ "paddd $dst,$src\t! add packed4I" %} ! ins_encode %{ ! __ paddd($dst$$XMMRegister, $src$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVI src1 src2)); ! format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVI src (LoadVector mem))); ! format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (AddVI src1 src2)); ! format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} ins_encode %{ int vector_len = 1; ! 
__ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (AddVI src (LoadVector mem))); ! format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVI src1 src2)); ! format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} ins_encode %{ int vector_len = 2; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVI src (LoadVector mem))); ! format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! // Longs vector add ! instruct vadd2L(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (AddVL dst src)); ! format %{ "paddq $dst,$src\t! add packed2L" %} ins_encode %{ ! __ paddq($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVL src1 src2)); ! format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} ins_encode %{ int vector_len = 0; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVL src (LoadVector mem))); ! format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} ins_encode %{ int vector_len = 0; ! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 4); ! match(Set dst (AddVL src1 src2)); ! format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 4); ! match(Set dst (AddVL src (LoadVector mem))); ! format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVL src1 src2)); ! format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} ins_encode %{ ! int vector_len = 2; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVL src (LoadVector mem))); ! format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // Floats vector add ! instruct vadd2F(vecD dst, vecD src) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (AddVF dst src)); ! format %{ "addps $dst,$src\t! add packed2F" %} ! ins_encode %{ ! __ addps($dst$$XMMRegister, $src$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVF src1 src2)); ! format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} ! ins_encode %{ ! int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVF src (LoadVector mem))); ! format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} ! ins_encode %{ ! int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd4F(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (AddVF dst src)); ! format %{ "addps $dst,$src\t! add packed4F" %} ! ins_encode %{ ! __ addps($dst$$XMMRegister, $src$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVF src1 src2)); ! format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} ! ins_encode %{ ! int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVF src (LoadVector mem))); ! format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} ! ins_encode %{ ! int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (AddVF src1 src2)); ! format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} ! ins_encode %{ ! int vector_len = 1; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (AddVF src (LoadVector mem))); ! format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} ! ins_encode %{ ! int vector_len = 1; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVF src1 src2)); ! format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} ! ins_encode %{ ! int vector_len = 2; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVF src (LoadVector mem))); ! format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // Doubles vector add ! instruct vadd2D(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (AddVD dst src)); ! format %{ "addpd $dst,$src\t! add packed2D" %} ! ins_encode %{ ! __ addpd($dst$$XMMRegister, $src$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVD src1 src2)); ! format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} ! ins_encode %{ ! int vector_len = 0; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (AddVD src (LoadVector mem))); ! format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} ! ins_encode %{ ! int vector_len = 0; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVD src1 src2)); ! format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} ! ins_encode %{ ! int vector_len = 1; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (AddVD src (LoadVector mem))); ! format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} ! ins_encode %{ ! int vector_len = 1; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVD src1 src2)); ! format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} ! ins_encode %{ ! int vector_len = 2; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVD src (LoadVector mem))); ! format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} ! ins_encode %{ ! int vector_len = 2; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // --------------------------------- SUB -------------------------------------- ! ! // Bytes vector sub ! instruct vsub4B(vecS dst, vecS src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (SubVB dst src)); ! format %{ "psubb $dst,$src\t! sub packed4B" %} ! ins_encode %{ ! __ psubb($dst$$XMMRegister, $src$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! 
match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (SubVB dst src2)); ! effect(TEMP src1); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (SubVB dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8B(vecD dst, vecD src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); ! match(Set dst (SubVB dst src)); ! format %{ "psubb $dst,$src\t! sub packed8B" %} ! ins_encode %{ ! __ psubb($dst$$XMMRegister, $src$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (SubVB dst src2)); ! effect(TEMP src1); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); ! 
match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (SubVB dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16B(vecX dst, vecX src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 16); ! match(Set dst (SubVB dst src)); ! format %{ "psubb $dst,$src\t! sub packed16B" %} ! ins_encode %{ ! __ psubb($dst$$XMMRegister, $src$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! match(Set dst (SubVB dst src2)); ! effect(TEMP src1); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! 
match(Set dst (SubVB dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); ! match(Set dst (SubVB dst src2)); ! effect(TEMP src1); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); ! match(Set dst (SubVB dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // Shorts/Chars vector sub ! 
instruct vsub2S(vecS dst, vecS src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (SubVS dst src)); ! format %{ "psubw $dst,$src\t! sub packed2S" %} ! ins_encode %{ ! __ psubw($dst$$XMMRegister, $src$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); ! match(Set dst (SubVS dst src2)); ! effect(TEMP src1); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); ! match(Set dst (SubVS dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub4S(vecD dst, vecD src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (SubVS dst src)); ! format %{ "psubw $dst,$src\t! sub packed4S" %} ins_encode %{ ! __ psubw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} ins_encode %{ int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (SubVS src1 src2)); ! 
format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} ins_encode %{ int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (SubVS dst src2)); ! effect(TEMP src1); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (SubVS dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8S(vecX dst, vecX src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS dst src)); ! format %{ "psubw $dst,$src\t! sub packed8S" %} ins_encode %{ __ psubw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (SubVS dst src2)); ! effect(TEMP src1); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); ! match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ int vector_len = 0; ! 
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (SubVS dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} ins_encode %{ ! int vector_len = 1; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! match(Set dst (SubVS dst src2)); ! effect(TEMP src1); format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} ins_encode %{ int vector_len = 1; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} ins_encode %{ int vector_len = 1; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} + instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (SubVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); + %} + instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed32S" %} ins_encode %{ int vector_len = 2; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} ins_encode %{ int vector_len = 2; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
*** 6881,7057 **** __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (SubVD src (LoadVector mem))); ! format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SubVD src1 src2)); ! format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} ins_encode %{ ! int vector_len = 2; ! __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SubVD src (LoadVector mem))); ! format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} ins_encode %{ ! int vector_len = 2; ! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // --------------------------------- MUL -------------------------------------- ! ! // Shorts/Chars vector mul ! instruct vmul2S(vecS dst, vecS src) %{ ! predicate(n->as_Vector()->length() == 2); match(Set dst (MulVS dst src)); ! format %{ "pmullw $dst,$src\t! mul packed2S" %} ins_encode %{ __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} ins_encode %{ int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul4S(vecD dst, vecD src) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (MulVS dst src)); ! format %{ "pmullw $dst,$src\t! mul packed4S" %} ins_encode %{ ! __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} ins_encode %{ int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul8S(vecX dst, vecX src) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (MulVS dst src)); ! format %{ "pmullw $dst,$src\t! mul packed8S" %} ins_encode %{ ! __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! 
instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} ins_encode %{ ! int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ int vector_len = 1; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} ins_encode %{ int vector_len = 1; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} ins_encode %{ int vector_len = 2; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} ins_encode %{ int vector_len = 2; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); --- 7718,8078 ---- __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (SubVD src (LoadVector mem))); ! format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} ! ins_encode %{ ! int vector_len = 1; ! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SubVD src1 src2)); ! format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} ! ins_encode %{ ! int vector_len = 2; ! __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SubVD src (LoadVector mem))); ! format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} ! ins_encode %{ ! int vector_len = 2; ! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // --------------------------------- MUL -------------------------------------- ! ! // Shorts/Chars vector mul ! instruct vmul2S(vecS dst, vecS src) %{ ! 
predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (MulVS dst src)); ! format %{ "pmullw $dst,$src\t! mul packed2S" %} ! ins_encode %{ ! __ pmullw($dst$$XMMRegister, $src$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); ! match(Set dst (MulVS dst src2)); ! effect(TEMP src1); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); ! match(Set dst (MulVS dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul4S(vecD dst, vecD src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (MulVS dst src)); ! format %{ "pmullw $dst,$src\t! mul packed4S" %} ! ins_encode %{ ! __ pmullw($dst$$XMMRegister, $src$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (MulVS src1 src2)); ! 
format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (MulVS dst src2)); ! effect(TEMP src1); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (MulVS dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul8S(vecX dst, vecX src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS dst src)); ! format %{ "pmullw $dst,$src\t! mul packed8S" %} ins_encode %{ __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (MulVS dst src2)); ! effect(TEMP src1); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); ! match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! 
mul packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (MulVS dst (LoadVector mem))); ! effect(TEMP src); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ ! int vector_len = 1; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! match(Set dst (MulVS dst src2)); ! effect(TEMP src1); format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ int vector_len = 1; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} ins_encode %{ int vector_len = 1; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} + instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (MulVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); + %} + instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! 
predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
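// In the ins_encode blocks above, vector_len selects the operand width handed to
// the assembler: 0 for the 128-bit forms (packed2S/4S/8S), 1 for the 256-bit
// forms (packed16S), and 2 for the 512-bit forms (packed32S).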
*** 7677,7878 **** instruct vsqrt4D_mem(vecY dst, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SqrtVD (LoadVector mem))); format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SqrtVD src)); ! format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} ins_encode %{ ! int vector_len = 2; ! __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsqrt8D_mem(vecZ dst, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SqrtVD (LoadVector mem))); ! format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} ins_encode %{ ! int vector_len = 2; ! __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // ------------------------------ LeftShift ----------------------------------- ! ! // Shorts/Chars vector left shift ! instruct vsll2S(vecS dst, vecS shift) %{ ! predicate(n->as_Vector()->length() == 2); match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed2S" %} ins_encode %{ __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsll2S_imm(vecS dst, immI8 shift) %{ ! predicate(n->as_Vector()->length() == 2); match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed2S" %} ins_encode %{ __ psllw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll4S(vecD dst, vecS shift) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed4S" %} ins_encode %{ ! __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsll4S_imm(vecD dst, immI8 shift) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed4S" %} ins_encode %{ ! __ psllw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S(vecX dst, vecS shift) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ ! __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_imm(vecX dst, immI8 shift) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ ! __ psllw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ ! int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ ! int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ int vector_len = 1; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); --- 8698,9083 ---- instruct vsqrt4D_mem(vecY dst, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SqrtVD (LoadVector mem))); format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SqrtVD src)); ! format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} ! ins_encode %{ ! int vector_len = 2; ! __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); ! %} ! 
ins_pipe( pipe_slow ); ! %} ! ! instruct vsqrt8D_mem(vecZ dst, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SqrtVD (LoadVector mem))); ! format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} ! ins_encode %{ ! int vector_len = 2; ! __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // ------------------------------ LeftShift ----------------------------------- ! ! // Shorts/Chars vector left shift ! instruct vsll2S(vecS dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed2S" %} ! ins_encode %{ ! __ psllw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2S_imm(vecS dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed2S" %} ! ins_encode %{ ! __ psllw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! 
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll4S(vecD dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed4S" %} ! ins_encode %{ ! __ psllw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll4S_imm(vecD dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed4S" %} ! ins_encode %{ ! __ psllw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S(vecX dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_imm(vecX dst, immI8 shift) %{ ! 
predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ __ psllw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); match(Set dst (LShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ ! 
predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ ! int vector_len = 1; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ ! int vector_len = 1; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ int vector_len = 1; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVS dst shift)); ! effect(TEMP src); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
*** 8077,8271 **** instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} ins_encode %{ ! int vector_len = 2; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVL src shift)); ! format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} ins_encode %{ ! int vector_len = 2; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! // ----------------------- LogicalRightShift ----------------------------------- ! ! // Shorts vector logical right shift produces incorrect Java result ! // for negative data because java code convert short value into int with ! // sign extension before a shift. But char vectors are fine since chars are ! // unsigned values. ! ! instruct vsrl2S(vecS dst, vecS shift) %{ ! predicate(n->as_Vector()->length() == 2); match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsrl2S_imm(vecS dst, immI8 shift) %{ ! predicate(n->as_Vector()->length() == 2); match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ins_encode %{ int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl4S(vecD dst, vecS shift) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} ins_encode %{ ! __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsrl4S_imm(vecD dst, immI8 shift) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} ins_encode %{ ! __ psrlw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS src shift)); ! 
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S(vecX dst, vecS shift) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ins_encode %{ ! __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S_imm(vecX dst, immI8 shift) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ins_encode %{ ! __ psrlw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ ! int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ ! int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ int vector_len = 1; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); --- 9282,9660 ---- instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} ins_encode %{ ! int vector_len = 2; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! 
match(Set dst (LShiftVL src shift)); ! format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // ----------------------- LogicalRightShift ----------------------------------- ! ! // Shorts vector logical right shift produces incorrect Java result ! // for negative data because java code convert short value into int with ! // sign extension before a shift. But char vectors are fine since chars are ! // unsigned values. ! ! instruct vsrl2S(vecS dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2S_imm(vecS dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! __ psrlw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS dst shift)); ! 
effect(TEMP src); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S(vecD dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S_imm(vecD dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! __ psrlw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S(vecX dst, vecS shift) %{ ! 
predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S_imm(vecX dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); match(Set dst (URShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ ! 
predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! match(Set dst (URShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ ! int vector_len = 1; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ ! int vector_len = 1; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ int vector_len = 1; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! match(Set dst (URShiftVS dst shift)); ! effect(TEMP src); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
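
The LogicalRightShift note in this hunk (shorts produce an incorrect Java result, chars are fine) comes down to Java's integral promotion: a short operand of >>> is sign-extended to int before the shift, so a plain packed 16-bit logical shift would disagree for negative elements, while char elements are zero-extended and match. A minimal Java sketch of that difference, using a hypothetical demo class that is not part of this file or change:

// Illustration only: Java >>> on shorts vs. a 16-bit packed logical shift.
public class ShortShiftDemo {
    public static void main(String[] args) {
        short s = (short) 0x8000;                            // -32768
        // Java sign-extends the short to int before shifting, so ones are
        // shifted into the upper bits of the truncated 16-bit result.
        short javaResult   = (short) (s >>> 2);              // 0xE000 -> -8192
        // A packed 16-bit logical shift (psrlw) shifts zeros in instead.
        short packedResult = (short) ((s & 0xFFFF) >>> 2);   // 0x2000 ->  8192
        System.out.println(javaResult + " vs " + packedResult);          // -8192 vs 8192

        char c = 0x8000;                                     // chars are unsigned
        char javaChar   = (char) (c >>> 2);                  // 0x2000
        char packedChar = (char) ((c & 0xFFFF) >>> 2);       // 0x2000, identical
        System.out.println((int) javaChar + " vs " + (int) packedChar);  // 8192 vs 8192
    }
}

Char data never triggers the mismatch because zero extension and the 16-bit logical shift agree.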
*** 8491,8501 ****
  // ------------------- ArithmeticRightShift -----------------------------------

  // Shorts/Chars vector arithmetic right shift
  instruct vsra2S(vecS dst, vecS shift) %{
!   predicate(n->as_Vector()->length() == 2);
    match(Set dst (RShiftVS dst shift));
    format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
    ins_encode %{
      __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
    %}
--- 9880,9890 ----
  // ------------------- ArithmeticRightShift -----------------------------------

  // Shorts/Chars vector arithmetic right shift
  instruct vsra2S(vecS dst, vecS shift) %{
!   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
    match(Set dst (RShiftVS dst shift));
    format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
    ins_encode %{
      __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
    %}
*** 8510,8660 **** __ psraw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra4S(vecD dst, vecS shift) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ __ psraw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsra4S_imm(vecD dst, immI8 shift) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ __ psraw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra8S(vecX dst, vecS shift) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ __ psraw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsra8S_imm(vecX dst, immI8 shift) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ __ psraw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ ! 
predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); --- 9899,10233 ---- __ psraw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} + instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); + %} + instruct vsra4S(vecD dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ __ psraw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsra4S_imm(vecD dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ __ psraw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} + instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); + %} + instruct vsra8S(vecX dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ __ psraw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsra8S_imm(vecX dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ __ psraw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (RShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); ! match(Set dst (RShiftVS dst shift)); ! effect(TEMP src); ! 
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ ! predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); ! match(Set dst (RShiftVS dst shift)); ! effect(TEMP src); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ ! predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} + instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); + %} + instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);