< prev index next >
src/cpu/x86/vm/x86.ad
Print this page
@@ -1714,10 +1714,40 @@
}
return ret_value; // Per default match rules are supported.
}
+// Vector-length-aware refinement of match_rule_supported(): an opcode that the
+// generic check accepts can still be rejected here for a specific vlen.
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // Nothing to refine when the opcode is not supported at all.
+  if (!match_rule_supported(opcode)) {
+    return false;
+  }
+
+  switch (opcode) {
+    // Byte add/sub at vlen 64 is only accepted with AVX512BW.
+    case Op_AddVB:
+    case Op_SubVB:
+      if (vlen == 64 && !VM_Version::supports_avx512bw()) {
+        return false;
+      }
+      break;
+
+    // Short shift/mul/add/sub at vlen 32 is only accepted with AVX512BW.
+    case Op_URShiftVS:
+    case Op_RShiftVS:
+    case Op_LShiftVS:
+    case Op_MulVS:
+    case Op_AddVS:
+    case Op_SubVS:
+      if (vlen == 32 && !VM_Version::supports_avx512bw()) {
+        return false;
+      }
+      break;
+
+    // CMoveVD is accepted for vlen 4 only.
+    case Op_CMoveVD:
+      if (vlen != 4) {
+        return false;
+      }
+      break;
+  }
+
+  return true;  // Per default match rules are supported.
+}
+
const int Matcher::float_pressure(int default_pressure_threshold) {
int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
if (UseAVX > 2) {
// Increase pressure threshold on machines with AVX3 which have
@@ -1757,15 +1787,13 @@
case T_CHAR:
if (size < 4) return 0;
break;
case T_BYTE:
if (size < 4) return 0;
- if ((size > 32) && !VM_Version::supports_avx512bw()) return 0;
break;
case T_SHORT:
if (size < 4) return 0;
- if ((size > 16) && !VM_Version::supports_avx512bw()) return 0;
break;
default:
ShouldNotReachHere();
}
return size;
@@ -1965,31 +1993,38 @@
#endif
}
bool is_single_byte = false;
int vec_len = 0;
if ((UseAVX > 2) && (stack_offset != 0)) {
+ int tuple_type = Assembler::EVEX_FVM;
+ int input_size = Assembler::EVEX_32bit;
switch (ireg) {
case Op_VecS:
+ tuple_type = Assembler::EVEX_T1S;
+ break;
case Op_VecD:
+ tuple_type = Assembler::EVEX_T1S;
+ input_size = Assembler::EVEX_64bit;
+ break;
case Op_VecX:
break;
case Op_VecY:
vec_len = 1;
break;
case Op_VecZ:
vec_len = 2;
break;
}
- is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0);
+ is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
}
int offset_size = 0;
int size = 5;
if (UseAVX > 2 ) {
- if ((VM_Version::supports_avx512vl() == false) && (vec_len == 2)) {
+ if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
size += 2; // Need an additional two bytes for EVEX encoding
- } else if ((VM_Version::supports_avx512vl() == false) && (vec_len < 2)) {
+ } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
} else {
offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
size += 2; // Need an additional two bytes for EVEX encodding
}
@@ -2709,21 +2744,63 @@
%}
ins_pipe(pipe_slow);
%}
// AbsF rule for a float register pair: ANDs $src with the constant located at
// float_signmask() via vandps (vector_len 0) and writes $dst.
// This change narrows the predicate from "UseAVX > 0" to
// VM_Version::supports_avx256only(); the *_evex rules below cover UseAVX > 2.
instruct absF_reg_reg(regF dst, regF src) %{
- predicate(UseAVX > 0);
+ predicate(VM_Version::supports_avx256only());
+ match(Set dst (AbsF src));
+ ins_cost(150);
+ format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vandps($dst$$XMMRegister, $src$$XMMRegister,
+ ExternalAddress(float_signmask()), vector_len);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+#ifdef _LP64
// _LP64 variant of the AbsF rule, selected when UseAVX > 2 and
// VM_Version::supports_avx512vl() holds. The encoding is the same vandps
// against float_signmask() (vector_len 0) used by absF_reg_reg.
+instruct absF_reg_reg_evex(regF dst, regF src) %{
+ predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
+ match(Set dst (AbsF src));
+ ins_cost(150);
+ format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vandps($dst$$XMMRegister, $src$$XMMRegister,
+ ExternalAddress(float_signmask()), vector_len);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
// _LP64 AbsF rule selected when VM_Version::supports_avx512novl() holds.
// Unlike the other AbsF rules it reserves an extra register ($src2, declared
// TEMP) and delegates to the vabsss helper, passing $dst, $src1, $src2, the
// float_signmask() address and vector_len 0.
// NOTE(review): what vabsss does with $src2 is not visible in this file chunk.
+instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
+ predicate(VM_Version::supports_avx512novl());
+ match(Set dst (AbsF src1));
+ effect(TEMP src2);
+ ins_cost(150);
+ format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
+ ExternalAddress(float_signmask()), vector_len);
+ %}
+ ins_pipe(pipe_slow);
+%}
+#else // _LP64
// Non-_LP64 variant of absF_reg_reg_evex: selected on UseAVX > 2 alone (no
// avx512vl check) and encoded as vandps against float_signmask(), vector_len 0.
+instruct absF_reg_reg_evex(regF dst, regF src) %{
+ predicate(UseAVX > 2);
match(Set dst (AbsF src));
ins_cost(150);
format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
ins_encode %{
int vector_len = 0;
__ vandps($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(float_signmask()), vector_len);
%}
ins_pipe(pipe_slow);
%}
+#endif
instruct absD_reg(regD dst) %{
predicate((UseSSE>=2) && (UseAVX == 0));
match(Set dst (AbsD dst));
ins_cost(150);
@@ -2734,11 +2811,54 @@
%}
ins_pipe(pipe_slow);
%}
// AbsD rule for a double register pair: ANDs $src with the constant located at
// double_signmask() via vandpd (vector_len 0) and writes $dst.
// This change narrows the predicate from "UseAVX > 0" to
// VM_Version::supports_avx256only(); the *_evex rules below cover UseAVX > 2.
instruct absD_reg_reg(regD dst, regD src) %{
- predicate(UseAVX > 0);
+ predicate(VM_Version::supports_avx256only());
+ match(Set dst (AbsD src));
+ ins_cost(150);
+ format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
+ "# abs double by sign masking" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
+ ExternalAddress(double_signmask()), vector_len);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+#ifdef _LP64
// _LP64 variant of the AbsD rule, selected when UseAVX > 2 and
// VM_Version::supports_avx512vl() holds. The encoding is the same vandpd
// against double_signmask() (vector_len 0) used by absD_reg_reg.
+instruct absD_reg_reg_evex(regD dst, regD src) %{
+ predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
+ match(Set dst (AbsD src));
+ ins_cost(150);
+ format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
+ "# abs double by sign masking" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
+ ExternalAddress(double_signmask()), vector_len);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
// _LP64 AbsD rule selected when VM_Version::supports_avx512novl() holds.
// Unlike the other AbsD rules it reserves an extra register ($src2, declared
// TEMP) and delegates to the vabssd helper, passing $dst, $src1, $src2, the
// double_signmask() address and vector_len 0.
// The format text says "abs double" (it used to say "abs float", copied from
// the AbsF rule) so the printed disassembly matches the operation.
+instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
+ predicate(VM_Version::supports_avx512novl());
+ match(Set dst (AbsD src1));
+ effect(TEMP src2);
+ ins_cost(150);
+ format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
+ ExternalAddress(double_signmask()), vector_len);
+ %}
+ ins_pipe(pipe_slow);
+%}
+#else // _LP64
+instruct absD_reg_reg_evex(regD dst, regD src) %{
+ predicate(UseAVX > 2);
match(Set dst (AbsD src));
ins_cost(150);
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
"# abs double by sign masking" %}
ins_encode %{
@@ -2746,10 +2866,11 @@
__ vandpd($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(double_signmask()), vector_len);
%}
ins_pipe(pipe_slow);
%}
+#endif
instruct negF_reg(regF dst) %{
predicate((UseSSE>=1) && (UseAVX == 0));
match(Set dst (NegF dst));
ins_cost(150);
@@ -4552,11 +4673,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
- predicate(UseAVX > 0 && UseAVX < 3);
+ predicate(VM_Version::supports_avx256only());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp2,$tmp\n\t"
@@ -4611,11 +4732,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
- predicate(UseAVX > 0 && UseAVX < 3);
+ predicate(VM_Version::supports_avx256only());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"vphaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
@@ -4655,11 +4776,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
- predicate(UseAVX > 0 && UseAVX < 3);
+ predicate(VM_Version::supports_avx256only());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"vphaddd $tmp,$tmp,$tmp2\n\t"
"vextracti128 $tmp2,$tmp\n\t"
@@ -4710,11 +4831,11 @@
instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vextracti64x4 $tmp3,$src2\n\t"
+ format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
"vpaddd $tmp3,$tmp3,$src2\n\t"
"vextracti128 $tmp,$tmp3\n\t"
"vpaddd $tmp,$tmp,$tmp3\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
@@ -4722,11 +4843,11 @@
"vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction16I" %}
ins_encode %{
- __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
+ __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
__ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
__ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
@@ -4761,19 +4882,19 @@
instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t"
+ format %{ "vextracti128 $tmp,$src2\n\t"
"vpaddq $tmp2,$tmp,$src2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! add reduction4L" %}
ins_encode %{
- __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdq($tmp$$XMMRegister, $src1$$Register);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4784,21 +4905,21 @@
instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti64x4 $tmp2,$src2\n\t"
+ format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
"vpaddq $tmp2,$tmp2,$src2\n\t"
"vextracti128 $tmp,$tmp2\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! add reduction8L" %}
ins_encode %{
- __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
__ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4808,294 +4929,284 @@
%}
ins_pipe( pipe_slow );
%}
#endif
// Float add-reduction over a vecD operand for SSE without AVX
// (UseSSE >= 1 && UseAVX == 0).
// The revised rule matches (AddReductionVF dst src2): $dst is both the incoming
// accumulator and the result, so the former src1/tmp2 operands and the two
// movdqu copies are dropped. Emitted sequence: addss $dst,$src2;
// pshufd $tmp,$src2,0x01; addss $dst,$tmp.
-instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
- match(Set dst (AddReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2);
- format %{ "movdqu $tmp,$src1\n\t"
- "addss $tmp,$src2\n\t"
- "pshufd $tmp2,$src2,0x01\n\t"
- "addss $tmp,$tmp2\n\t"
- "movdqu $dst,$tmp\t! add reduction2F" %}
+ match(Set dst (AddReductionVF dst src2));
+ effect(TEMP dst, TEMP tmp);
+ format %{ "addss $dst,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "addss $dst,$tmp\t! add reduction2F" %}
ins_encode %{
- __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
- __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
- __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ addss($dst$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Float add-reduction over a vecD operand when UseAVX > 0.
// The revised rule matches (AddReductionVF dst src2) and accumulates directly
// in $dst with three-operand vaddss, so src1 and tmp2 are no longer operands.
// $tmp receives pshufd($src2, 0x01) before the second add.
-instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
predicate(UseAVX > 0);
- match(Set dst (AddReductionVF src1 src2));
- effect(TEMP tmp2, TEMP tmp);
- format %{ "vaddss $tmp2,$src1,$src2\n\t"
+ match(Set dst (AddReductionVF dst src2));
+ effect(TEMP dst, TEMP tmp);
+ format %{ "vaddss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
- "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %}
+ "vaddss $dst,$dst,$tmp\t! add reduction2F" %}
ins_encode %{
- __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Float add-reduction over a vecX operand for SSE without AVX
// (UseSSE >= 1 && UseAVX == 0).
// The revised rule matches (AddReductionVF dst src2) and accumulates in $dst:
// one addss with $src2 itself, then three pshufd/addss pairs using the
// immediates 0x01, 0x02 and 0x03 with $tmp as the shuffle target. The old
// src1/tmp2 operands and the leading/trailing movdqu copies are removed.
-instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
- match(Set dst (AddReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2);
- format %{ "movdqu $tmp,$src1\n\t"
- "addss $tmp,$src2\n\t"
- "pshufd $tmp2,$src2,0x01\n\t"
- "addss $tmp,$tmp2\n\t"
- "pshufd $tmp2,$src2,0x02\n\t"
- "addss $tmp,$tmp2\n\t"
- "pshufd $tmp2,$src2,0x03\n\t"
- "addss $tmp,$tmp2\n\t"
- "movdqu $dst,$tmp\t! add reduction4F" %}
- ins_encode %{
- __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
- __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
- __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
- __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
- __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+ match(Set dst (AddReductionVF dst src2));
+ effect(TEMP dst, TEMP tmp);
+ format %{ "addss $dst,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "addss $dst,$tmp\n\t"
+ "pshufd $tmp,$src2,0x02\n\t"
+ "addss $dst,$tmp\n\t"
+ "pshufd $tmp,$src2,0x03\n\t"
+ "addss $dst,$tmp\t! add reduction4F" %}
+ ins_encode %{
+ __ addss($dst$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+ __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+ __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Float add-reduction over a vecX operand when UseAVX > 0.
// The revised rule matches (AddReductionVF dst src2) and accumulates in $dst:
// one vaddss with $src2 itself, then three pshufd/vaddss pairs using the
// immediates 0x01, 0x02 and 0x03 with $tmp as the shuffle target.
// The first format line spells the second operand "$dst" (with the sigil) so
// the printed disassembly shows the register rather than the literal "dst".
-instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
predicate(UseAVX > 0);
- match(Set dst (AddReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2);
- format %{ "vaddss $tmp2,$src1,$src2\n\t"
+ match(Set dst (AddReductionVF dst src2));
+ effect(TEMP tmp, TEMP dst);
+ format %{ "vaddss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
- "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %}
+ "vaddss $dst,$dst,$tmp\t! add reduction4F" %}
ins_encode %{
- __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
- __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Float add-reduction over a vecY operand when UseAVX > 0.
// The revised rule matches (AddReductionVF dst src2) and accumulates in $dst.
// It first adds $src2 plus its pshufd 0x01/0x02/0x03 shuffles, then loads
// $tmp2 with vextractf128h($src2) and repeats the same add/shuffle pattern on
// $tmp2. Operand count drops by two: src1 is gone and the old tmp3 role is now
// played by tmp2.
-instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
- match(Set dst (AddReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vaddss $tmp2,$src1,$src2\n\t"
+ match(Set dst (AddReductionVF dst src2));
+ effect(TEMP tmp, TEMP dst, TEMP tmp2);
+ format %{ "vaddss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "vextractf128 $tmp3,$src2\n\t"
- "vaddss $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0x01\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x02\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x03\n\t"
- "vaddss $dst,$tmp2,$tmp\t! add reduction8F" %}
+ "vaddss $dst,$dst,$tmp\n\t"
+ "vextractf128 $tmp2,$src2\n\t"
+ "vaddss $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0x01\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x02\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x03\n\t"
+ "vaddss $dst,$dst,$tmp\t! add reduction8F" %}
ins_encode %{
- __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
- __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Float add-reduction over a vecZ operand; requires UseAVX > 2.
// The revised rule matches (AddReductionVF dst src2) and accumulates in $dst.
// $src2 is processed in four rounds: first $src2 itself, then $tmp2 loaded by
// vextractf32x4h with immediates 0x1, 0x2 and 0x3. Each round adds the source
// register and its pshufd 0x01/0x02/0x03 shuffles (staged in $tmp).
// The format text now prints "vextractf32x4", which agrees with the encoder
// call; the removed text printed "vextractf64x2" while already calling
// vextractf32x4h.
-instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2);
- match(Set dst (AddReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vaddss $tmp2,$src1,$src2\n\t"
+ match(Set dst (AddReductionVF dst src2));
+ effect(TEMP tmp, TEMP dst, TEMP tmp2);
+ format %{ "vaddss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "vextractf64x2 $tmp3,$src2, 0x1\n\t"
- "vaddss $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0x01\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x02\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x03\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "vextractf64x2 $tmp3,$src2, 0x2\n\t"
- "vaddss $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0x01\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x02\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x03\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "vextractf64x2 $tmp3,$src2, 0x3\n\t"
- "vaddss $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0x01\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x02\n\t"
- "vaddss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x03\n\t"
- "vaddss $dst,$tmp2,$tmp\t! add reduction16F" %}
+ "vaddss $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+ "vaddss $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0x01\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x02\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x03\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+ "vaddss $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0x01\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x02\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x03\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+ "vaddss $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0x01\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x02\n\t"
+ "vaddss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x03\n\t"
+ "vaddss $dst,$dst,$tmp\t! add reduction16F" %}
ins_encode %{
- __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
- __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
- __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Double add-reduction over a vecX operand for SSE without AVX
// (UseSSE >= 1 && UseAVX == 0).
// The revised rule matches (AddReductionVD dst src2) and accumulates in $dst:
// addsd $dst,$src2; pshufd $tmp,$src2,0xE; addsd $dst,$tmp.
// In the removed sequence the pshufd wrote $dst and the running sum lived in
// $tmp; the new sequence swaps those roles and drops src1.
-instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
- match(Set dst (AddReductionVD src1 src2));
+ match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst);
- format %{ "movdqu $tmp,$src1\n\t"
- "addsd $tmp,$src2\n\t"
- "pshufd $dst,$src2,0xE\n\t"
+ format %{ "addsd $dst,$src2\n\t"
+ "pshufd $tmp,$src2,0xE\n\t"
"addsd $dst,$tmp\t! add reduction2D" %}
ins_encode %{
- __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
- __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
- __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Double add-reduction over a vecX operand when UseAVX > 0.
// The revised rule matches (AddReductionVD dst src2) and accumulates directly
// in $dst with three-operand vaddsd; src1 and tmp2 are no longer operands.
// $tmp receives pshufd($src2, 0xE) before the second add.
-instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
predicate(UseAVX > 0);
- match(Set dst (AddReductionVD src1 src2));
- effect(TEMP tmp, TEMP tmp2);
- format %{ "vaddsd $tmp2,$src1,$src2\n\t"
+ match(Set dst (AddReductionVD dst src2));
+ effect(TEMP tmp, TEMP dst);
+ format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
- "vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %}
+ "vaddsd $dst,$dst,$tmp\t! add reduction2D" %}
ins_encode %{
- __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
- __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Double add-reduction over a vecY operand when UseAVX > 0.
// The revised rule matches (AddReductionVD dst src2) and accumulates in $dst:
// $src2 and its pshufd 0xE shuffle are added first, then $tmp2 is loaded from
// $src2 with vextractf128h and the same add/shuffle pair is repeated on $tmp2.
// The extract stays on vextractf128h, as in the removed lines and in
// radd8F_reduction_reg, because this rule is enabled for every UseAVX > 0
// configuration; vextractf32x4h is otherwise used only by the UseAVX > 2 vecZ
// rules. NOTE(review): confirm against the assembler's vextractf32x4h
// preconditions.
-instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
predicate(UseAVX > 0);
- match(Set dst (AddReductionVD src1 src2));
- effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vaddsd $tmp2,$src1,$src2\n\t"
+ match(Set dst (AddReductionVD dst src2));
+ effect(TEMP tmp, TEMP dst, TEMP tmp2);
+ format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
- "vaddsd $tmp2,$tmp2,$tmp\n\t"
- "vextractf128 $tmp3,$src2\n\t"
- "vaddsd $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0xE\n\t"
- "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %}
+ "vaddsd $dst,$dst,$tmp\n\t"
+ "vextractf128 $tmp2,$src2\n\t"
+ "vaddsd $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
ins_encode %{
- __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
- __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
- __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
- __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Double add-reduction over a vecZ operand; requires UseAVX > 2.
// The revised rule matches (AddReductionVD dst src2) and accumulates in $dst.
// $src2 is processed in four rounds: first $src2 itself, then $tmp2 loaded by
// vextractf32x4h with immediates 0x1, 0x2 and 0x3. Each round adds the source
// register and its pshufd 0xE shuffle (staged in $tmp).
// Both the format text and the encoder move from the vextractf64x2 form to
// the vextractf32x4 form in this change.
-instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
+instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
predicate(UseAVX > 2);
- match(Set dst (AddReductionVD src1 src2));
- effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vaddsd $tmp2,$src1,$src2\n\t"
+ match(Set dst (AddReductionVD dst src2));
+ effect(TEMP tmp, TEMP dst, TEMP tmp2);
+ format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
- "vaddsd $tmp2,$tmp2,$tmp\n\t"
- "vextractf64x2 $tmp3,$src2, 0x1\n\t"
- "vaddsd $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0xE\n\t"
- "vaddsd $tmp2,$tmp2,$tmp\n\t"
- "vextractf64x2 $tmp3,$src2, 0x2\n\t"
- "vaddsd $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0xE\n\t"
- "vaddsd $tmp2,$tmp2,$tmp\n\t"
- "vextractf64x2 $tmp3,$src2, 0x3\n\t"
- "vaddsd $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0xE\n\t"
- "vaddsd $dst,$tmp2,$tmp\t! add reduction8D" %}
+ "vaddsd $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+ "vaddsd $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vaddsd $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+ "vaddsd $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vaddsd $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+ "vaddsd $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
ins_encode %{
- __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
- __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
- __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
- __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
- __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
- __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
- __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
- __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
@@ -5214,11 +5325,11 @@
instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
predicate(UseAVX > 2);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vextracti64x4 $tmp3,$src2\n\t"
+ format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
"vpmulld $tmp3,$tmp3,$src2\n\t"
"vextracti128 $tmp,$tmp3\n\t"
"vpmulld $tmp,$tmp,$src2\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
@@ -5226,11 +5337,11 @@
"vpmulld $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpmulld $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction16I" %}
ins_encode %{
- __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
+ __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
__ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
__ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
@@ -5265,19 +5376,19 @@
instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t"
+ format %{ "vextracti128 $tmp,$src2\n\t"
"vpmullq $tmp2,$tmp,$src2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! mul reduction4L" %}
ins_encode %{
- __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ movdq($tmp$$XMMRegister, $src1$$Register);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5288,21 +5399,21 @@
instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti64x4 $tmp2,$src2\n\t"
+ format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
"vpmullq $tmp2,$tmp2,$src2\n\t"
"vextracti128 $tmp,$tmp2\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $tmp,$src1\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! mul reduction8L" %}
ins_encode %{
- __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
__ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5312,1231 +5423,1957 @@
%}
ins_pipe( pipe_slow );
%}
#endif
// Multiply-reduction of a 2-float vector for SSE-only targets (UseSSE >= 1 && UseAVX == 0).
// After this change dst is both the incoming scalar and the accumulator (the match rule
// reads dst), so the src1/tmp2 operands and the two movdqu copies are dropped.
// pshufd with imm 0x01 brings float lane 1 of src2 into the low lane for the scalar mulss.
-instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
- match(Set dst (MulReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2);
- format %{ "movdqu $tmp,$src1\n\t"
- "mulss $tmp,$src2\n\t"
- "pshufd $tmp2,$src2,0x01\n\t"
- "mulss $tmp,$tmp2\n\t"
- "movdqu $dst,$tmp\t! mul reduction2F" %}
+ match(Set dst (MulReductionVF dst src2));
+ effect(TEMP dst, TEMP tmp);
+ format %{ "mulss $dst,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "mulss $dst,$tmp\t! mul reduction2F" %}
ins_encode %{
- __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
- __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
- __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// AVX (UseAVX > 0) multiply-reduction of a 2-float vector.
// The new form accumulates directly in dst (dst is read by the match rule and also
// listed as TEMP), which removes the src1 operand and the tmp2 scratch register.
-instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
predicate(UseAVX > 0);
- match(Set dst (MulReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2);
- format %{ "vmulss $tmp2,$src1,$src2\n\t"
+ match(Set dst (MulReductionVF dst src2));
+ effect(TEMP tmp, TEMP dst);
+ format %{ "vmulss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
- "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %}
+ "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
ins_encode %{
- __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// SSE-only (UseSSE >= 1 && UseAVX == 0) multiply-reduction of a 4-float vector.
// dst is multiplied by lane 0 of src2 first; lanes 1..3 are then moved into the low
// lane of tmp one at a time (pshufd imm 0x01/0x02/0x03) and folded in with mulss.
// The rewrite removes src1/tmp2 and the movdqu copies by accumulating in dst.
-instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
- match(Set dst (MulReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2);
- format %{ "movdqu $tmp,$src1\n\t"
- "mulss $tmp,$src2\n\t"
- "pshufd $tmp2,$src2,0x01\n\t"
- "mulss $tmp,$tmp2\n\t"
- "pshufd $tmp2,$src2,0x02\n\t"
- "mulss $tmp,$tmp2\n\t"
- "pshufd $tmp2,$src2,0x03\n\t"
- "mulss $tmp,$tmp2\n\t"
- "movdqu $dst,$tmp\t! mul reduction4F" %}
- ins_encode %{
- __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
- __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
- __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
- __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
- __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
- __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
+ match(Set dst (MulReductionVF dst src2));
+ effect(TEMP dst, TEMP tmp);
+ format %{ "mulss $dst,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "mulss $dst,$tmp\n\t"
+ "pshufd $tmp,$src2,0x02\n\t"
+ "mulss $dst,$tmp\n\t"
+ "pshufd $tmp,$src2,0x03\n\t"
+ "mulss $dst,$tmp\t! mul reduction4F" %}
+ ins_encode %{
+ __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+ __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+ __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// AVX (UseAVX > 0) multiply-reduction of a 4-float vector.
// Same lane walk as the SSE form (pshufd imm 0x01..0x03 into tmp), but using the
// three-operand vmulss with dst as both source and destination accumulator.
-instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
predicate(UseAVX > 0);
- match(Set dst (MulReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2);
- format %{ "vmulss $tmp2,$src1,$src2\n\t"
+ match(Set dst (MulReductionVF dst src2));
+ effect(TEMP tmp, TEMP dst);
+ format %{ "vmulss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
- "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %}
+ "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
ins_encode %{
- __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
- __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// AVX (UseAVX > 0) multiply-reduction of an 8-float vector (vecY).
// The first four lanes are folded straight from src2; vextractf128h then copies
// (presumably the upper) 128 bits of src2 into tmp2 and the same four-lane walk is
// repeated on tmp2. With dst as the accumulator the old tmp3 register becomes tmp2
// and the src1 operand disappears.
-instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
- match(Set dst (MulReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vmulss $tmp2,$src1,$src2\n\t"
+ match(Set dst (MulReductionVF dst src2));
+ effect(TEMP tmp, TEMP dst, TEMP tmp2);
+ format %{ "vmulss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "vextractf128 $tmp3,$src2\n\t"
- "vmulss $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0x01\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x02\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x03\n\t"
- "vmulss $dst,$tmp2,$tmp\t! mul reduction8F" %}
+ "vmulss $dst,$dst,$tmp\n\t"
+ "vextractf128 $tmp2,$src2\n\t"
+ "vmulss $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0x01\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x02\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x03\n\t"
+ "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
ins_encode %{
- __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
- __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// AVX-512 (UseAVX > 2) multiply-reduction of a 16-float vector (vecZ).
// Lanes 0..3 are folded directly from src2; the remaining twelve lanes are handled in
// three groups of four: vextractf32x4h copies the 128-bit chunk selected by the
// immediate (0x1, 0x2, 0x3) into tmp2, then pshufd 0x01..0x03 exposes each lane in tmp.
// The rewrite accumulates in dst, so src1 is gone and the old tmp3 is now tmp2.
-instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 2);
- match(Set dst (MulReductionVF src1 src2));
- effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vmulss $tmp2,$src1,$src2\n\t"
+ match(Set dst (MulReductionVF dst src2));
+ effect(TEMP tmp, TEMP dst, TEMP tmp2);
+ format %{ "vmulss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "vextractf32x4 $tmp3,$src2, 0x1\n\t"
- "vmulss $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0x01\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x02\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x03\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "vextractf32x4 $tmp3,$src2, 0x2\n\t"
- "vmulss $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0x01\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x02\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x03\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "vextractf32x4 $tmp3,$src2, 0x3\n\t"
- "vmulss $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0x01\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x02\n\t"
- "vmulss $tmp2,$tmp2,$tmp\n\t"
- "pshufd $tmp,$tmp3,0x03\n\t"
- "vmulss $dst,$tmp2,$tmp\t! mul reduction16F" %}
+ "vmulss $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+ "vmulss $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0x01\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x02\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x03\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+ "vmulss $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0x01\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x02\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x03\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+ "vmulss $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0x01\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x02\n\t"
+ "vmulss $dst,$dst,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0x03\n\t"
+ "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
ins_encode %{
- __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
- __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
- __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// SSE-only (UseSSE >= 1 && UseAVX == 0) multiply-reduction of a 2-double vector.
// dst now carries the incoming scalar: it is multiplied by the low double of src2,
// then pshufd imm 0xE moves the high double of src2 into the low half of tmp for the
// second mulsd. The old form shuffled into dst and accumulated in tmp instead.
-instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
- match(Set dst (MulReductionVD src1 src2));
- effect(TEMP tmp, TEMP dst);
- format %{ "movdqu $tmp,$src1\n\t"
- "mulsd $tmp,$src2\n\t"
- "pshufd $dst,$src2,0xE\n\t"
+ match(Set dst (MulReductionVD dst src2));
+ effect(TEMP dst, TEMP tmp);
+ format %{ "mulsd $dst,$src2\n\t"
+ "pshufd $tmp,$src2,0xE\n\t"
"mulsd $dst,$tmp\t! mul reduction2D" %}
ins_encode %{
- __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
- __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
- __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// AVX (UseAVX > 0) multiply-reduction of a 2-double vector.
// dst is the accumulator (read by the match rule, also TEMP); pshufd imm 0xE exposes
// the high double of src2 in tmp. src1 and tmp2 from the old signature are removed.
-instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
predicate(UseAVX > 0);
- match(Set dst (MulReductionVD src1 src2));
- effect(TEMP tmp, TEMP tmp2);
- format %{ "vmulsd $tmp2,$src1,$src2\n\t"
+ match(Set dst (MulReductionVD dst src2));
+ effect(TEMP tmp, TEMP dst);
+ format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
- "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %}
+ "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
ins_encode %{
- __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
- __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// AVX (UseAVX > 0) multiply-reduction of a 4-double vector (vecY).
// The two doubles in the low 128 bits of src2 are folded first; vextractf128h then
// copies (presumably the upper) 128 bits into tmp2 and the pair is folded the same way.
// dst is the accumulator, so the old src1 operand and tmp3 register are dropped.
-instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
predicate(UseAVX > 0);
- match(Set dst (MulReductionVD src1 src2));
- effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vmulsd $tmp2,$src1,$src2\n\t"
+ match(Set dst (MulReductionVD dst src2));
+ effect(TEMP tmp, TEMP dst, TEMP tmp2);
+ format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
- "vmulsd $tmp2,$tmp2,$tmp\n\t"
- "vextractf128 $tmp3,$src2\n\t"
- "vmulsd $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0xE\n\t"
- "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %}
+ "vmulsd $dst,$dst,$tmp\n\t"
+ "vextractf128 $tmp2,$src2\n\t"
+ "vmulsd $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
ins_encode %{
- __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
- __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
- __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
- __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// AVX-512 (UseAVX > 2) multiply-reduction of an 8-double vector (vecZ).
// The low pair of doubles is folded directly from src2; each further 128-bit chunk is
// copied into tmp2 by vextractf32x4h (chunk chosen by the immediate 0x1..0x3) and its
// two doubles are folded, the high one via pshufd imm 0xE into tmp.
// dst is the accumulator, so src1 and the old tmp3 register are dropped.
// Review fix: the format text for the chunk-1 shuffle printed $src2 although the
// encoding shuffles the extracted chunk ($tmp2); the disassembly text now matches.
-instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
+instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
predicate(UseAVX > 2);
- match(Set dst (MulReductionVD src1 src2));
- effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vmulsd $tmp2,$src1,$src2\n\t"
+ match(Set dst (MulReductionVD dst src2));
+ effect(TEMP tmp, TEMP dst, TEMP tmp2);
+ format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
- "vmulsd $tmp2,$tmp2,$tmp\n\t"
- "vextractf64x2 $tmp3,$src2, 0x1\n\t"
- "vmulsd $tmp2,$tmp2,$tmp3\n\t"
+ "vmulsd $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+ "vmulsd $dst,$dst,$tmp2\n\t"
- "pshufd $tmp,$src2,0xE\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
- "vmulsd $tmp2,$tmp2,$tmp\n\t"
- "vextractf64x2 $tmp3,$src2, 0x2\n\t"
- "vmulsd $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0xE\n\t"
- "vmulsd $tmp2,$tmp2,$tmp\n\t"
- "vextractf64x2 $tmp3,$src2, 0x3\n\t"
- "vmulsd $tmp2,$tmp2,$tmp3\n\t"
- "pshufd $tmp,$tmp3,0xE\n\t"
- "vmulsd $dst,$tmp2,$tmp\t! mul reduction8D" %}
+ "vmulsd $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+ "vmulsd $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vmulsd $dst,$dst,$tmp\n\t"
+ "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+ "vmulsd $dst,$dst,$tmp2\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
ins_encode %{
- __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
- __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
- __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
- __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
- __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
- __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
- __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
- __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
- __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// ====================VECTOR ARITHMETIC=======================================
// --------------------------------- ADD --------------------------------------
// Bytes vector add
// Two-operand SSE byte add for 4-element vectors (dst += src via paddb).
// The patch adds the UseAVX == 0 guard, so this rule is only eligible when AVX is
// disabled; AVX-capable targets are left to the vadd4B_* variants.
instruct vadd4B(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVB dst src));
format %{ "paddb $dst,$src\t! add packed4B" %}
ins_encode %{
__ paddb($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// Three-operand vpaddb for 4-byte vectors, renamed from vadd4B_reg.
// The predicate moves from UseAVX > 0 to VM_Version::supports_avx256only()
// (presumably AVX/AVX2 without AVX-512 -- confirm against VM_Version).
// vector_len 0 is passed to the assembler for the 128-bit encoding.
-instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
ins_encode %{
int vector_len = 0;
__ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// New rule: register-register vpaddb for 4-byte vectors when
// VM_Version::supports_avx512bw() holds. In this diff view it takes over the slot
// of the old vadd4B_mem rule, whose replacement memory forms appear further down.
-instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
+instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
ins_encode %{
int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// New rule: two-address vpaddb for 4-byte vectors when
// VM_Version::supports_avx512nobw() holds (presumably AVX-512 without the BW
// extension -- confirm against VM_Version). The match rule reads dst as the first
// addend; src1 is only a TEMP scratch register and holds no defined value.
// Review fix: the encoding used $src1 as the first source although the match rule
// and the format string both use $dst; it now reads $dst.
-instruct vadd8B(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 8);
- match(Set dst (AddVB dst src));
- format %{ "paddb $dst,$src\t! add packed8B" %}
+instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+ match(Set dst (AddVB dst src2));
+ effect(TEMP src1);
+ format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
ins_encode %{
- __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0;
+ __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// New rule: vpaddb with a memory second operand (LoadVector mem) for 4-byte vectors,
// guarded by VM_Version::supports_avx256only(). In this diff view it takes over the
// slot of the old vadd8B_reg rule.
-instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
+instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
ins_encode %{
int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// New rule: vpaddb with a memory second operand for 4-byte vectors when
// VM_Version::supports_avx512bw() holds. Derived in this diff from the old
// vadd8B_mem rule: only the name, operand class, predicate and format text change.
-instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
+ format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
ins_encode %{
int vector_len = 0;
__ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16B(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 16);
// New rule: two-address vpaddb with a memory second operand for 4-byte vectors.
// The match rule reads dst as the first addend; src is only a TEMP scratch register.
// Review fixes:
//  - predicate now uses supports_avx512nobw(), matching the *_reg_evex_special
//    rules; with supports_avx512bw() this rule overlapped vadd4B_mem_evex.
//  - format and encoding now take $dst as the first source instead of the
//    undefined TEMP $src.
+instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+ match(Set dst (AddVB dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpaddb $dst,$dst,$mem\t! add packed4B" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Two-operand SSE byte add for 8-element vectors (dst += src via paddb), now guarded
// by UseAVX == 0. In this diff view it is produced by rewriting the old vadd16B rule.
+instruct vadd8B(vecD dst, vecD src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVB dst src));
- format %{ "paddb $dst,$src\t! add packed16B" %}
+ format %{ "paddb $dst,$src\t! add packed8B" %}
ins_encode %{
__ paddb($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// New rule: three-operand vpaddb for 8-byte vectors, guarded by
// VM_Version::supports_avx256only(). Derived in this diff from the old vadd16B_reg.
// Review fix: src2 was declared vecS (the 4-byte operand class) in an 8-byte rule;
// it is now vecD like dst, src1 and every sibling vadd8B_* rule.
-instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
ins_encode %{
int vector_len = 0;
__ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// New rule: register-register vpaddb for 8-byte vectors when
// VM_Version::supports_avx512bw() holds. In this diff view it takes over the slot
// of the old vadd16B_mem rule.
-instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
+instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
ins_encode %{
int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// New rule: two-address vpaddb for 8-byte vectors when
// VM_Version::supports_avx512nobw() holds. The match rule reads dst as the first
// addend; src1 is only a TEMP scratch register and holds no defined value.
// Review fix: the encoding line carried over from the old vadd32B_reg rule used
// $src1 as the first source; it now reads $dst, matching the match rule and format.
-instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
+instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+ match(Set dst (AddVB dst src2));
+ effect(TEMP src1);
+ format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
ins_encode %{
- int vector_len = 1;
+ int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// New rule: vpaddb with a memory second operand for 8-byte vectors, guarded by
// VM_Version::supports_avx256only(). Derived in this diff from the old vadd32B_mem
// rule; vector_len drops from 1 to 0 for the narrower vector.
-instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
+ format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
ins_encode %{
- int vector_len = 1;
+ int vector_len = 0;
__ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// New rule: vpaddb with a memory second operand for 8-byte vectors when
// VM_Version::supports_avx512bw() holds. In this diff view it takes over the slot
// of the old vadd64B_reg rule; vector_len drops from 2 to 0.
-instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
+instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
ins_encode %{
- int vector_len = 2;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 0;
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// New rule: two-address vpaddb with a memory second operand for 8-byte vectors.
// The match rule reads dst as the first addend; src is only a TEMP scratch register.
// Review fixes:
//  - predicate now uses supports_avx512nobw(), matching the *_reg_evex_special
//    rules; with supports_avx512bw() this rule overlapped vadd8B_mem_evex.
//  - format and encoding now take $dst as the first source instead of the
//    undefined TEMP $src.
-instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
+instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+ match(Set dst (AddVB dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpaddb $dst,$dst,$mem\t! add packed8B" %}
ins_encode %{
- int vector_len = 2;
+ int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Two-operand SSE byte add for 16-element vectors (dst += src via paddb), guarded by
// UseAVX == 0. In this diff view it takes over the slot of the old vadd2S rule, so
// the "Shorts/Chars vector add" section header is removed here as well.
-// Shorts/Chars vector add
-instruct vadd2S(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length() == 2);
- match(Set dst (AddVS dst src));
- format %{ "paddw $dst,$src\t! add packed2S" %}
+instruct vadd16B(vecX dst, vecX src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
+ match(Set dst (AddVB dst src));
+ format %{ "paddb $dst,$src\t! add packed16B" %}
ins_encode %{
- __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+ __ paddb($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// New rule: three-operand vpaddb for 16-byte vectors, guarded by
// VM_Version::supports_avx256only(). In this diff view it takes over the slot of
// the old vadd2S_reg rule (vpaddw becomes vpaddb).
-instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
+instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
ins_encode %{
int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
+instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
ins_encode %{
int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4S(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 4);
- match(Set dst (AddVS dst src));
- format %{ "paddw $dst,$src\t! add packed4S" %}
+instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+ match(Set dst (AddVB dst src2));
+ effect(TEMP src1);
+ format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
ins_encode %{
- __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
+instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
ins_encode %{
int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
+instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
ins_encode %{
int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8S(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 8);
- match(Set dst (AddVS dst src));
- format %{ "paddw $dst,$src\t! add packed8S" %}
+instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); // nobw, as in vadd16B_reg_evex_special; vadd16B_mem_evex already handles supports_avx512bw()
+ match(Set dst (AddVB dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
ins_encode %{
- __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0;
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
+instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 1;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
+instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 1;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
+instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
+ match(Set dst (AddVB dst src2));
+ effect(TEMP src1);
+ format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
ins_encode %{
int vector_len = 1;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
+instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
ins_encode %{
int vector_len = 1;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
+instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
ins_encode %{
- int vector_len = 2;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 1;
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
+instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); // nobw, as in vadd32B_reg_evex_special; vadd32B_mem_evex already handles supports_avx512bw()
+ match(Set dst (AddVB dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
ins_encode %{
int vector_len = 2;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-// Integers vector add
-instruct vadd2I(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
- match(Set dst (AddVI dst src));
- format %{ "paddd $dst,$src\t! add packed2I" %}
+instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
ins_encode %{
- __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 2;
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (AddVI src1 src2));
- format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
+// Shorts/Chars vector add
+instruct vadd2S(vecS dst, vecS src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
+ match(Set dst (AddVS dst src));
+ format %{ "paddw $dst,$src\t! add packed2S" %}
ins_encode %{
- int vector_len = 0;
- __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ paddw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (AddVI src (LoadVector mem)));
- format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
+instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
ins_encode %{
int vector_len = 0;
- __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4I(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
- match(Set dst (AddVI dst src));
- format %{ "paddd $dst,$src\t! add packed4I" %}
+instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
ins_encode %{
- __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (AddVI src1 src2));
- format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
+instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+ match(Set dst (AddVS dst src2));
+ effect(TEMP src1);
+ format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
ins_encode %{
int vector_len = 0;
- __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (AddVI src (LoadVector mem)));
- format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
+instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
ins_encode %{
int vector_len = 0;
- __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
- match(Set dst (AddVI src1 src2));
- format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
+instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
ins_encode %{
- int vector_len = 1;
- __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
- match(Set dst (AddVI src (LoadVector mem)));
- format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
+instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); // nobw, as in vadd2S_reg_evex_special; vadd2S_mem_evex already handles supports_avx512bw()
+ match(Set dst (AddVS dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
ins_encode %{
- int vector_len = 1;
- __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (AddVI src1 src2));
- format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
+instruct vadd4S(vecD dst, vecD src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVS dst src));
+ format %{ "paddw $dst,$src\t! add packed4S" %}
ins_encode %{
- int vector_len = 2;
- __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ paddw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (AddVI src (LoadVector mem)));
- format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
+instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); // packed4S: all operands are vecD, as in the other vadd4S rules
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
ins_encode %{
- int vector_len = 2;
- __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-// Longs vector add
-instruct vadd2L(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
- match(Set dst (AddVL dst src));
- format %{ "paddq $dst,$src\t! add packed2L" %}
+instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
ins_encode %{
- __ paddq($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (AddVL src1 src2));
- format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
+instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+ match(Set dst (AddVS dst src2));
+ effect(TEMP src1);
+ format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
ins_encode %{
int vector_len = 0;
- __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (AddVL src (LoadVector mem)));
- format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
+instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
ins_encode %{
int vector_len = 0;
- __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
- match(Set dst (AddVL src1 src2));
- format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
+instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
ins_encode %{
- int vector_len = 1;
- __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
- match(Set dst (AddVL src (LoadVector mem)));
- format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
+instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); // nobw, as in vadd4S_reg_evex_special; vadd4S_mem_evex already handles supports_avx512bw()
+ match(Set dst (AddVS dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
ins_encode %{
- int vector_len = 1;
- __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (AddVL src1 src2));
- format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
+instruct vadd8S(vecX dst, vecX src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVS dst src));
+ format %{ "paddw $dst,$src\t! add packed8S" %}
ins_encode %{
- int vector_len = 2;
- __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ paddw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (AddVL src (LoadVector mem)));
- format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
+instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
ins_encode %{
- int vector_len = 2;
- __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-// Floats vector add
-instruct vadd2F(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
- match(Set dst (AddVF dst src));
- format %{ "addps $dst,$src\t! add packed2F" %}
+instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
ins_encode %{
- __ addps($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (AddVF src1 src2));
- format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
+instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+ match(Set dst (AddVS dst src2));
+ effect(TEMP src1);
+ format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (AddVF src (LoadVector mem)));
- format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
+instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4F(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
- match(Set dst (AddVF dst src));
- format %{ "addps $dst,$src\t! add packed4F" %}
+instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
ins_encode %{
- __ addps($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (AddVF src1 src2));
- format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
+instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); // nobw, as in vadd8S_reg_evex_special; vadd8S_mem_evex already handles supports_avx512bw()
+ match(Set dst (AddVS dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (AddVF src (LoadVector mem)));
- format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
+instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
ins_encode %{
- int vector_len = 0;
- __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 1;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (AddVF src1 src2));
- format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
+instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
ins_encode %{
int vector_len = 1;
- __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (AddVF src (LoadVector mem)));
- format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
+instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+ match(Set dst (AddVS dst src2));
+ effect(TEMP src1);
+ format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
ins_encode %{
int vector_len = 1;
- __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (AddVF src1 src2));
- format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
+instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); // nobw, as in vadd16S_reg_evex_special; vadd16S_mem_evex already handles supports_avx512bw()
+ match(Set dst (AddVS dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
ins_encode %{
int vector_len = 2;
- __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (AddVF src (LoadVector mem)));
- format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
+instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
ins_encode %{
int vector_len = 2;
- __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-// Doubles vector add
-instruct vadd2D(vecX dst, vecX src) %{
+// Integers vector add
+instruct vadd2I(vecD dst, vecD src) %{
predicate(n->as_Vector()->length() == 2);
- match(Set dst (AddVD dst src));
- format %{ "addpd $dst,$src\t! add packed2D" %}
+ match(Set dst (AddVI dst src));
+ format %{ "paddd $dst,$src\t! add packed2I" %}
ins_encode %{
- __ addpd($dst$$XMMRegister, $src$$XMMRegister);
+ __ paddd($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
+instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (AddVD src1 src2));
- format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
+ match(Set dst (AddVI src1 src2));
+ format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
ins_encode %{
int vector_len = 0;
- __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
+instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (AddVD src (LoadVector mem)));
- format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
+ match(Set dst (AddVI src (LoadVector mem)));
+ format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
ins_encode %{
int vector_len = 0;
- __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
+instruct vadd4I(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVI dst src));
+ format %{ "paddd $dst,$src\t! add packed4I" %}
+ ins_encode %{
+ __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (AddVD src1 src2));
- format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
+ match(Set dst (AddVI src1 src2));
+ format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
ins_encode %{
- int vector_len = 1;
- __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 0;
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
+instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (AddVD src (LoadVector mem)));
- format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
+ match(Set dst (AddVI src (LoadVector mem)));
+ format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVI src1 src2));
+ format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
ins_encode %{
int vector_len = 1;
- __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (AddVD src1 src2));
- format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
+instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVI src (LoadVector mem)));
+ format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
ins_encode %{
- int vector_len = 2;
- __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 1;
+ __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (AddVD src (LoadVector mem)));
- format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
+instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); // vecZ forms in this file require UseAVX > 2
+ match(Set dst (AddVI src1 src2)); // dst = src1 + src2, 16 ints
+ format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
ins_encode %{
int vector_len = 2;
- __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-// --------------------------------- SUB --------------------------------------
+instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); // vecZ forms in this file require UseAVX > 2
+ match(Set dst (AddVI src (LoadVector mem))); // dst = src + 16-int vector loaded from mem
+ format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
+ ins_encode %{
+ int vector_len = 2; // 2: value used with vecZ operands
+ __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
-// Bytes vector sub
-instruct vsub4B(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length() == 4);
- match(Set dst (SubVB dst src));
- format %{ "psubb $dst,$src\t! sub packed4B" %}
+// Longs vector add
+instruct vadd2L(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 2); // no UseAVX test: this predicate alone does not exclude AVX configurations
+ match(Set dst (AddVL dst src)); // destructive form: dst = dst + src, 2 longs
+ format %{ "paddq $dst,$src\t! add packed2L" %}
ins_encode %{
- __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+ __ paddq($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
+instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); // AVX enabled, exactly 2 elements
+ match(Set dst (AddVL src1 src2)); // dst = src1 + src2, 2 longs
+ format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
ins_encode %{
int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
+instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); // AVX enabled, exactly 2 elements
+ match(Set dst (AddVL src (LoadVector mem))); // dst = src + 2-long vector loaded from mem
+ format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
ins_encode %{
int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8B(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 8);
- match(Set dst (SubVB dst src));
- format %{ "psubb $dst,$src\t! sub packed8B" %}
+instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); // long vecY add needs UseAVX > 1
+ match(Set dst (AddVL src1 src2)); // dst = src1 + src2, 4 longs
+ format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
ins_encode %{
- __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
+instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); // long vecY add needs UseAVX > 1
+ match(Set dst (AddVL src (LoadVector mem))); // dst = src + 4-long vector loaded from mem
+ format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
+instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); // vecZ forms in this file require UseAVX > 2
+ match(Set dst (AddVL src1 src2)); // dst = src1 + src2, 8 longs
+ format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 2; // 2: value used with vecZ operands
+ __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub16B(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 16);
- match(Set dst (SubVB dst src));
- format %{ "psubb $dst,$src\t! sub packed16B" %}
+instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); // vecZ forms in this file require UseAVX > 2
+ match(Set dst (AddVL src (LoadVector mem))); // dst = src + 8-long vector loaded from mem
+ format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
+ ins_encode %{
+ int vector_len = 2; // 2: value used with vecZ operands
+ __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Floats vector add
+instruct vadd2F(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 2); // no UseAVX test: this predicate alone does not exclude AVX configurations
+ match(Set dst (AddVF dst src)); // destructive form: dst = dst + src, 2 floats
+ format %{ "addps $dst,$src\t! add packed2F" %}
+ ins_encode %{
+ __ addps($dst$$XMMRegister, $src$$XMMRegister); // two-operand call, no vector_len argument
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); // AVX enabled, exactly 2 elements
+ match(Set dst (AddVF src1 src2)); // dst = src1 + src2, 2 floats
+ format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); // AVX enabled, exactly 2 elements
+ match(Set dst (AddVF src (LoadVector mem))); // dst = src + 2-float vector loaded from mem
+ format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 4); // no UseAVX test: this predicate alone does not exclude AVX configurations
+ match(Set dst (AddVF dst src)); // destructive form: dst = dst + src, 4 floats
+ format %{ "addps $dst,$src\t! add packed4F" %}
+ ins_encode %{
+ __ addps($dst$$XMMRegister, $src$$XMMRegister); // two-operand call, no vector_len argument
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); // AVX enabled, exactly 4 elements
+ match(Set dst (AddVF src1 src2)); // dst = src1 + src2, 4 floats
+ format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); // AVX enabled, exactly 4 elements
+ match(Set dst (AddVF src (LoadVector mem))); // dst = src + 4-float vector loaded from mem
+ format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); // float vecY add only needs UseAVX > 0 (the int/long vecY adds need > 1)
+ match(Set dst (AddVF src1 src2)); // dst = src1 + src2, 8 floats
+ format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
+ ins_encode %{
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); // float vecY add only needs UseAVX > 0
+ match(Set dst (AddVF src (LoadVector mem))); // dst = src + 8-float vector loaded from mem
+ format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
+ ins_encode %{
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); // vecZ forms in this file require UseAVX > 2
+ match(Set dst (AddVF src1 src2)); // dst = src1 + src2, 16 floats
+ format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
+ ins_encode %{
+ int vector_len = 2; // 2: value used with vecZ operands
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); // vecZ forms in this file require UseAVX > 2
+ match(Set dst (AddVF src (LoadVector mem))); // dst = src + 16-float vector loaded from mem
+ format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
+ ins_encode %{
+ int vector_len = 2; // 2: value used with vecZ operands
+ __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Doubles vector add
+instruct vadd2D(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 2); // no UseAVX test: this predicate alone does not exclude AVX configurations
+ match(Set dst (AddVD dst src)); // destructive form: dst = dst + src, 2 doubles
+ format %{ "addpd $dst,$src\t! add packed2D" %}
+ ins_encode %{
+ __ addpd($dst$$XMMRegister, $src$$XMMRegister); // two-operand call, no vector_len argument
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); // AVX enabled, exactly 2 elements
+ match(Set dst (AddVD src1 src2)); // dst = src1 + src2, 2 doubles
+ format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); // AVX enabled, exactly 2 elements
+ match(Set dst (AddVD src (LoadVector mem))); // dst = src + 2-double vector loaded from mem
+ format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); // double vecY add only needs UseAVX > 0 (the int/long vecY adds need > 1)
+ match(Set dst (AddVD src1 src2)); // dst = src1 + src2, 4 doubles
+ format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
+ ins_encode %{
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); // double vecY add only needs UseAVX > 0
+ match(Set dst (AddVD src (LoadVector mem))); // dst = src + 4-double vector loaded from mem
+ format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
+ ins_encode %{
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); // vecZ forms in this file require UseAVX > 2
+ match(Set dst (AddVD src1 src2)); // dst = src1 + src2, 8 doubles
+ format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
+ ins_encode %{
+ int vector_len = 2; // 2: value used with vecZ operands
+ __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); // vecZ forms in this file require UseAVX > 2
+ match(Set dst (AddVD src (LoadVector mem))); // dst = src + 8-double vector loaded from mem
+ format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
+ ins_encode %{
+ int vector_len = 2; // 2: value used with vecZ operands
+ __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- SUB --------------------------------------
+
+// Bytes vector sub
+instruct vsub4B(vecS dst, vecS src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); // SSE path: only when UseAVX is 0
+ match(Set dst (SubVB dst src)); // destructive form: dst = dst - src, 4 bytes
+ format %{ "psubb $dst,$src\t! sub packed4B" %}
+ ins_encode %{
+ __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVB src1 src2)); // dst = src1 - src2, 4 bytes
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVB src1 src2)); // dst = src1 - src2, 4 bytes
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVB dst src2)); // rule consumes dst itself as the first input
+ effect(TEMP src1); // NOTE(review): src1 is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubb helper handles this
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
+ ins_encode %{
+ int vector_len = 0; // NOTE(review): instruct name says "exex"; the sibling variants are spelled "evex"
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVB src (LoadVector mem))); // dst = src - 4-byte vector loaded from mem
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVB src (LoadVector mem))); // dst = src - 4-byte vector loaded from mem
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVB dst (LoadVector mem))); // rule consumes dst itself as the first input
+ effect(TEMP src); // NOTE(review): src is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubb helper handles this
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B(vecD dst, vecD src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); // SSE path: only when UseAVX is 0
+ match(Set dst (SubVB dst src)); // destructive form: dst = dst - src, 8 bytes
+ format %{ "psubb $dst,$src\t! sub packed8B" %}
+ ins_encode %{
+ __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVB src1 src2)); // dst = src1 - src2, 8 bytes
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVB src1 src2)); // dst = src1 - src2, 8 bytes
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVB dst src2)); // rule consumes dst itself as the first input
+ effect(TEMP src1); // NOTE(review): src1 is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubb helper handles this
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVB src (LoadVector mem))); // dst = src - 8-byte vector loaded from mem
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVB src (LoadVector mem))); // dst = src - 8-byte vector loaded from mem
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVB dst (LoadVector mem))); // rule consumes dst itself as the first input
+ effect(TEMP src); // NOTE(review): src is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubb helper handles this
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B(vecX dst, vecX src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 16); // SSE path: only when UseAVX is 0
+ match(Set dst (SubVB dst src)); // destructive form: dst = dst - src, 16 bytes
+ format %{ "psubb $dst,$src\t! sub packed16B" %}
+ ins_encode %{
+ __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVB src1 src2)); // dst = src1 - src2, 16 bytes
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVB src1 src2)); // dst = src1 - src2, 16 bytes
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVB dst src2)); // rule consumes dst itself as the first input
+ effect(TEMP src1); // NOTE(review): src1 is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubb helper handles this
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVB src (LoadVector mem))); // dst = src - 16-byte vector loaded from mem
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVB src (LoadVector mem))); // dst = src - 16-byte vector loaded from mem
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVB dst (LoadVector mem))); // rule consumes dst itself as the first input
+ effect(TEMP src); // NOTE(review): src is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubb helper handles this
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVB src1 src2)); // dst = src1 - src2, 32 bytes
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
+ ins_encode %{
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVB src1 src2)); // dst = src1 - src2, 32 bytes
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
+ ins_encode %{
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVB dst src2)); // rule consumes dst itself as the first input
+ effect(TEMP src1); // NOTE(review): src1 is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubb helper handles this
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
+ ins_encode %{
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVB src (LoadVector mem))); // dst = src - 32-byte vector loaded from mem
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
+ ins_encode %{
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVB src (LoadVector mem))); // dst = src - 32-byte vector loaded from mem
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
+ ins_encode %{
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVB dst (LoadVector mem))); // rule consumes dst itself as the first input
+ effect(TEMP src); // NOTE(review): src is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubb helper handles this
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
+ ins_encode %{
+ int vector_len = 1; // 1: value used with vecY operands
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); // same AVX512BW condition match_rule_supported_vector applies to Op_SubVB at vlen 64
+ match(Set dst (SubVB src1 src2)); // dst = src1 - src2, 64 bytes
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
+ ins_encode %{
+ int vector_len = 2; // 2: value used with vecZ operands
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); // same AVX512BW condition match_rule_supported_vector applies to Op_SubVB at vlen 64
+ match(Set dst (SubVB src (LoadVector mem))); // dst = src - 64-byte vector loaded from mem
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
+ ins_encode %{
+ int vector_len = 2; // 2: value used with vecZ operands
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Shorts/Chars vector sub
+instruct vsub2S(vecS dst, vecS src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); // SSE path: only when UseAVX is 0
+ match(Set dst (SubVS dst src)); // destructive form: dst = dst - src, 2 shorts
+ format %{ "psubw $dst,$src\t! sub packed2S" %}
+ ins_encode %{
+ __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVS src1 src2)); // dst = src1 - src2, 2 shorts
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVS src1 src2)); // dst = src1 - src2, 2 shorts
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVS dst src2)); // rule consumes dst itself as the first input
+ effect(TEMP src1); // NOTE(review): src1 is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubw helper handles this
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVS src (LoadVector mem))); // dst = src - 2-short vector loaded from mem
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVS src (LoadVector mem))); // dst = src - 2-short vector loaded from mem
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVS dst (LoadVector mem))); // rule consumes dst itself as the first input
+ effect(TEMP src); // NOTE(review): src is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubw helper handles this
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S(vecD dst, vecD src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); // SSE path: only when UseAVX is 0
+ match(Set dst (SubVS dst src)); // destructive form: dst = dst - src, 4 shorts
+ format %{ "psubw $dst,$src\t! sub packed4S" %}
ins_encode %{
- __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+ __ psubw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
+instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVS src1 src2)); // dst = src1 - src2, 4 shorts
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
ins_encode %{
int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
+instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVS src1 src2)); // dst = src1 - src2, 4 shorts
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
ins_encode %{
int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
+instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVS dst src2)); // rule consumes dst itself as the first input
+ effect(TEMP src1); // NOTE(review): src1 is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubw helper handles this
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
ins_encode %{
- int vector_len = 1;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
+instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVS src (LoadVector mem))); // dst = src - 4-short vector loaded from mem
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
ins_encode %{
- int vector_len = 1;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
+instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVS src (LoadVector mem))); // dst = src - 4-short vector loaded from mem
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
ins_encode %{
- int vector_len = 2;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
+instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVS dst (LoadVector mem))); // rule consumes dst itself as the first input
+ effect(TEMP src); // NOTE(review): src is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubw helper handles this
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
ins_encode %{
- int vector_len = 2;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-// Shorts/Chars vector sub
-instruct vsub2S(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length() == 2);
+instruct vsub8S(vecX dst, vecX src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); // SSE path: only when UseAVX is 0; destructive rule, dst = dst - src
match(Set dst (SubVS dst src));
- format %{ "psubw $dst,$src\t! sub packed2S" %}
+ format %{ "psubw $dst,$src\t! sub packed8S" %}
ins_encode %{
__ psubw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
+instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); // only when VM_Version reports AVX512BW
+ match(Set dst (SubVS src1 src2)); // dst = src1 - src2, 8 shorts
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4S(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 4);
- match(Set dst (SubVS dst src));
- format %{ "psubw $dst,$src\t! sub packed4S" %}
+instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); // presumably AVX-512 without BW - TODO confirm supports_avx512nobw()
+ match(Set dst (SubVS dst src2)); // rule consumes dst itself as the first input
+ effect(TEMP src1); // NOTE(review): src1 is only a scratch TEMP, yet it is passed as the 2nd operand below - confirm the vpsubw helper handles this
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
ins_encode %{
- __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0; // 0: value shared by the vecS/vecD/vecX forms
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
+instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); // presumably AVX without AVX-512 - TODO confirm supports_avx256only()
+ match(Set dst (SubVS src (LoadVector mem))); // dst = src - 8-short vector loaded from mem
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8S(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 8);
- match(Set dst (SubVS dst src));
- format %{ "psubw $dst,$src\t! sub packed8S" %}
+// Two-address subtract of 8 packed shorts with a memory operand, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches
+// (SubVS dst (LoadVector mem)), so dst is the first input as well as the
+// result. src is declared only as a TEMP and never receives an input value,
+// so the encoding must read dst.
+instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+ match(Set dst (SubVS dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpsubw $dst,$dst,$mem\t! sub packed8S" %}
ins_encode %{
- __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0;
+ // First source is dst (the matched input), not the uninitialized TEMP src.
+ __ vpsubw($dst$$XMMRegister, $dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
ins_encode %{
- int vector_len = 0;
+ int vector_len = 1;
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
+instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+ match(Set dst (SubVS src1 src2));
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 1;
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (SubVS src1 src2));
+// Two-address subtract of 16 packed shorts, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches (SubVS dst src2),
+// so dst is the first input as well as the result. src1 is declared only as a
+// TEMP and never receives an input value, so the encoding must read dst.
+instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+ match(Set dst (SubVS dst src2));
+ effect(TEMP src1);
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
+ format %{ "vpsubw $dst,$dst,$src2\t! sub packed16S" %}
ins_encode %{
int vector_len = 1;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ // First source is dst (the matched input), not the uninitialized TEMP src1.
+ __ vpsubw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ // SubVS on 16 packed shorts: dst = src minus the vector loaded from mem
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); // NOTE(review): supports_avx256only() presumably means AVX without AVX-512 -- confirm in VM_Version
+ match(Set dst (SubVS src (LoadVector mem))); // the LoadVector input is matched into the instruction's memory operand
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
+ ins_encode %{
+ int vector_len = 1; // vector-length argument for the assembler; 1 presumably selects the 256-bit form -- confirm
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
+// Two-address subtract of 16 packed shorts with a memory operand, selected
+// when VM_Version::supports_avx512nobw() holds. The rule matches
+// (SubVS dst (LoadVector mem)), so dst is the first input as well as the
+// result. src is declared only as a TEMP and never receives an input value,
+// so the encoding must read dst.
+instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+ match(Set dst (SubVS dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpsubw $dst,$dst,$mem\t! sub packed16S" %}
+ ins_encode %{
+ int vector_len = 1;
+ // First source is dst (the matched input), not the uninitialized TEMP src.
+ __ vpsubw($dst$$XMMRegister, $dst$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
@@ -6881,177 +7718,361 @@
__ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (SubVD src (LoadVector mem)));
- format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
+instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ // SubVD on 4 packed doubles: dst = src minus the vector loaded from mem
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); // needs UseAVX of at least 1 and a 4-element vector node
+ match(Set dst (SubVD src (LoadVector mem))); // the LoadVector input is matched into the instruction's memory operand
+ format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
+ ins_encode %{
+ int vector_len = 1; // vector-length argument for the assembler; 1 presumably selects the 256-bit form -- confirm
+ __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ // SubVD on 8 packed doubles, three-operand form: dst = src1 minus src2
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); // needs UseAVX of at least 3 and an 8-element vector node
+ match(Set dst (SubVD src1 src2));
+ format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
+ ins_encode %{
+ int vector_len = 2; // vector-length argument for the assembler; 2 presumably selects the 512-bit form -- confirm
+ __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ // SubVD on 8 packed doubles: dst = src minus the vector loaded from mem
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); // needs UseAVX of at least 3 and an 8-element vector node
+ match(Set dst (SubVD src (LoadVector mem))); // the LoadVector input is matched into the instruction's memory operand
+ format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
+ ins_encode %{
+ int vector_len = 2; // vector-length argument for the assembler; 2 presumably selects the 512-bit form -- confirm
+ __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- MUL --------------------------------------
+
+// Shorts/Chars vector mul
+instruct vmul2S(vecS dst, vecS src) %{ // MulVS on 2 packed shorts, two-operand pmullw form: dst is both the first input and the result
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); // only when UseAVX is 0, so it does not overlap the UseAVX-dependent vpmullw rules
+ match(Set dst (MulVS dst src));
+ format %{ "pmullw $dst,$src\t! mul packed2S" %}
+ ins_encode %{
+ __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ // MulVS on 2 packed shorts, three-operand vpmullw form with both sources in registers
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); // NOTE(review): supports_avx256only() presumably means AVX without AVX-512 -- confirm in VM_Version
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ // MulVS on 2 packed shorts, three-operand vpmullw form with both sources in registers
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); // selected when VM_Version reports AVX-512BW support
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Two-address multiply of 2 packed shorts, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches (MulVS dst src2),
+// so dst is the first input as well as the result. src1 is declared only as a
+// TEMP and never receives an input value, so the encoding must read dst.
+instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+ match(Set dst (MulVS dst src2));
+ effect(TEMP src1);
+ format %{ "vpmullw $dst,$dst,$src2\t! mul packed2S" %}
+ ins_encode %{
+ int vector_len = 0;
+ // First source is dst (the matched input), not the uninitialized TEMP src1.
+ __ vpmullw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ // MulVS on 2 packed shorts: src multiplied by the vector loaded from mem
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); // NOTE(review): supports_avx256only() presumably means AVX without AVX-512 -- confirm in VM_Version
+ match(Set dst (MulVS src (LoadVector mem))); // the LoadVector input is matched into the instruction's memory operand
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ // MulVS on 2 packed shorts: src multiplied by the vector loaded from mem
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); // selected when VM_Version reports AVX-512BW support
+ match(Set dst (MulVS src (LoadVector mem))); // the LoadVector input is matched into the instruction's memory operand
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Two-address multiply of 2 packed shorts with a memory operand, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches
+// (MulVS dst (LoadVector mem)), so dst is the first input as well as the
+// result. src is declared only as a TEMP and never receives an input value,
+// so the encoding must read dst.
+instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+ match(Set dst (MulVS dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpmullw $dst,$dst,$mem\t! mul packed2S" %}
+ ins_encode %{
+ int vector_len = 0;
+ // First source is dst (the matched input), not the uninitialized TEMP src.
+ __ vpmullw($dst$$XMMRegister, $dst$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S(vecD dst, vecD src) %{ // MulVS on 4 packed shorts, two-operand pmullw form: dst is both the first input and the result
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); // only when UseAVX is 0, so it does not overlap the UseAVX-dependent vpmullw rules
+ match(Set dst (MulVS dst src));
+ format %{ "pmullw $dst,$src\t! mul packed4S" %}
+ ins_encode %{
+ __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ // MulVS on 4 packed shorts, three-operand vpmullw form with both sources in registers
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); // NOTE(review): supports_avx256only() presumably means AVX without AVX-512 -- confirm in VM_Version
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ // MulVS on 4 packed shorts, three-operand vpmullw form with both sources in registers
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); // selected when VM_Version reports AVX-512BW support
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Two-address multiply of 4 packed shorts, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches (MulVS dst src2),
+// so dst is the first input as well as the result. src1 is declared only as a
+// TEMP and never receives an input value, so the encoding must read dst.
+instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+ match(Set dst (MulVS dst src2));
+ effect(TEMP src1);
+ format %{ "vpmullw $dst,$dst,$src2\t! mul packed4S" %}
+ ins_encode %{
+ int vector_len = 0;
+ // First source is dst (the matched input), not the uninitialized TEMP src1.
+ __ vpmullw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
+ match(Set dst (MulVS src (LoadVector mem)));
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
ins_encode %{
- int vector_len = 1;
- __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 0;
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (SubVD src1 src2));
- format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
+instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+ match(Set dst (MulVS src (LoadVector mem)));
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
ins_encode %{
- int vector_len = 2;
- __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 0;
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (SubVD src (LoadVector mem)));
- format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
+// Two-address multiply of 4 packed shorts with a memory operand, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches
+// (MulVS dst (LoadVector mem)), so dst is the first input as well as the
+// result. src is declared only as a TEMP and never receives an input value,
+// so the encoding must read dst.
+instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+ match(Set dst (MulVS dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpmullw $dst,$dst,$mem\t! mul packed4S" %}
ins_encode %{
- int vector_len = 2;
- __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 0;
+ // First source is dst (the matched input), not the uninitialized TEMP src.
+ __ vpmullw($dst$$XMMRegister, $dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-// --------------------------------- MUL --------------------------------------
-
-// Shorts/Chars vector mul
-instruct vmul2S(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length() == 2);
+instruct vmul8S(vecX dst, vecX src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (MulVS dst src));
- format %{ "pmullw $dst,$src\t! mul packed2S" %}
+ format %{ "pmullw $dst,$src\t! mul packed8S" %}
ins_encode %{
__ pmullw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
+instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul4S(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 4);
- match(Set dst (MulVS dst src));
- format %{ "pmullw $dst,$src\t! mul packed4S" %}
+// Two-address multiply of 8 packed shorts, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches (MulVS dst src2),
+// so dst is the first input as well as the result. src1 is declared only as a
+// TEMP and never receives an input value, so the encoding must read dst.
+instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+ match(Set dst (MulVS dst src2));
+ effect(TEMP src1);
+ format %{ "vpmullw $dst,$dst,$src2\t! mul packed8S" %}
ins_encode %{
- __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0;
+ // First source is dst (the matched input), not the uninitialized TEMP src1.
+ __ vpmullw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
+ match(Set dst (MulVS src (LoadVector mem)));
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul8S(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 8);
- match(Set dst (MulVS dst src));
- format %{ "pmullw $dst,$src\t! mul packed8S" %}
+// Two-address multiply of 8 packed shorts with a memory operand, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches
+// (MulVS dst (LoadVector mem)), so dst is the first input as well as the
+// result. src is declared only as a TEMP and never receives an input value,
+// so the encoding must read dst.
+instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+ match(Set dst (MulVS dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpmullw $dst,$dst,$mem\t! mul packed8S" %}
ins_encode %{
- __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+ int vector_len = 0;
+ // First source is dst (the matched input), not the uninitialized TEMP src.
+ __ vpmullw($dst$$XMMRegister, $dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
ins_encode %{
- int vector_len = 0;
+ int vector_len = 1;
__ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 1;
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (MulVS src1 src2));
+// Two-address multiply of 16 packed shorts, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches (MulVS dst src2),
+// so dst is the first input as well as the result. src1 is declared only as a
+// TEMP and never receives an input value, so the encoding must read dst.
+instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+ match(Set dst (MulVS dst src2));
+ effect(TEMP src1);
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+ format %{ "vpmullw $dst,$dst,$src2\t! mul packed16S" %}
ins_encode %{
int vector_len = 1;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ // First source is dst (the matched input), not the uninitialized TEMP src1.
+ __ vpmullw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ // MulVS on 16 packed shorts: src multiplied by the vector loaded from mem
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); // NOTE(review): supports_avx256only() presumably means AVX without AVX-512 -- confirm in VM_Version
+ match(Set dst (MulVS src (LoadVector mem))); // the LoadVector input is matched into the instruction's memory operand
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
+ ins_encode %{
+ int vector_len = 1; // vector-length argument for the assembler; 1 presumably selects the 256-bit form -- confirm
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
+// Two-address multiply of 16 packed shorts with a memory operand, selected
+// when VM_Version::supports_avx512nobw() holds. The rule matches
+// (MulVS dst (LoadVector mem)), so dst is the first input as well as the
+// result. src is declared only as a TEMP and never receives an input value,
+// so the encoding must read dst.
+instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+ match(Set dst (MulVS dst (LoadVector mem)));
+ effect(TEMP src);
+ format %{ "vpmullw $dst,$dst,$mem\t! mul packed16S" %}
+ ins_encode %{
+ int vector_len = 1;
+ // First source is dst (the matched input), not the uninitialized TEMP src.
+ __ vpmullw($dst$$XMMRegister, $dst$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
@@ -7677,202 +8698,386 @@
instruct vsqrt4D_mem(vecY dst, memory mem) %{
predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (SqrtVD (LoadVector mem)));
format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
ins_encode %{
- int vector_len = 1;
- __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 1;
+ __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ // SqrtVD on 8 packed doubles with the source in a register
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); // needs UseAVX of at least 3 and an 8-element vector node
+ match(Set dst (SqrtVD src));
+ format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
+ ins_encode %{
+ int vector_len = 2; // vector-length argument for the assembler; 2 presumably selects the 512-bit form -- confirm
+ __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsqrt8D_mem(vecZ dst, memory mem) %{ // SqrtVD on 8 packed doubles with the source vector loaded from mem
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); // needs UseAVX of at least 3 and an 8-element vector node
+ match(Set dst (SqrtVD (LoadVector mem))); // the LoadVector input is matched into the instruction's memory operand
+ format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
+ ins_encode %{
+ int vector_len = 2; // vector-length argument for the assembler; 2 presumably selects the 512-bit form -- confirm
+ __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// ------------------------------ LeftShift -----------------------------------
+
+// Shorts/Chars vector left shift
+instruct vsll2S(vecS dst, vecS shift) %{ // LShiftVS on 2 packed shorts, two-operand psllw form: dst is shifted in place by the count held in the shift register
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); // only when UseAVX is 0, so it does not overlap the UseAVX-dependent vpsllw rules
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_imm(vecS dst, immI8 shift) %{ // LShiftVS on 2 packed shorts, two-operand psllw form: dst is shifted in place by an immI8 constant
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); // only when UseAVX is 0, so it does not overlap the UseAVX-dependent vpsllw rules
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ __ psllw($dst$$XMMRegister, (int)$shift$$constant); // the shift count is passed to the assembler as an int constant
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ // LShiftVS on 2 packed shorts, three-operand vpsllw form: dst = src shifted by the count held in the shift register
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); // NOTE(review): supports_avx256only() presumably means AVX without AVX-512 -- confirm in VM_Version
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ // LShiftVS on 2 packed shorts, three-operand vpsllw form: dst = src shifted by the count held in the shift register
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); // selected when VM_Version reports AVX-512BW support
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Two-address left shift of 2 packed shorts by a register count, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches
+// (LShiftVS dst shift), so dst is the value being shifted as well as the
+// result. src is declared only as a TEMP and never receives an input value,
+// so the encoding must read dst.
+instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVS dst shift));
+ effect(TEMP src);
+ format %{ "vpsllw $dst,$dst,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;
+ // Shifted value is dst (the matched input), not the uninitialized TEMP src.
+ __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ // LShiftVS on 2 packed shorts, three-operand vpsllw form: dst = src shifted by an immI8 constant
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); // NOTE(review): supports_avx256only() presumably means AVX without AVX-512 -- confirm in VM_Version
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); // the shift count is passed as an int constant
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ // LShiftVS on 2 packed shorts, three-operand vpsllw form: dst = src shifted by an immI8 constant
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); // selected when VM_Version reports AVX-512BW support
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); // the shift count is passed as an int constant
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Two-address left shift of 2 packed shorts by an immediate, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches
+// (LShiftVS dst shift), so dst is the value being shifted as well as the
+// result. src is declared only as a TEMP and never receives an input value,
+// so the encoding must read dst.
+instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVS dst shift));
+ effect(TEMP src);
+ format %{ "vpsllw $dst,$dst,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;
+ // Shifted value is dst (the matched input), not the uninitialized TEMP src.
+ __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S(vecD dst, vecS shift) %{ // LShiftVS on 4 packed shorts, two-operand psllw form: dst is shifted in place by the count held in the shift register
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); // only when UseAVX is 0, so it does not overlap the UseAVX-dependent vpsllw rules
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed4S" %}
+ ins_encode %{
+ __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_imm(vecD dst, immI8 shift) %{ // LShiftVS on 4 packed shorts, two-operand psllw form: dst is shifted in place by an immI8 constant
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); // only when UseAVX is 0, so it does not overlap the UseAVX-dependent vpsllw rules
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed4S" %}
+ ins_encode %{
+ __ psllw($dst$$XMMRegister, (int)$shift$$constant); // the shift count is passed to the assembler as an int constant
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ // LShiftVS on 4 packed shorts, three-operand vpsllw form: dst = src shifted by the count held in the shift register
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); // NOTE(review): supports_avx256only() presumably means AVX without AVX-512 -- confirm in VM_Version
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ // LShiftVS on 4 packed shorts, three-operand vpsllw form: dst = src shifted by the count held in the shift register
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); // selected when VM_Version reports AVX-512BW support
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0; // vector-length argument for the assembler; 0 presumably selects the 128-bit form -- confirm
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Two-address left shift of 4 packed shorts by a register count, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches
+// (LShiftVS dst shift), so dst is the value being shifted as well as the
+// result. src is declared only as a TEMP and never receives an input value,
+// so the encoding must read dst.
+instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS dst shift));
+ effect(TEMP src);
+ format %{ "vpsllw $dst,$dst,$shift\t! left shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;
+ // Shifted value is dst (the matched input), not the uninitialized TEMP src.
+ __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (SqrtVD src));
- format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
+instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
ins_encode %{
- int vector_len = 2;
- __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ int vector_len = 0;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsqrt8D_mem(vecZ dst, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (SqrtVD (LoadVector mem)));
- format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
+// Two-address left shift of 4 packed shorts by an immediate, selected when
+// VM_Version::supports_avx512nobw() holds. The rule matches
+// (LShiftVS dst shift), so dst is the value being shifted as well as the
+// result. src is declared only as a TEMP and never receives an input value,
+// so the encoding must read dst.
+instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS dst shift));
+ effect(TEMP src);
+ format %{ "vpsllw $dst,$dst,$shift\t! left shift packed4S" %}
ins_encode %{
- int vector_len = 2;
- __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 0;
+ // Shifted value is dst (the matched input), not the uninitialized TEMP src.
+ __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-// ------------------------------ LeftShift -----------------------------------
-
-// Shorts/Chars vector left shift
-instruct vsll2S(vecS dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+instruct vsll8S(vecX dst, vecS shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed2S" %}
+ format %{ "psllw $dst,$shift\t! left shift packed8S" %}
ins_encode %{
__ psllw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll2S_imm(vecS dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+instruct vsll8S_imm(vecX dst, immI8 shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed2S" %}
+ format %{ "psllw $dst,$shift\t! left shift packed8S" %}
ins_encode %{
__ psllw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);  // left shift of 8-element vectors with separate dst and src, gated on supports_avx512bw()
match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // shift count is taken from the shift XMM register
%}
ins_pipe( pipe_slow );
%}
-instruct vsll4S(vecD dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 4);
+instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);  // left shift of 8-element vectors, selected when supports_avx512nobw() holds
match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed4S" %}
+ effect(TEMP src);  // src is not an input of the match rule (dst is shifted in place); it is only reserved as a temporary
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
- __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // NOTE(review): the TEMP src is passed as the source operand; presumably the macro assembler uses it as scratch on this path - confirm
%}
ins_pipe( pipe_slow );
%}
-instruct vsll4S_imm(vecD dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed4S" %}
+instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);  // immediate-count left shift of 8-element vectors, gated on supports_avx256only()
+ match(Set dst (LShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
- __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);  // immediate-count left shift of 8-element vectors, gated on supports_avx512bw()
match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // shift count is the immediate operand, passed as int
%}
ins_pipe( pipe_slow );
%}
-instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
+instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);  // immediate-count left shift of 8-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (LShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is only reserved as a temporary. NOTE(review): the encoding still passes src as the source operand; presumably the macro assembler uses it as scratch - confirm
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed8S" %}
+instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);  // left shift of 16-element vectors, gated on supports_avx256only()
+ match(Set dst (LShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
- __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+ int vector_len = 1;  // vecY rules in this file pass 1
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed8S" %}
+instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);  // left shift of 16-element vectors, gated on supports_avx512bw()
+ match(Set dst (LShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
- __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+ int vector_len = 1;  // vecY rules in this file pass 1
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
+instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);  // left shift of 16-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (LShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is only reserved as a temporary. NOTE(review): the encoding still passes src as the source operand; presumably the macro assembler uses it as scratch - confirm
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
- int vector_len = 0;
+ int vector_len = 1;  // vecY rules in this file pass 1
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);  // immediate-count left shift of 16-element vectors, gated on supports_avx256only()
match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
- int vector_len = 0;
+ int vector_len = 1;  // vecY rules in this file pass 1
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);  // immediate-count left shift of 16-element vectors, gated on supports_avx512bw()
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
int vector_len = 1;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // shift count is the immediate operand, passed as int
%}
ins_pipe( pipe_slow );
%}
-instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS src shift));
+instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);  // immediate-count left shift of 16-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (LShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is only reserved as a temporary. NOTE(review): the encoding still passes src as the source operand; presumably the macro assembler uses it as scratch - confirm
format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);  // tightened from UseAVX > 2: the 32-element left shift now requires supports_avx512bw()
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
@@ -8077,195 +9282,379 @@
instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
match(Set dst (LShiftVL src shift));
format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
ins_encode %{
- int vector_len = 2;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ int vector_len = 2;  // vecZ rules in this file pass 2
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // left shift of 8 packed longs by the count in the shift register
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);  // immediate-count left shift of 8-element long vectors, requires UseAVX > 2
+ match(Set dst (LShiftVL src shift));
+ format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
+ ins_encode %{
+ int vector_len = 2;  // vecZ rules in this file pass 2
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// ----------------------- LogicalRightShift -----------------------------------
+
+// Shorts vector logical right shift produces an incorrect Java result
+// for negative data because Java code converts a short value into int with
+// sign extension before a shift. But char vectors are fine since chars are
+// unsigned values.
+
+instruct vsrl2S(vecS dst, vecS shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);  // SSE-only rule (UseAVX == 0) for 2-element vectors
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);  // shift count is taken from the shift XMM register
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_imm(vecS dst, immI8 shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);  // SSE-only rule (UseAVX == 0) for 2-element vectors
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, (int)$shift$$constant);  // shift count is the immediate operand, passed as int
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);  // logical right shift of 2-element vectors, gated on supports_avx256only()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);  // logical right shift of 2-element vectors, gated on supports_avx512bw()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);  // logical right shift of 2-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is not an input of the match rule; it is only reserved as a temporary
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // NOTE(review): the TEMP src is passed as the source operand; presumably the macro assembler uses it as scratch on this path - confirm
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);  // immediate-count logical right shift of 2-element vectors, gated on supports_avx256only()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);  // immediate-count logical right shift of 2-element vectors, gated on supports_avx512bw()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);  // immediate-count logical right shift of 2-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is not an input of the match rule; it is only reserved as a temporary
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // NOTE(review): the TEMP src is passed as the source operand; presumably the macro assembler uses it as scratch on this path - confirm
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S(vecD dst, vecS shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);  // SSE-only rule (UseAVX == 0) for 4-element vectors
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);  // shift count is taken from the shift XMM register
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_imm(vecD dst, immI8 shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);  // SSE-only rule (UseAVX == 0) for 4-element vectors
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, (int)$shift$$constant);  // shift count is the immediate operand, passed as int
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);  // logical right shift of 4-element vectors, gated on supports_avx256only()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);  // logical right shift of 4-element vectors, gated on supports_avx512bw()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);  // logical right shift of 4-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is not an input of the match rule; it is only reserved as a temporary
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // NOTE(review): the TEMP src is passed as the source operand; presumably the macro assembler uses it as scratch on this path - confirm
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);  // immediate-count logical right shift of 4-element vectors, gated on supports_avx256only()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);  // immediate-count logical right shift of 4-element vectors, gated on supports_avx512bw()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVL src shift));
- format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
+instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);  // immediate-count logical right shift of 4-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is not an input of the match rule; it is only reserved as a temporary
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
ins_encode %{
- int vector_len = 2;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // NOTE(review): the TEMP src is passed as the source operand; presumably the macro assembler uses it as scratch on this path - confirm
%}
ins_pipe( pipe_slow );
%}
-// ----------------------- LogicalRightShift -----------------------------------
-
-// Shorts vector logical right shift produces incorrect Java result
-// for negative data because java code convert short value into int with
-// sign extension before a shift. But char vectors are fine since chars are
-// unsigned values.
-
-instruct vsrl2S(vecS dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+instruct vsrl8S(vecX dst, vecS shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);  // SSE-only rule (UseAVX == 0) for 8-element vectors; dst is logically right-shifted in place by the count in the shift register
match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
+ format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
ins_encode %{
__ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl2S_imm(vecS dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+instruct vsrl8S_imm(vecX dst, immI8 shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);  // SSE-only rule (UseAVX == 0) for 8-element vectors; dst is logically right-shifted in place by an immediate count
match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
+ format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
ins_encode %{
__ psrlw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);  // logical right shift of 8-element vectors with separate dst and src, gated on supports_avx256only()
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);  // logical right shift of 8-element vectors with separate dst and src, gated on supports_avx512bw()
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // shift count is taken from the shift XMM register
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl4S(vecD dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 4);
+instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);  // logical right shift of 8-element vectors, selected when supports_avx512nobw() holds
match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
+ effect(TEMP src);  // src is not an input of the match rule (dst is shifted in place); it is only reserved as a temporary
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
- __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // NOTE(review): the TEMP src is passed as the source operand; presumably the macro assembler uses it as scratch on this path - confirm
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl4S_imm(vecD dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
+instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);  // immediate-count logical right shift of 8-element vectors, gated on supports_avx256only()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
- __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);  // immediate-count logical right shift of 8-element vectors, gated on supports_avx512bw()
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // shift count is the immediate operand, passed as int
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);  // immediate-count logical right shift of 8-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is only reserved as a temporary. NOTE(review): the encoding still passes src as the source operand; presumably the macro assembler uses it as scratch - confirm
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
+instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);  // logical right shift of 16-element vectors, gated on supports_avx256only()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
- __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+ int vector_len = 1;  // vecY rules in this file pass 1
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
+instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);  // logical right shift of 16-element vectors, gated on supports_avx512bw()
+ match(Set dst (URShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
- __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+ int vector_len = 1;  // vecY rules in this file pass 1
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);  // logical right shift of 16-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is only reserved as a temporary. NOTE(review): the encoding still passes src as the source operand; presumably the macro assembler uses it as scratch - confirm
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
- int vector_len = 0;
+ int vector_len = 1;  // vecY rules in this file pass 1
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);  // immediate-count logical right shift of 16-element vectors, gated on supports_avx256only()
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
- int vector_len = 0;
+ int vector_len = 1;  // vecY rules in this file pass 1
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);  // immediate-count logical right shift of 16-element vectors, gated on supports_avx512bw()
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
int vector_len = 1;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // shift count is the immediate operand, passed as int
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVS src shift));
+instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);  // immediate-count logical right shift of 16-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (URShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is only reserved as a temporary. NOTE(review): the encoding still passes src as the source operand; presumably the macro assembler uses it as scratch - confirm
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);  // tightened from UseAVX > 2: the 32-element logical right shift now requires supports_avx512bw()
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
@@ -8491,11 +9880,11 @@
// ------------------- ArithmeticRightShift -----------------------------------
// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVS dst shift));
format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
__ psraw($dst$$XMMRegister, $shift$$XMMRegister);
%}
@@ -8510,151 +9899,335 @@
__ psraw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);  // arithmetic right shift of 2-element vectors with separate dst and src; gate changed from UseAVX > 0 to supports_avx256only()
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);  // arithmetic right shift of 2-element vectors, gated on supports_avx512bw()
+ match(Set dst (RShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);  // arithmetic right shift of 2-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (RShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is not an input of the match rule; it is only reserved as a temporary
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // NOTE(review): the TEMP src is passed as the source operand; presumably the macro assembler uses it as scratch on this path - confirm
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);  // immediate-count arithmetic right shift of 2-element vectors, gated on supports_avx256only()
+ match(Set dst (RShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);  // immediate-count arithmetic right shift of 2-element vectors with separate dst and src, gated on supports_avx512bw()
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
+instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);  // immediate-count arithmetic right shift of 2-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (RShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is not an input of the match rule; it is only reserved as a temporary
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // NOTE(review): the TEMP src is passed as the source operand; presumably the macro assembler uses it as scratch on this path - confirm
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct vsra4S(vecD dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);  // now restricted to UseAVX == 0; previously the predicate checked only the vector length
match(Set dst (RShiftVS dst shift));
format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
__ psraw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vsra4S_imm(vecD dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);  // now restricted to UseAVX == 0; previously the predicate checked only the vector length
match(Set dst (RShiftVS dst shift));
format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
__ psraw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);  // arithmetic right shift of 4-element vectors with separate dst and src; gate changed from UseAVX > 0 to supports_avx256only()
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);  // arithmetic right shift of 4-element vectors, gated on supports_avx512bw()
+ match(Set dst (RShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);  // arithmetic right shift of 4-element vectors, selected when supports_avx512nobw() holds
+ match(Set dst (RShiftVS dst shift));  // dst is both the input and the result
+ effect(TEMP src);  // src is not an input of the match rule; it is only reserved as a temporary
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // NOTE(review): the TEMP src is passed as the source operand; presumably the macro assembler uses it as scratch on this path - confirm
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);  // immediate-count arithmetic right shift of 4-element vectors, gated on supports_avx256only()
+ match(Set dst (RShiftVS src shift));  // dst and src are separate operands
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;  // vecS/vecD/vecX rules in this file pass 0
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);  // immediate-count arithmetic right shift of 4-element vectors with separate dst and src, gated on supports_avx512bw()
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
+instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{  // packed4S arithmetic right shift in place by an immediate count
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);  // NOTE(review): presumably AVX-512 without the BW extension - confirm in VM_Version; node must have 4 elements
+ match(Set dst (RShiftVS dst shift));  // two-operand form: dst is both the shifted input and the result
+ effect(TEMP src);  // src is not a matched input; it is only reserved as a temporary register
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+ ins_encode %{
+ int vector_len = 0;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // NOTE(review): the TEMP src is passed as the second operand although the matched input is dst - confirm the vpsraw helper handles this
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct vsra8S(vecX dst, vecS shift) %{  // packed8S arithmetic right shift in place, count held in a vector register
- predicate(n->as_Vector()->length() == 8);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);  // eligible only when UseAVX == 0 and the node has 8 elements
match(Set dst (RShiftVS dst shift));  // two-operand form: dst is both the shifted input and the result
format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
__ psraw($dst$$XMMRegister, $shift$$XMMRegister);  // psraw takes only dst and the count register; no separate source or length code
%}
ins_pipe( pipe_slow );
%}
instruct vsra8S_imm(vecX dst, immI8 shift) %{  // packed8S arithmetic right shift in place by an immediate count
- predicate(n->as_Vector()->length() == 8);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);  // eligible only when UseAVX == 0 and the node has 8 elements
match(Set dst (RShiftVS dst shift));  // two-operand form: dst is both the shifted input and the result
format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
__ psraw($dst$$XMMRegister, (int)$shift$$constant);  // shift count is the immI8 operand's constant value
%}
ins_pipe( pipe_slow );
%}
-instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{  // packed8S arithmetic right shift, count held in a vector register
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);  // NOTE(review): presumably AVX/AVX2 without AVX-512 - confirm in VM_Version; node must have 8 elements
match(Set dst (RShiftVS src shift));  // three-operand form: src is the shifted input, dst receives the result
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
int vector_len = 0;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{  // packed8S arithmetic right shift, count held in a vector register
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);  // eligible only when supports_avx512bw() is true and the node has 8 elements
+ match(Set dst (RShiftVS src shift));  // three-operand form: src is the shifted input, dst receives the result
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+ ins_encode %{
+ int vector_len = 0;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{  // packed8S arithmetic right shift in place, count held in a vector register
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);  // NOTE(review): presumably AVX-512 without the BW extension - confirm in VM_Version; node must have 8 elements
+ match(Set dst (RShiftVS dst shift));  // two-operand form: dst is both the shifted input and the result
+ effect(TEMP src);  // src is not a matched input; it is only reserved as a temporary register
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+ ins_encode %{
+ int vector_len = 0;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // NOTE(review): the TEMP src is passed as the second operand although the matched input is dst - confirm the vpsraw helper handles this
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{  // packed8S arithmetic right shift by an immediate count
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);  // NOTE(review): presumably AVX/AVX2 without AVX-512 - confirm in VM_Version; node must have 8 elements
+ match(Set dst (RShiftVS src shift));  // three-operand form: src is the shifted input, dst receives the result
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+ ins_encode %{
+ int vector_len = 0;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // shift count is the immI8 operand's constant value
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{  // packed8S arithmetic right shift by an immediate count
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);  // eligible only when supports_avx512bw() is true and the node has 8 elements
match(Set dst (RShiftVS src shift));  // three-operand form: src is the shifted input, dst receives the result
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
int vector_len = 0;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // shift count is the immI8 operand's constant value
%}
ins_pipe( pipe_slow );
%}
-instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{  // packed8S arithmetic right shift in place by an immediate count
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);  // NOTE(review): presumably AVX-512 without the BW extension - confirm in VM_Version; node must have 8 elements
+ match(Set dst (RShiftVS dst shift));  // two-operand form: dst is both the shifted input and the result
+ effect(TEMP src);  // src is not a matched input; it is only reserved as a temporary register
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+ ins_encode %{
+ int vector_len = 0;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // NOTE(review): the TEMP src is passed as the second operand although the matched input is dst - confirm the vpsraw helper handles this
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{  // packed16S arithmetic right shift, count held in a vector register
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);  // NOTE(review): presumably AVX/AVX2 without AVX-512 - confirm in VM_Version; node must have 16 elements
+ match(Set dst (RShiftVS src shift));  // three-operand form: src is the shifted input, dst receives the result
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+ ins_encode %{
+ int vector_len = 1;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{  // packed16S arithmetic right shift, count held in a vector register
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);  // eligible only when supports_avx512bw() is true and the node has 16 elements
match(Set dst (RShiftVS src shift));  // three-operand form: src is the shifted input, dst receives the result
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
ins_encode %{
int vector_len = 1;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{  // packed16S arithmetic right shift in place, count held in a vector register
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);  // NOTE(review): presumably AVX-512 without the BW extension - confirm in VM_Version; node must have 16 elements
+ match(Set dst (RShiftVS dst shift));  // two-operand form: dst is both the shifted input and the result
+ effect(TEMP src);  // src is not a matched input; it is only reserved as a temporary register
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+ ins_encode %{
+ int vector_len = 1;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);  // NOTE(review): the TEMP src is passed as the second operand although the matched input is dst - confirm the vpsraw helper handles this
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{  // packed16S arithmetic right shift by an immediate count
+ predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);  // NOTE(review): presumably AVX/AVX2 without AVX-512 - confirm in VM_Version; node must have 16 elements
+ match(Set dst (RShiftVS src shift));  // three-operand form: src is the shifted input, dst receives the result
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+ ins_encode %{
+ int vector_len = 1;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // shift count is the immI8 operand's constant value
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{  // packed16S arithmetic right shift by an immediate count
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);  // eligible only when supports_avx512bw() is true and the node has 16 elements
match(Set dst (RShiftVS src shift));  // three-operand form: src is the shifted input, dst receives the result
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
ins_encode %{
int vector_len = 1;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // shift count is the immI8 operand's constant value
%}
ins_pipe( pipe_slow );
%}
+instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{  // packed16S arithmetic right shift in place by an immediate count
+ predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);  // NOTE(review): presumably AVX-512 without the BW extension - confirm in VM_Version; node must have 16 elements
+ match(Set dst (RShiftVS dst shift));  // two-operand form: dst is both the shifted input and the result
+ effect(TEMP src);  // src is not a matched input; it is only reserved as a temporary register
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+ ins_encode %{
+ int vector_len = 1;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);  // NOTE(review): the TEMP src is passed as the second operand although the matched input is dst - confirm the vpsraw helper handles this
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{  // packed32S arithmetic right shift, count held in a vector register
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);  // eligible only when supports_avx512bw() is true and the node has 32 elements
match(Set dst (RShiftVS src shift));  // three-operand form: src is the shifted input, dst receives the result
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
ins_encode %{
int vector_len = 2;  // length code handed to vpsraw; this file pairs 0 with vecD/vecX, 1 with vecY, 2 with vecZ
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
< prev index next >