src/hotspot/cpu/x86/x86.ad
@@ -727,10 +727,11 @@
XMM31
#endif
);
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
XMM1, XMM1b,
XMM2, XMM2b,
@@ -787,10 +788,11 @@
XMM31, XMM31b
#endif
);
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
XMM1,
XMM2,
@@ -847,10 +849,11 @@
XMM31
#endif
);
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
XMM1, XMM1b,
XMM2, XMM2b,
@@ -907,10 +910,11 @@
XMM31, XMM31b
#endif
);
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
XMM1, XMM1b, XMM1c, XMM1d,
XMM2, XMM2b, XMM2c, XMM2d,
@@ -967,10 +971,11 @@
XMM31, XMM31b, XMM31c, XMM31d
#endif
);
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
@@ -1027,13 +1032,14 @@
XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
);
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for all 512bit vector registers
-reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
+reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
@@ -1065,10 +1071,34 @@
XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
);
+// Class for restricted 512bit vector registers
+reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
+ XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
+ XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
+ XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
+ XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
+ XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
+ XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
+ XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
+#ifdef _LP64
+ ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
+ XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
+ XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
+ XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
+ XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
+ XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
+ XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
+ XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
+#endif
+ );
+
+reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
+
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);
reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d);
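The reg_class_dynamic entries above resolve to one of two fixed register sets once the CPU features are known: the EVEX set (XMM0-XMM31) or the legacy set (XMM0-XMM15). The selection code itself is generated by ADLC; the C++ sketch below only restates the predicates used in those declarations (illustrative, not the generated code, and it assumes the usual HotSpot VM_Version declarations are in scope):

    // Illustrative restatement of the reg_class_dynamic predicates above;
    // ADLC generates the real selection logic.
    static bool use_evex_vector_set_vl() {
      // float_reg_vl, double_reg_vl, vectorz_reg_vl: the full XMM0-XMM31 set
      // is used only when both EVEX and AVX512VL are available.
      return VM_Version::supports_evex() && VM_Version::supports_avx512vl();
    }
    static bool use_evex_vector_set_vlbwdq() {
      // vectors/vectord/vectorx/vectory_reg_vlbwdq: VL, BW and DQ are all
      // required, folded into a single VM_Version query.
      return VM_Version::supports_avx512vlbwdq();
    }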
@@ -1485,10 +1515,12 @@
// AVX2/EVEX supports 512bit vectors for all types.
int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
// AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
size = (UseAVX > 2) ? 64 : 32;
+ if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
+ size = (VM_Version::supports_avx512bw()) ? 64 : 32;
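+ // Example (illustrative): with UseAVX == 3 the starting size is (1 << 3) * 8 = 64
+ // bytes; a T_SHORT vector keeps 64 only when AVX512BW is present, otherwise it is
+ // capped at 32 here and then further limited by MaxVectorSize below.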
// Use flag to limit vector size.
size = MIN2(size,(int)MaxVectorSize);
// Minimum 2 values in vector (or 4 for bytes).
switch (bt) {
case T_DOUBLE:
@@ -1646,14 +1678,32 @@
int offset = __ offset();
switch (ireg) {
case Op_VecS: // copy whole register
case Op_VecD:
case Op_VecX:
+#ifndef _LP64
__ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+#else
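+ // Without AVX512VL, a VEX-encoded movdqu cannot address XMM16-XMM31, so in
+ // that case the destination is zeroed and the low 128 bits are inserted
+ // with 512-bit EVEX forms instead.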
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+ __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+ } else {
+ __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2);
+ __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
+ }
+#endif
break;
case Op_VecY:
+#ifndef _LP64
+ __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+ } else {
+ __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2);
+ __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
+ }
+#endif
break;
case Op_VecZ:
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
break;
default:
@@ -1701,14 +1751,32 @@
break;
case Op_VecD:
__ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
break;
case Op_VecX:
+#ifndef _LP64
+ __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
__ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+ } else {
+ __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
+ __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
+ }
+#endif
break;
case Op_VecY:
+#ifndef _LP64
+ __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+ } else {
+ __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
+ __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
+ }
+#endif
break;
case Op_VecZ:
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
break;
default:
@@ -1721,14 +1789,32 @@
break;
case Op_VecD:
__ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
break;
case Op_VecX:
+#ifndef _LP64
__ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+ __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+ } else {
+ __ vextracti32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
+ }
+#endif
break;
case Op_VecY:
+#ifndef _LP64
+ __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+ } else {
+ __ vextracti64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
+ }
+#endif
break;
case Op_VecZ:
__ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
break;
default:
@@ -1906,19 +1992,26 @@
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.
-// This one generically applies only for evex, so only one version
operand vecZ() %{
constraint(ALLOC_IN_RC(vectorz_reg));
match(VecZ);
format %{ %}
interface(REG_INTER);
%}
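+// Operand for 512-bit vectors that must stay allocatable without AVX512VL:
+// vectorz_reg_vl keeps the full XMM0-XMM31 set when EVEX and AVX512VL are
+// both available and falls back to the legacy XMM0-XMM15 set otherwise.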
+operand legVecZ() %{
+ constraint(ALLOC_IN_RC(vectorz_reg_vl));
+ match(VecZ);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
match(Bool);
predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
@@ -2545,64 +2638,22 @@
__ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
%}
ins_pipe(pipe_slow);
%}
-instruct absF_reg_reg(regF dst, regF src) %{
- predicate(VM_Version::supports_avxonly());
- match(Set dst (AbsF src));
- ins_cost(150);
- format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vandps($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(float_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-
-#ifdef _LP64
-instruct absF_reg_reg_evex(regF dst, regF src) %{
- predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
- match(Set dst (AbsF src));
- ins_cost(150);
- format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vandps($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(float_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
- predicate(VM_Version::supports_avx512novl());
- match(Set dst (AbsF src1));
- effect(TEMP src2);
- ins_cost(150);
- format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
- ExternalAddress(float_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-#else // _LP64
-instruct absF_reg_reg_evex(regF dst, regF src) %{
- predicate(UseAVX > 2);
+instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
+ predicate(UseAVX > 0);
match(Set dst (AbsF src));
ins_cost(150);
format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
ins_encode %{
int vector_len = 0;
__ vandps($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(float_signmask()), vector_len);
%}
ins_pipe(pipe_slow);
%}
-#endif
instruct absD_reg(regD dst) %{
predicate((UseSSE>=2) && (UseAVX == 0));
match(Set dst (AbsD dst));
ins_cost(150);
@@ -2612,55 +2663,12 @@
__ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
%}
ins_pipe(pipe_slow);
%}
-instruct absD_reg_reg(regD dst, regD src) %{
- predicate(VM_Version::supports_avxonly());
- match(Set dst (AbsD src));
- ins_cost(150);
- format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
- "# abs double by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(double_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-
-#ifdef _LP64
-instruct absD_reg_reg_evex(regD dst, regD src) %{
- predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
- match(Set dst (AbsD src));
- ins_cost(150);
- format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
- "# abs double by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(double_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
- predicate(VM_Version::supports_avx512novl());
- match(Set dst (AbsD src1));
- effect(TEMP src2);
- ins_cost(150);
- format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
- ExternalAddress(double_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-#else // _LP64
-instruct absD_reg_reg_evex(regD dst, regD src) %{
- predicate(UseAVX > 2);
+instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
+ predicate(UseAVX > 0);
match(Set dst (AbsD src));
ins_cost(150);
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
"# abs double by sign masking" %}
ins_encode %{
@@ -2668,11 +2676,10 @@
__ vandpd($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(double_signmask()), vector_len);
%}
ins_pipe(pipe_slow);
%}
-#endif
instruct negF_reg(regF dst) %{
predicate((UseSSE>=1) && (UseAVX == 0));
match(Set dst (NegF dst));
ins_cost(150);
@@ -2681,11 +2688,11 @@
__ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
%}
ins_pipe(pipe_slow);
%}
-instruct negF_reg_reg(regF dst, regF src) %{
+instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
predicate(UseAVX > 0);
match(Set dst (NegF src));
ins_cost(150);
format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
ins_encode %{
@@ -2705,15 +2712,15 @@
__ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
%}
ins_pipe(pipe_slow);
%}
-instruct negD_reg_reg(regD dst, regD src) %{
+instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
predicate(UseAVX > 0);
match(Set dst (NegD src));
ins_cost(150);
- format %{ "vnegatess $dst, $src, [0x8000000000000000]\t"
+ format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
"# neg double by sign flipping" %}
ins_encode %{
__ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(double_signflip()));
%}
@@ -2833,10 +2840,11 @@
ins_pipe( pipe_slow );
%}
// ====================VECTOR INSTRUCTIONS=====================================
+
// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 4);
match(Set dst (LoadVector mem));
ins_cost(125);
@@ -2845,10 +2853,30 @@
__ movdl($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow );
%}
+// Move vectors (4 bytes long)
+instruct MoveVecS2Leg(legVecS dst, vecS src) %{
+ match(Set dst src);
+ format %{ "movss $dst,$src\t! load vector (4 bytes)" %}
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move vectors (4 bytes long)
+instruct MoveLeg2VecS(vecS dst, legVecS src) %{
+ match(Set dst src);
+ format %{ "movss $dst,$src\t! load vector (4 bytes)" %}
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 8);
match(Set dst (LoadVector mem));
ins_cost(125);
@@ -2857,10 +2885,30 @@
__ movq($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow );
%}
+// Move vectors (8 bytes long)
+instruct MoveVecD2Leg(legVecD dst, vecD src) %{
+ match(Set dst src);
+ format %{ "movsd $dst,$src\t! load vector (8 bytes)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move vectors (8 bytes long)
+instruct MoveLeg2VecD(vecD dst, legVecD src) %{
+ match(Set dst src);
+ format %{ "movsd $dst,$src\t! load vector (8 bytes)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 16);
match(Set dst (LoadVector mem));
ins_cost(125);
@@ -2869,10 +2917,40 @@
__ movdqu($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow );
%}
+// Move vectors (16 bytes long)
+instruct MoveVecX2Leg(legVecX dst, vecX src) %{
+ match(Set dst src);
+ format %{ "movdqu $dst,$src\t! load vector (16 bytes)" %}
+ ins_encode %{
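+ // Assumption, matching the spill helpers above: with AVX-512 but no AVX512VL,
+ // XMM16-XMM31 are only reachable through 512-bit EVEX forms, so evmovdquq
+ // (vector_len 2) is used; otherwise a plain movdqu suffices.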
+ if (UseAVX < 3 || VM_Version::supports_avx512vl()) {
+ __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+ } else {
+ int vector_len = 2;
+ __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ }
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move vectors (16 bytes long)
+instruct MoveLeg2VecX(vecX dst, legVecX src) %{
+ match(Set dst src);
+ format %{ "movdqu $dst,$src\t! load vector (16 bytes)" %}
+ ins_encode %{
+ if (UseAVX < 3 || VM_Version::supports_avx512vl()) {
+ __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+ } else {
+ int vector_len = 2;
+ __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ }
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 32);
match(Set dst (LoadVector mem));
ins_cost(125);
@@ -2881,10 +2959,40 @@
__ vmovdqu($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow );
%}
+// Move vectors (32 bytes long)
+instruct MoveVecY2Leg(legVecY dst, vecY src) %{
+ match(Set dst src);
+ format %{ "vmovdqu $dst,$src\t! load vector (32 bytes)" %}
+ ins_encode %{
+ if (UseAVX < 3 || VM_Version::supports_avx512vl()) {
+ __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
+ } else {
+ int vector_len = 2;
+ __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ }
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move vectors (32 bytes long)
+instruct MoveLeg2VecY(vecY dst, legVecY src) %{
+ match(Set dst src);
+ format %{ "vmovdqu $dst,$src\t! load vector (32 bytes)" %}
+ ins_encode %{
+ if (UseAVX < 3 || VM_Version::supports_avx512vl()) {
+ __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
+ } else {
+ int vector_len = 2;
+ __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ }
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
match(Set dst (LoadVector mem));
ins_cost(125);
@@ -2907,10 +3015,30 @@
__ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
+instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{
+ match(Set dst src);
+ format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{
+ match(Set dst src);
+ format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
// Store vectors
instruct storeV4(memory mem, vecS src) %{
predicate(n->as_StoreVector()->memory_size() == 4);
match(Set mem (StoreVector mem src));
ins_cost(145);
@@ -3066,10 +3194,48 @@
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
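+// Pre-AVX512BW/VL replication into a 512-bit register is built up in stages:
+// broadcast the scalar across the low 128 bits with SSE shuffles, duplicate it
+// into bits 128-255 with vinserti128_high, then copy the low 256 bits into the
+// upper half with vinserti64x4 $dst,$dst,$dst,0x1.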
+instruct Repl64B(legVecZ dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
+ match(Set dst (ReplicateB src));
+ format %{ "movd $dst,$src\n\t"
+ "punpcklbw $dst,$dst\n\t"
+ "pshuflw $dst,$dst,0x00\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl64B_mem(legVecZ dst, memory mem) %{
+ predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
+ match(Set dst (ReplicateB (LoadB mem)));
+ format %{ "punpcklbw $dst,$mem\n\t"
+ "pshuflw $dst,$dst,0x00\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %}
+ ins_encode %{
+ __ punpcklbw($dst$$XMMRegister, $mem$$Address);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct Repl16B_imm(vecX dst, immI con) %{
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"punpcklqdq $dst,$dst\t! replicate16B($con)" %}
@@ -3092,10 +3258,26 @@
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+instruct Repl64B_imm(legVecZ dst, immI con) %{
+ predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
+ match(Set dst (ReplicateB con));
+ format %{ "movq $dst,[$constantaddress]\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct Repl4S(vecD dst, rRegI src) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS src));
format %{ "movd $dst,$src\n\t"
"pshuflw $dst,$dst,0x00\t! replicate4S" %}
@@ -3196,26 +3378,76 @@
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct Repl4I(vecX dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
- match(Set dst (ReplicateI src));
+instruct Repl32S(legVecZ dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
+ match(Set dst (ReplicateS src));
format %{ "movd $dst,$src\n\t"
- "pshufd $dst,$dst,0x00\t! replicate4I" %}
+ "pshuflw $dst,$dst,0x00\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
- __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
-instruct Repl4I_mem(vecX dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
- match(Set dst (ReplicateI (LoadI mem)));
- format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
+instruct Repl32S_mem(legVecZ dst, memory mem) %{
+ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
+ match(Set dst (ReplicateS (LoadS mem)));
+ format %{ "pshuflw $dst,$mem,0x00\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %}
+ ins_encode %{
+ __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl32S_imm(legVecZ dst, immI con) %{
+ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
+ match(Set dst (ReplicateS con));
+ format %{ "movq $dst,[$constantaddress]\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl4I(vecX dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateI src));
+ format %{ "movd $dst,$src\n\t"
+ "pshufd $dst,$dst,0x00\t! replicate4I" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl4I_mem(vecX dst, memory mem) %{
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateI (LoadI mem)));
+ format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
%}
ins_pipe( pipe_slow );
%}
@@ -3244,10 +3476,40 @@
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+instruct Repl16I(legVecZ dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateI src));
+ format %{ "movd $dst,$src\n\t"
+ "pshufd $dst,$dst,0x00\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl16I_mem(legVecZ dst, memory mem) %{
+ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateI (LoadI mem)));
+ format %{ "pshufd $dst,$mem,0x00\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct Repl4I_imm(vecX dst, immI con) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateI con));
format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
"punpcklqdq $dst,$dst" %}
@@ -3270,10 +3532,26 @@
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+instruct Repl16I_imm(legVecZ dst, immI con) %{
+ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateI con));
+ format %{ "movq $dst,[$constantaddress]\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateL (LoadL mem)));
format %{ "movq $dst,$mem\n\t"
@@ -3298,12 +3576,28 @@
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+
+instruct Repl8L(legVecZ dst, rRegL src) %{
+ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateL src));
+ format %{ "movdq $dst,$src\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
+ ins_encode %{
+ __ movdq($dst$$XMMRegister, $src$$Register);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
#else // _LP64
-instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
+instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "movdl $dst,$src.lo\n\t"
"movdl $tmp,$src.hi\n\t"
@@ -3317,10 +3611,31 @@
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+
+instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{
+ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateL src));
+ effect(TEMP dst, USE src, TEMP tmp);
+ format %{ "movdl $dst,$src.lo\n\t"
+ "movdl $tmp,$src.hi\n\t"
+ "punpckldq $dst,$tmp\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+ __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
#endif // _LP64
instruct Repl4L_imm(vecY dst, immL con) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL con));
@@ -3333,10 +3648,26 @@
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+instruct Repl8L_imm(legVecZ dst, immL con) %{
+ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateL con));
+ format %{ "movq $dst,[$constantaddress]\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $constantaddress($con));
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct Repl4L_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL (LoadL mem)));
format %{ "movq $dst,$mem\n\t"
"punpcklqdq $dst,$dst\n\t"
@@ -3347,10 +3678,26 @@
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+instruct Repl8L_mem(legVecZ dst, memory mem) %{
+ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateL (LoadL mem)));
+ format %{ "movq $dst,$mem\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct Repl2F_mem(vecD dst, memory mem) %{
predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateF (LoadF mem)));
format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
ins_encode %{
@@ -3367,12 +3714,12 @@
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
%}
ins_pipe( pipe_slow );
%}
-instruct Repl8F(vecY dst, regF src) %{
- predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+instruct Repl8F(vecY dst, vlRegF src) %{
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateF src));
format %{ "pshufd $dst,$src,0x00\n\t"
"vinsertf128_high $dst,$dst\t! replicate8F" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
@@ -3391,10 +3738,38 @@
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+instruct Repl16F(legVecZ dst, vlRegF src) %{
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateF src));
+ format %{ "pshufd $dst,$src,0x00\n\t"
+ "vinsertf128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl16F_mem(legVecZ dst, memory mem) %{
+ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateF (LoadF mem)));
+ format %{ "pshufd $dst,$mem,0x00\n\t"
+ "vinsertf128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+ __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct Repl2F_zero(vecD dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
match(Set dst (ReplicateF zero));
format %{ "xorps $dst,$dst\t! replicate2F zero" %}
ins_encode %{
@@ -3432,12 +3807,12 @@
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
%}
ins_pipe( pipe_slow );
%}
-instruct Repl4D(vecY dst, regD src) %{
- predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+instruct Repl4D(vecY dst, vlRegD src) %{
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateD src));
format %{ "pshufd $dst,$src,0x44\n\t"
"vinsertf128_high $dst,$dst\t! replicate4D" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
@@ -3456,10 +3831,38 @@
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+instruct Repl8D(legVecZ dst, vlRegD src) %{
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateD src));
+ format %{ "pshufd $dst,$src,0x44\n\t"
+ "vinsertf128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
+ __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl8D_mem(legVecZ dst, memory mem) %{
+ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+ match(Set dst (ReplicateD (LoadD mem)));
+ format %{ "pshufd $dst,$mem,0x44\n\t"
+ "vinsertf128_high $dst,$dst\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
+ __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
match(Set dst (ReplicateD zero));
format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
@@ -3734,11 +4137,11 @@
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
#else // _LP64
-instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
+instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{
predicate(n->as_Vector()->length() == 2);
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "movdl $dst,$src.lo\n\t"
"movdl $tmp,$src.hi\n\t"
@@ -3789,32 +4192,32 @@
%}
ins_pipe( fpu_reg_reg );
%}
// Replicate float (4 byte) scalar to be vector
-instruct Repl2F(vecD dst, regF src) %{
+instruct Repl2F(vecD dst, vlRegF src) %{
predicate(n->as_Vector()->length() == 2);
match(Set dst (ReplicateF src));
format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
%}
ins_pipe( fpu_reg_reg );
%}
-instruct Repl4F(vecX dst, regF src) %{
+instruct Repl4F(vecX dst, vlRegF src) %{
predicate(n->as_Vector()->length() == 4);
match(Set dst (ReplicateF src));
format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
%}
ins_pipe( pipe_slow );
%}
// Replicate double (8 bytes) scalar to be vector
-instruct Repl2D(vecX dst, regD src) %{
+instruct Repl2D(vecX dst, vlRegD src) %{
predicate(n->as_Vector()->length() == 2);
match(Set dst (ReplicateD src));
format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
@@ -3823,132 +4226,132 @@
%}
// ====================EVEX REPLICATE=============================================
instruct Repl4B_mem_evex(vecS dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8B_mem_evex(vecD dst, memory mem) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16B_evex(vecX dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB src));
- format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
+ format %{ "evpbroadcastb $dst,$src\t! replicate16B" %}
ins_encode %{
int vector_len = 0;
__ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16B_mem_evex(vecX dst, memory mem) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl32B_evex(vecY dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB src));
- format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
+ format %{ "evpbroadcastb $dst,$src\t! replicate32B" %}
ins_encode %{
int vector_len = 1;
__ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl32B_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl64B_evex(vecZ dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateB src));
- format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %}
+ format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %}
ins_encode %{
int vector_len = 2;
__ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
- predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16B_imm_evex(vecX dst, immI con) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastb $dst,$dst\t! replicate16B" %}
ins_encode %{
int vector_len = 0;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
- __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl32B_imm_evex(vecY dst, immI con) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastb $dst,$dst\t! replicate32B" %}
ins_encode %{
int vector_len = 1;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
- __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl64B_imm_evex(vecZ dst, immI con) %{
- predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastb $dst,$dst\t! upper replicate64B" %}
ins_encode %{
int vector_len = 2;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
- __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
@@ -3962,132 +4365,132 @@
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl4S_evex(vecD dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS src));
- format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
+ format %{ "evpbroadcastw $dst,$src\t! replicate4S" %}
ins_encode %{
int vector_len = 0;
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4S_mem_evex(vecD dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS (LoadS mem)));
format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8S_evex(vecX dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS src));
- format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
+ format %{ "evpbroadcastw $dst,$src\t! replicate8S" %}
ins_encode %{
int vector_len = 0;
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8S_mem_evex(vecX dst, memory mem) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS (LoadS mem)));
format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16S_evex(vecY dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS src));
- format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
+ format %{ "evpbroadcastw $dst,$src\t! replicate16S" %}
ins_encode %{
int vector_len = 1;
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16S_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS (LoadS mem)));
format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl32S_evex(vecZ dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateS src));
- format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
+ format %{ "evpbroadcastw $dst,$src\t! replicate32S" %}
ins_encode %{
int vector_len = 2;
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateS (LoadS mem)));
format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8S_imm_evex(vecX dst, immI con) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastw $dst,$dst\t! replicate8S" %}
ins_encode %{
int vector_len = 0;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
- __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16S_imm_evex(vecY dst, immI con) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastw $dst,$dst\t! replicate16S" %}
ins_encode %{
int vector_len = 1;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
- __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl32S_imm_evex(vecZ dst, immI con) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateS con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastw $dst,$dst\t! replicate32S" %}
ins_encode %{
int vector_len = 2;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
- __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
@@ -4101,57 +4504,57 @@
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl4I_evex(vecX dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateI src));
- format %{ "vpbroadcastd $dst,$src\t! replicate4I" %}
+ format %{ "evpbroadcastd $dst,$src\t! replicate4I" %}
ins_encode %{
int vector_len = 0;
__ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4I_mem_evex(vecX dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateI (LoadI mem)));
format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8I_evex(vecY dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateI src));
- format %{ "vpbroadcastd $dst,$src\t! replicate8I" %}
+ format %{ "evpbroadcastd $dst,$src\t! replicate8I" %}
ins_encode %{
int vector_len = 1;
__ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8I_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateI (LoadI mem)));
format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16I_evex(vecZ dst, rRegI src) %{
predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
match(Set dst (ReplicateI src));
- format %{ "vpbroadcastd $dst,$src\t! replicate16I" %}
+ format %{ "evpbroadcastd $dst,$src\t! replicate16I" %}
ins_encode %{
int vector_len = 2;
__ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
@@ -4161,37 +4564,37 @@
predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
match(Set dst (ReplicateI (LoadI mem)));
format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4I_imm_evex(vecX dst, immI con) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateI con));
format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
"vpbroadcastd $dst,$dst\t! replicate4I" %}
ins_encode %{
int vector_len = 0;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
- __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8I_imm_evex(vecY dst, immI con) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateI con));
format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
"vpbroadcastd $dst,$dst\t! replicate8I" %}
ins_encode %{
int vector_len = 1;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
- __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16I_imm_evex(vecZ dst, immI con) %{
@@ -4200,11 +4603,11 @@
format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
"vpbroadcastd $dst,$dst\t! replicate16I" %}
ins_encode %{
int vector_len = 2;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
- __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
@@ -4220,33 +4623,33 @@
%}
// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateL src));
- format %{ "vpbroadcastq $dst,$src\t! replicate4L" %}
+ format %{ "evpbroadcastq $dst,$src\t! replicate4L" %}
ins_encode %{
int vector_len = 1;
__ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8L_evex(vecZ dst, rRegL src) %{
predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
match(Set dst (ReplicateL src));
- format %{ "vpbroadcastq $dst,$src\t! replicate8L" %}
+ format %{ "evpbroadcastq $dst,$src\t! replicate8L" %}
ins_encode %{
int vector_len = 2;
__ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
%}
ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "movdl $dst,$src.lo\n\t"
"movdl $tmp,$src.hi\n\t"
"punpckldq $dst,$tmp\n\t"
@@ -4254,16 +4657,16 @@
ins_encode %{
int vector_len = 1;
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
- __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
+instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{
predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "movdl $dst,$src.lo\n\t"
"movdl $tmp,$src.hi\n\t"
@@ -4272,25 +4675,25 @@
ins_encode %{
int vector_len = 2;
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
- __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
#endif // _LP64
instruct Repl4L_imm_evex(vecY dst, immL con) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateL con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastq $dst,$dst\t! replicate4L" %}
ins_encode %{
int vector_len = 1;
__ movq($dst$$XMMRegister, $constantaddress($con));
- __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8L_imm_evex(vecZ dst, immL con) %{
@@ -4299,44 +4702,44 @@
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastq $dst,$dst\t! replicate8L" %}
ins_encode %{
int vector_len = 2;
__ movq($dst$$XMMRegister, $constantaddress($con));
- __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl2L_mem_evex(vecX dst, memory mem) %{
- predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateL (LoadL mem)));
format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4L_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateL (LoadL mem)));
format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
match(Set dst (ReplicateL (LoadL mem)));
format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
@@ -4350,49 +4753,49 @@
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl8F_evex(vecY dst, regF src) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateF src));
- format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
+ format %{ "vpbroadcastss $dst,$src\t! replicate8F" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8F_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateF (LoadF mem)));
format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16F_evex(vecZ dst, regF src) %{
predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
match(Set dst (ReplicateF src));
- format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
+ format %{ "vpbroadcastss $dst,$src\t! replicate16F" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
match(Set dst (ReplicateF (LoadF mem)));
format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
@@ -4442,49 +4845,49 @@
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl4D_evex(vecY dst, regD src) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateD src));
- format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
+ format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4D_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateD (LoadD mem)));
format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8D_evex(vecZ dst, regD src) %{
predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
match(Set dst (ReplicateD src));
- format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
+ format %{ "vpbroadcastsd $dst,$src\t! replicate8D" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
match(Set dst (ReplicateD (LoadD mem)));
format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
@@ -4523,11 +4926,11 @@
ins_pipe( fpu_reg_reg );
%}
// ====================REDUCTION ARITHMETIC=======================================
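// The reduction temporaries below are typed as vector operands (vecD/vecX/vecY) matching the
// width of the source vector, and the 512-bit variants take legVecZ operands; that presumably
// confines them to the legacy XMM0-XMM15 bank, since several instructions in these sequences
// (e.g. phaddd, and pshufd/movdl as emitted here) cannot encode the EVEX-only upper registers.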
-instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
predicate(UseSSE > 2 && UseAVX == 0);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp2, TEMP tmp);
format %{ "movdqu $tmp2,$src2\n\t"
"phaddd $tmp2,$tmp2\n\t"
@@ -4542,11 +4945,11 @@
__ movdl($dst$$Register, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
predicate(VM_Version::supports_avxonly());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"movd $tmp2,$src1\n\t"
@@ -4560,11 +4963,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0x1\n\t"
"vpaddd $tmp,$src2,$tmp2\n\t"
@@ -4580,11 +4983,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseSSE > 2 && UseAVX == 0);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "movdqu $tmp,$src2\n\t"
"phaddd $tmp,$tmp\n\t"
@@ -4601,11 +5004,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(VM_Version::supports_avxonly());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"vphaddd $tmp,$tmp,$tmp\n\t"
@@ -4621,11 +5024,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0xE\n\t"
"vpaddd $tmp,$src2,$tmp2\n\t"
@@ -4645,11 +5048,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
predicate(VM_Version::supports_avxonly());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"vphaddd $tmp,$tmp,$tmp2\n\t"
@@ -4669,11 +5072,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti128_high $tmp,$src2\n\t"
"vpaddd $tmp,$tmp,$src2\n\t"
@@ -4697,11 +5100,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
format %{ "vextracti64x4_high $tmp3,$src2\n\t"
"vpaddd $tmp3,$tmp3,$src2\n\t"
@@ -4729,11 +5132,11 @@
%}
ins_pipe( pipe_slow );
%}
#ifdef _LP64
-instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0xE\n\t"
"vpaddq $tmp,$src2,$tmp2\n\t"
@@ -4748,11 +5151,11 @@
__ movdq($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti128_high $tmp,$src2\n\t"
"vpaddq $tmp2,$tmp,$src2\n\t"
@@ -4771,11 +5174,11 @@
__ movdq($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
+instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti64x4_high $tmp2,$src2\n\t"
"vpaddq $tmp2,$tmp2,$src2\n\t"
@@ -4799,11 +5202,11 @@
%}
ins_pipe( pipe_slow );
%}
#endif
-instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
+instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVF dst src2));
effect(TEMP dst, TEMP tmp);
format %{ "addss $dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -4814,11 +5217,11 @@
__ addss($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
+instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVF dst src2));
effect(TEMP dst, TEMP tmp);
format %{ "vaddss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -4829,11 +5232,11 @@
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVF dst src2));
effect(TEMP dst, TEMP tmp);
format %{ "addss $dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -4852,11 +5255,11 @@
__ addss($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVF dst src2));
effect(TEMP tmp, TEMP dst);
format %{ "vaddss $dst,dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -4875,11 +5278,11 @@
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
+instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVF dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vaddss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -4914,11 +5317,11 @@
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
+instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVF dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vaddss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -4985,11 +5388,11 @@
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst);
format %{ "addsd $dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
@@ -5000,11 +5403,11 @@
__ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst);
format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
@@ -5015,34 +5418,34 @@
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
+instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2,0x1\n\t"
+ "vextractf128 $tmp2,$src2,0x1\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
ins_encode %{
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
+instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
@@ -5077,11 +5480,11 @@
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
predicate(UseSSE > 3 && UseAVX == 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0x1\n\t"
"pmulld $tmp2,$src2\n\t"
@@ -5096,11 +5499,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0x1\n\t"
"vpmulld $tmp,$src2,$tmp2\n\t"
@@ -5116,11 +5519,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseSSE > 3 && UseAVX == 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0xE\n\t"
"pmulld $tmp2,$src2\n\t"
@@ -5139,11 +5542,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0xE\n\t"
"vpmulld $tmp,$src2,$tmp2\n\t"
@@ -5163,12 +5566,12 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
- predicate(UseAVX > 0);
+instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
+ predicate(UseAVX > 1);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti128_high $tmp,$src2\n\t"
"vpmulld $tmp,$tmp,$src2\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
@@ -5191,11 +5594,11 @@
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
predicate(UseAVX > 2);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
format %{ "vextracti64x4_high $tmp3,$src2\n\t"
"vpmulld $tmp3,$tmp3,$src2\n\t"
@@ -5223,11 +5626,11 @@
%}
ins_pipe( pipe_slow );
%}
#ifdef _LP64
-instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0xE\n\t"
"vpmullq $tmp,$src2,$tmp2\n\t"
@@ -5242,11 +5645,11 @@
__ movdq($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti128_high $tmp,$src2\n\t"
"vpmullq $tmp2,$tmp,$src2\n\t"
@@ -5265,11 +5668,11 @@
__ movdq($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
+instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti64x4_high $tmp2,$src2\n\t"
"vpmullq $tmp2,$tmp2,$src2\n\t"
@@ -5293,11 +5696,11 @@
%}
ins_pipe( pipe_slow );
%}
#endif
-instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
+instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVF dst src2));
effect(TEMP dst, TEMP tmp);
format %{ "mulss $dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -5308,11 +5711,11 @@
__ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
+instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVF dst src2));
effect(TEMP tmp, TEMP dst);
format %{ "vmulss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -5323,11 +5726,11 @@
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVF dst src2));
effect(TEMP dst, TEMP tmp);
format %{ "mulss $dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -5346,11 +5749,11 @@
__ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVF dst src2));
effect(TEMP tmp, TEMP dst);
format %{ "vmulss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -5369,11 +5772,11 @@
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
+instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVF dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vmulss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -5408,11 +5811,11 @@
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
+instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2);
match(Set dst (MulReductionVF dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vmulss $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
@@ -5479,11 +5882,11 @@
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVD dst src2));
effect(TEMP dst, TEMP tmp);
format %{ "mulsd $dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
@@ -5494,11 +5897,11 @@
__ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVD dst src2));
effect(TEMP tmp, TEMP dst);
format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
@@ -5509,11 +5912,11 @@
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
+instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
@@ -5532,11 +5935,11 @@
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
+instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2);
match(Set dst (MulReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
@@ -5586,315 +5989,133 @@
__ paddb($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
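// For the packed byte adds below, a single rule per width is guarded by the UseAVX level:
// UseAVX > 0 for the 128-bit VEX form, UseAVX > 1 for the 256-bit form, and UseAVX > 2 plus
// avx512bw for the 512-bit form, since vpaddb on zmm registers is an AVX512BW instruction.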
-instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
ins_encode %{
int vector_len = 0;
__ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (AddVB dst src2));
- effect(TEMP src1);
- format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
ins_encode %{
int vector_len = 0;
__ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (AddVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd8B(vecD dst, vecD src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVB dst src));
format %{ "paddb $dst,$src\t! add packed8B" %}
ins_encode %{
__ paddb($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
ins_encode %{
int vector_len = 0;
__ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (AddVB dst src2));
- effect(TEMP src1);
- format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
ins_encode %{
int vector_len = 0;
__ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (AddVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd16B(vecX dst, vecX src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
match(Set dst (AddVB dst src));
format %{ "paddb $dst,$src\t! add packed16B" %}
ins_encode %{
__ paddb($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
ins_encode %{
int vector_len = 0;
__ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
+instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
ins_encode %{
int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVB dst src2));
- effect(TEMP src1);
- format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
+instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
ins_encode %{
- int vector_len = 0;
+ int vector_len = 1;
__ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
+ format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
ins_encode %{
- int vector_len = 0;
+ int vector_len = 1;
__ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
- match(Set dst (AddVB dst src2));
- effect(TEMP src1);
- format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
- match(Set dst (AddVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
+instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
ins_encode %{
int vector_len = 2;
__ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
ins_encode %{
int vector_len = 2;
__ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
@@ -5911,315 +6132,131 @@
__ paddw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (AddVS dst src2));
- effect(TEMP src1);
- format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (AddVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd4S(vecD dst, vecD src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVS dst src));
format %{ "paddw $dst,$src\t! add packed4S" %}
ins_encode %{
__ paddw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (AddVS dst src2));
- effect(TEMP src1);
- format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (AddVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd8S(vecX dst, vecX src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVS dst src));
format %{ "paddw $dst,$src\t! add packed8S" %}
ins_encode %{
__ paddw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (AddVS dst src2));
- effect(TEMP src1);
- format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (AddVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVS dst src2));
- effect(TEMP src1);
- format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
@@ -6227,11 +6264,11 @@
ins_pipe( pipe_slow );
%}
// Integers vector add
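// The UseAVX == 0 guard on the plain SSE forms below (and on the matching 2L/2F/4F/2D rules)
// presumably keeps the two-operand encodings from overlapping with the three-operand VEX rules
// that match the same ideal nodes.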
instruct vadd2I(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVI dst src));
format %{ "paddd $dst,$src\t! add packed2I" %}
ins_encode %{
__ paddd($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -6259,11 +6296,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vadd4I(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVI dst src));
format %{ "paddd $dst,$src\t! add packed4I" %}
ins_encode %{
__ paddd($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -6336,11 +6373,11 @@
ins_pipe( pipe_slow );
%}
// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVL dst src));
format %{ "paddq $dst,$src\t! add packed2L" %}
ins_encode %{
__ paddq($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -6413,11 +6450,11 @@
ins_pipe( pipe_slow );
%}
// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVF dst src));
format %{ "addps $dst,$src\t! add packed2F" %}
ins_encode %{
__ addps($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -6445,11 +6482,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vadd4F(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVF dst src));
format %{ "addps $dst,$src\t! add packed4F" %}
ins_encode %{
__ addps($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -6522,11 +6559,11 @@
ins_pipe( pipe_slow );
%}
// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVD dst src));
format %{ "addpd $dst,$src\t! add packed2D" %}
ins_encode %{
__ addpd($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -6610,315 +6647,131 @@
__ psubb($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
ins_encode %{
int vector_len = 0;
__ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
+instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVB src (LoadVector mem)));
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
ins_encode %{
int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVB dst src2));
- effect(TEMP src1);
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
+instruct vsub8B(vecD dst, vecD src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVB dst src));
+ format %{ "psubb $dst,$src\t! sub packed8B" %}
ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ psubb($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
+instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVB src1 src2));
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
ins_encode %{
int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
ins_encode %{
int vector_len = 0;
__ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
+instruct vsub16B(vecX dst, vecX src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
+ match(Set dst (SubVB dst src));
+ format %{ "psubb $dst,$src\t! sub packed16B" %}
ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ psubb($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8B(vecD dst, vecD src) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
- match(Set dst (SubVB dst src));
- format %{ "psubb $dst,$src\t! sub packed8B" %}
- ins_encode %{
- __ psubb($dst$$XMMRegister, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVB dst src2));
- effect(TEMP src1);
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16B(vecX dst, vecX src) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
- match(Set dst (SubVB dst src));
- format %{ "psubb $dst,$src\t! sub packed16B" %}
- ins_encode %{
- __ psubb($dst$$XMMRegister, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
ins_encode %{
int vector_len = 0;
__ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (SubVB dst src2));
- effect(TEMP src1);
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
ins_encode %{
int vector_len = 0;
__ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (SubVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
ins_encode %{
int vector_len = 1;
__ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
- match(Set dst (SubVB dst src2));
- effect(TEMP src1);
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
ins_encode %{
int vector_len = 1;
__ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
- match(Set dst (SubVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
ins_encode %{
int vector_len = 2;
__ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
ins_encode %{
int vector_len = 2;
__ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
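
The SubVB hunk above collapses the separate _avx/_evex/_evex_special variants into one rule per vector width, keyed on the lane count and the UseAVX level (plus AVX512BW for the 64-byte form). A minimal C++ sketch of that selection, using illustrative names rather than HotSpot code:

#include <cstdio>

// Which single "sub packed bytes" rule applies, per the predicates above.
enum SubBytesRule { SSE_PSUBB, VEX128_VPSUBB, VEX256_VPSUBB, EVEX512_VPSUBB, NO_RULE };

static SubBytesRule select_sub_bytes_rule(int lanes, int use_avx, bool avx512bw) {
  switch (lanes) {
    case 4: case 8: case 16:  // vecS, vecD, vecX
      return use_avx == 0 ? SSE_PSUBB : VEX128_VPSUBB;
    case 32:                  // vecY
      return use_avx > 1 ? VEX256_VPSUBB : NO_RULE;
    case 64:                  // vecZ
      return (use_avx > 2 && avx512bw) ? EVEX512_VPSUBB : NO_RULE;
    default:
      return NO_RULE;
  }
}

int main() {
  printf("%d\n", select_sub_bytes_rule(32, 2, false));  // prints VEX256_VPSUBB
  return 0;
}
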
@@ -6935,315 +6788,131 @@
__ psubw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (SubVS dst src2));
- effect(TEMP src1);
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (SubVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsub4S(vecD dst, vecD src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVS dst src));
format %{ "psubw $dst,$src\t! sub packed4S" %}
ins_encode %{
__ psubw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVS dst src2));
- effect(TEMP src1);
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsub8S(vecX dst, vecX src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (SubVS dst src));
format %{ "psubw $dst,$src\t! sub packed8S" %}
ins_encode %{
__ psubw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVS dst src2));
- effect(TEMP src1);
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (SubVS dst src2));
- effect(TEMP src1);
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (SubVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
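
The vector_len constant passed to vpsubw/vpsubb in the ins_encode blocks selects the encoded operand width. A small illustrative helper (not a HotSpot function) capturing the mapping used above:

#include <cassert>

// 0 selects the 128-bit encoding, 1 the 256-bit encoding, 2 the 512-bit encoding.
static int vector_len_for(int vector_size_in_bytes) {
  if (vector_size_in_bytes <= 16) return 0;  // vecS, vecD, vecX
  if (vector_size_in_bytes == 32) return 1;  // vecY
  if (vector_size_in_bytes == 64) return 2;  // vecZ
  return -1;
}

int main() {
  assert(vector_len_for(8)  == 0);   // vsub4S / vsub8S family
  assert(vector_len_for(32) == 1);   // vsub16S
  assert(vector_len_for(64) == 2);   // vsub32S
  return 0;
}
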
@@ -7251,11 +6920,11 @@
ins_pipe( pipe_slow );
%}
// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVI dst src));
format %{ "psubd $dst,$src\t! sub packed2I" %}
ins_encode %{
__ psubd($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -7283,11 +6952,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vsub4I(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVI dst src));
format %{ "psubd $dst,$src\t! sub packed4I" %}
ins_encode %{
__ psubd($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -7360,11 +7029,11 @@
ins_pipe( pipe_slow );
%}
// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVL dst src));
format %{ "psubq $dst,$src\t! sub packed2L" %}
ins_encode %{
__ psubq($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -7437,11 +7106,11 @@
ins_pipe( pipe_slow );
%}
// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVF dst src));
format %{ "subps $dst,$src\t! sub packed2F" %}
ins_encode %{
__ subps($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -7469,11 +7138,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vsub4F(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVF dst src));
format %{ "subps $dst,$src\t! sub packed4F" %}
ins_encode %{
__ subps($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -7546,11 +7215,11 @@
ins_pipe( pipe_slow );
%}
// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVD dst src));
format %{ "subpd $dst,$src\t! sub packed2D" %}
ins_encode %{
__ subpd($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -7610,339 +7279,155 @@
__ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (SubVD src (LoadVector mem)));
- format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
- ins_encode %{
- int vector_len = 2;
- __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// --------------------------------- MUL --------------------------------------
-
-// Shorts/Chars vector mul
-instruct vmul2S(vecS dst, vecS src) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (MulVS dst src));
- format %{ "pmullw $dst,$src\t! mul packed2S" %}
- ins_encode %{
- __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (MulVS dst src2));
- effect(TEMP src1);
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (MulVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul4S(vecD dst, vecD src) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (MulVS dst src));
- format %{ "pmullw $dst,$src\t! mul packed4S" %}
- ins_encode %{
- __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (MulVS dst src2));
- effect(TEMP src1);
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (MulVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
+instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVD src (LoadVector mem)));
+ format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vector_len = 2;
+ __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul8S(vecX dst, vecX src) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+// --------------------------------- MUL --------------------------------------
+
+// Shorts/Chars vector mul
+instruct vmul2S(vecS dst, vecS src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (MulVS dst src));
- format %{ "pmullw $dst,$src\t! mul packed8S" %}
+ format %{ "pmullw $dst,$src\t! mul packed2S" %}
ins_encode %{
__ pmullw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (MulVS src (LoadVector mem)));
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
ins_encode %{
int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (MulVS dst src2));
- effect(TEMP src1);
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+instruct vmul4S(vecD dst, vecD src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVS dst src));
+ format %{ "pmullw $dst,$src\t! mul packed4S" %}
ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
ins_encode %{
int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (MulVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+instruct vmul8S(vecX dst, vecX src) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVS dst src));
+ format %{ "pmullw $dst,$src\t! mul packed8S" %}
ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
ins_encode %{
- int vector_len = 1;
+ int vector_len = 0;
__ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVS src (LoadVector mem)));
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
ins_encode %{
- int vector_len = 1;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vector_len = 0;
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (MulVS dst src2));
- effect(TEMP src1);
+instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (MulVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
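
For reference on the MulVS rules above, a scalar C++ model of one 16-bit lane of pmullw/vpmullw (illustrative only): a 16x16 multiply that keeps the low 16 bits and wraps on overflow.

#include <cassert>
#include <cstdint>

static int16_t mullw_lane(int16_t a, int16_t b) {
  return (int16_t)((int32_t)a * (int32_t)b);  // truncating low half
}

int main() {
  assert(mullw_lane(3, 7) == 21);
  assert(mullw_lane(256, 256) == 0);  // 65536 truncated to 16 bits
  return 0;
}
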
@@ -8125,11 +7610,11 @@
ins_pipe( pipe_slow );
%}
// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (MulVF dst src));
format %{ "mulps $dst,$src\t! mul packed2F" %}
ins_encode %{
__ mulps($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -8157,11 +7642,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vmul4F(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (MulVF dst src));
format %{ "mulps $dst,$src\t! mul packed4F" %}
ins_encode %{
__ mulps($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -8234,11 +7719,11 @@
ins_pipe( pipe_slow );
%}
// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (MulVD dst src));
format %{ "mulpd $dst,$src\t! mul packed2D" %}
ins_encode %{
__ mulpd($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -8309,12 +7794,12 @@
__ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
- predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8);
+instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
effect(TEMP dst, USE src1, USE src2);
format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
"blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
%}
@@ -8325,12 +7810,12 @@
__ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
- predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
+instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
effect(TEMP dst, USE src1, USE src2);
format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
"blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
%}
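
The two CMoveV rules above drop the UseAVX < 3 cap and instead constrain their operands to legVecY. The likely reason, sketched here with illustrative masks rather than real register classes: vblendvps/vblendvpd have no EVEX form, so when AVX-512 exposes XMM16-XMM31 these rules must still be limited to registers a VEX prefix can encode.

#include <cassert>
#include <cstdint>

static const uint32_t LEGACY_XMM = 0x0000FFFFu;  // XMM0-XMM15, VEX-encodable

static bool vex_can_encode(int reg) { return reg >= 0 && reg < 16; }

int main() {
  for (int reg = 0; reg < 32; ++reg) {
    if ((LEGACY_XMM >> reg) & 1u) {
      assert(vex_can_encode(reg));  // every register in the legacy class stays encodable
    }
  }
  return 0;
}
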
@@ -8345,11 +7830,11 @@
// --------------------------------- DIV --------------------------------------
// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (DivVF dst src));
format %{ "divps $dst,$src\t! div packed2F" %}
ins_encode %{
__ divps($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -8377,11 +7862,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vdiv4F(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (DivVF dst src));
format %{ "divps $dst,$src\t! div packed4F" %}
ins_encode %{
__ divps($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -8454,11 +7939,11 @@
ins_pipe( pipe_slow );
%}
// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (DivVD dst src));
format %{ "divpd $dst,$src\t! div packed2D" %}
ins_encode %{
__ divpd($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -8688,370 +8173,186 @@
int vector_len = 2;
__ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-
-instruct vsqrt16F_mem(vecZ dst, memory mem) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (SqrtVF (LoadVector mem)));
- format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
- ins_encode %{
- int vector_len = 2;
- __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// ------------------------------ LeftShift -----------------------------------
-
-// Shorts/Chars vector left shift
-instruct vsll2S(vecS dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed2S" %}
- ins_encode %{
- __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_imm(vecS dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed2S" %}
- ins_encode %{
- __ psllw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S(vecD dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed4S" %}
- ins_encode %{
- __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_imm(vecD dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed4S" %}
- ins_encode %{
- __ psllw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
+
+instruct vsqrt16F_mem(vecZ dst, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (SqrtVF (LoadVector mem)));
+ format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ int vector_len = 2;
+ __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S(vecX dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+// ------------------------------ LeftShift -----------------------------------
+
+// Shorts/Chars vector left shift
+instruct vsll2S(vecS dst, vecS shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed8S" %}
+ format %{ "psllw $dst,$shift\t! left shift packed2S" %}
ins_encode %{
__ psllw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S_imm(vecX dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+instruct vsll2S_imm(vecS dst, immI8 shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed8S" %}
+ format %{ "psllw $dst,$shift\t! left shift packed2S" %}
ins_encode %{
__ psllw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
ins_encode %{
int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+instruct vsll4S(vecD dst, vecS shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
+ format %{ "psllw $dst,$shift\t! left shift packed4S" %}
ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
+instruct vsll4S_imm(vecD dst, immI8 shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed4S" %}
ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ __ psllw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
ins_encode %{
int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
+instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
+instruct vsll8S(vecX dst, vecS shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed8S" %}
ins_encode %{
- int vector_len = 1;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
+instruct vsll8S_imm(vecX dst, immI8 shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed8S" %}
ins_encode %{
- int vector_len = 1;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ psllw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
+instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
- int vector_len = 1;
+ int vector_len = 0;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
- int vector_len = 1;
+ int vector_len = 0;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
int vector_len = 1;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
+instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
@@ -9059,21 +8360,21 @@
ins_pipe( pipe_slow );
%}
// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVI dst shift));
format %{ "pslld $dst,$shift\t! left shift packed2I" %}
ins_encode %{
__ pslld($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vsll2I_imm(vecD dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVI dst shift));
format %{ "pslld $dst,$shift\t! left shift packed2I" %}
ins_encode %{
__ pslld($dst$$XMMRegister, (int)$shift$$constant);
%}
@@ -9101,21 +8402,21 @@
%}
ins_pipe( pipe_slow );
%}
instruct vsll4I(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (LShiftVI dst shift));
format %{ "pslld $dst,$shift\t! left shift packed4I" %}
ins_encode %{
__ pslld($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vsll4I_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (LShiftVI dst shift));
format %{ "pslld $dst,$shift\t! left shift packed4I" %}
ins_encode %{
__ pslld($dst$$XMMRegister, (int)$shift$$constant);
%}
@@ -9188,21 +8489,21 @@
ins_pipe( pipe_slow );
%}
// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVL dst shift));
format %{ "psllq $dst,$shift\t! left shift packed2L" %}
ins_encode %{
__ psllq($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vsll2L_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVL dst shift));
format %{ "psllq $dst,$shift\t! left shift packed2L" %}
ins_encode %{
__ psllq($dst$$XMMRegister, (int)$shift$$constant);
%}
@@ -9275,360 +8576,176 @@
ins_pipe( pipe_slow );
%}
// ----------------------- LogicalRightShift -----------------------------------
-// Shorts vector logical right shift produces incorrect Java result
-// for negative data because java code convert short value into int with
-// sign extension before a shift. But char vectors are fine since chars are
-// unsigned values.
-
-instruct vsrl2S(vecS dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_imm(vecS dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S(vecD dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_imm(vecD dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
+// Shorts vector logical right shift produces an incorrect Java result
+// for negative data because Java code converts short values into ints with
+// sign extension before a shift. But char vectors are fine since chars are
+// unsigned values.
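+// A minimal illustration of the mismatch (added here for clarity; not part of the
+// original comment): for a negative short the scalar Java expression widens first,
+//   short s = (short) -2;          // lane bits 0xFFFE
+//   short r = (short) (s >>> 1);   // int 0xFFFFFFFE >>> 1 = 0x7FFFFFFF, narrows to 0xFFFF (-1)
+// whereas a 16-bit psrlw on 0xFFFE yields 0x7FFF (32767). For chars the widening is
+// zero extension, so (char) (((char) 0xFFFE) >>> 1) == 0x7FFF agrees with psrlw.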
-instruct vsrl8S(vecX dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+instruct vsrl2S(vecS dst, vecS shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
+ format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
ins_encode %{
__ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_imm(vecX dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+instruct vsrl2S_imm(vecS dst, immI8 shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
+ format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
ins_encode %{
__ psrlw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
ins_encode %{
int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+instruct vsrl4S(vecD dst, vecS shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+ format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+instruct vsrl4S_imm(vecD dst, immI8 shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
ins_encode %{
int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
+instruct vsrl8S(vecX dst, vecS shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
ins_encode %{
- int vector_len = 1;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
+instruct vsrl8S_imm(vecX dst, immI8 shift) %{
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
ins_encode %{
- int vector_len = 1;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
+instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
- int vector_len = 1;
+ int vector_len = 0;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
- int vector_len = 1;
+ int vector_len = 0;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
int vector_len = 1;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
+instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
@@ -9636,21 +8753,21 @@
ins_pipe( pipe_slow );
%}
// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVI dst shift));
format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
ins_encode %{
__ psrld($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vsrl2I_imm(vecD dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVI dst shift));
format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
ins_encode %{
__ psrld($dst$$XMMRegister, (int)$shift$$constant);
%}
@@ -9678,21 +8795,21 @@
%}
ins_pipe( pipe_slow );
%}
instruct vsrl4I(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (URShiftVI dst shift));
format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
ins_encode %{
__ psrld($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vsrl4I_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (URShiftVI dst shift));
format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
ins_encode %{
__ psrld($dst$$XMMRegister, (int)$shift$$constant);
%}
@@ -9765,21 +8882,21 @@
ins_pipe( pipe_slow );
%}
// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVL dst shift));
format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
ins_encode %{
__ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vsrl2L_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVL dst shift));
format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
ins_encode %{
__ psrlq($dst$$XMMRegister, (int)$shift$$constant);
%}
@@ -9864,87 +8981,41 @@
%}
ins_pipe( pipe_slow );
%}
instruct vsra2S_imm(vecS dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVS dst shift));
format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
__ psraw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
int vector_len = 0;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsra4S(vecD dst, vecS shift) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (RShiftVS dst shift));
format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
@@ -9961,78 +9032,32 @@
__ psraw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
int vector_len = 0;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsra8S(vecX dst, vecS shift) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (RShiftVS dst shift));
format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
@@ -10049,159 +9074,67 @@
__ psraw($dst$$XMMRegister, (int)$shift$$constant);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
int vector_len = 0;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
ins_encode %{
int vector_len = 1;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
-instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
ins_encode %{
int vector_len = 2;
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
@@ -10209,21 +9142,21 @@
ins_pipe( pipe_slow );
%}
// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVI dst shift));
format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
ins_encode %{
__ psrad($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vsra2I_imm(vecD dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVI dst shift));
format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
ins_encode %{
__ psrad($dst$$XMMRegister, (int)$shift$$constant);
%}
@@ -10251,21 +9184,21 @@
%}
ins_pipe( pipe_slow );
%}
instruct vsra4I(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (RShiftVI dst shift));
format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
ins_encode %{
__ psrad($dst$$XMMRegister, $shift$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vsra4I_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (RShiftVI dst shift));
format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
ins_encode %{
__ psrad($dst$$XMMRegister, (int)$shift$$constant);
%}
@@ -10342,11 +9275,11 @@
// --------------------------------- AND --------------------------------------
instruct vand4B(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length_in_bytes() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
match(Set dst (AndV dst src));
format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
ins_encode %{
__ pand($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -10374,11 +9307,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vand8B(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length_in_bytes() == 8);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
match(Set dst (AndV dst src));
format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
ins_encode %{
__ pand($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -10406,11 +9339,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vand16B(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length_in_bytes() == 16);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
match(Set dst (AndV dst src));
format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
ins_encode %{
__ pand($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -10484,11 +9417,11 @@
%}
// --------------------------------- OR ---------------------------------------
instruct vor4B(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length_in_bytes() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
match(Set dst (OrV dst src));
format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
ins_encode %{
__ por($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -10516,11 +9449,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vor8B(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length_in_bytes() == 8);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
match(Set dst (OrV dst src));
format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
ins_encode %{
__ por($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -10548,11 +9481,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vor16B(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length_in_bytes() == 16);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
match(Set dst (OrV dst src));
format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
ins_encode %{
__ por($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -10626,11 +9559,11 @@
%}
// --------------------------------- XOR --------------------------------------
instruct vxor4B(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length_in_bytes() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
match(Set dst (XorV dst src));
format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
ins_encode %{
__ pxor($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -10658,11 +9591,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vxor8B(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length_in_bytes() == 8);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
match(Set dst (XorV dst src));
format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
ins_encode %{
__ pxor($dst$$XMMRegister, $src$$XMMRegister);
%}
@@ -10690,11 +9623,11 @@
%}
ins_pipe( pipe_slow );
%}
instruct vxor16B(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length_in_bytes() == 16);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
match(Set dst (XorV dst src));
format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
ins_encode %{
__ pxor($dst$$XMMRegister, $src$$XMMRegister);
%}