< prev index next >
src/hotspot/cpu/x86/x86.ad
Print this page
rev 62166 : manual merge with vectorIntrinsics
*** 1095,1104 ****
--- 1095,1105 ----
);
reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
+ reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
%}
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
*** 1163,1172 ****
--- 1164,1231 ----
return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
#endif
};
+
+ inline uint vector_length(const Node* n) {
+ const TypeVect* vt = n->bottom_type()->is_vect();
+ return vt->length();
+ }
+
+ inline uint vector_length(const MachNode* use, MachOper* opnd) {
+ uint def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ return def->bottom_type()->is_vect()->length();
+ }
+
+ inline uint vector_length_in_bytes(const Node* n) {
+ const TypeVect* vt = n->bottom_type()->is_vect();
+ return vt->length_in_bytes();
+ }
+
+ inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) {
+ uint def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ return def->bottom_type()->is_vect()->length_in_bytes();
+ }
+
+ inline BasicType vector_element_basic_type(const Node *n) {
+ return n->bottom_type()->is_vect()->element_basic_type();
+ }
+
+ inline BasicType vector_element_basic_type(const MachNode *use, MachOper* opnd) {
+ uint def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ return def->bottom_type()->is_vect()->element_basic_type();
+ }
+
+ inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
+ switch(bytes) {
+ case 4: // fall-through
+ case 8: // fall-through
+ case 16: return Assembler::AVX_128bit;
+ case 32: return Assembler::AVX_256bit;
+ case 64: return Assembler::AVX_512bit;
+
+ default: {
+ ShouldNotReachHere();
+ return Assembler::AVX_NoVec;
+ }
+ }
+ }
+
+ static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
+ return vector_length_encoding(vector_length_in_bytes(n));
+ }
+
+ static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
+ uint def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ return vector_length_encoding(def);
+ }
+
class Node::PD {
public:
enum NodeFlags {
Flag_intel_jcc_erratum = Node::_last_flag << 1,
_last_flag = Flag_intel_jcc_erratum
*** 1260,1269 ****
--- 1319,1340 ----
assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
__ end_a_stub();
return offset;
}
+ Assembler::Width widthForType(BasicType bt) {
+ if (bt == T_BYTE) {
+ return Assembler::B;
+ } else if (bt == T_SHORT) {
+ return Assembler::W;
+ } else if (bt == T_INT) {
+ return Assembler::D;
+ } else {
+ assert(bt == T_LONG, "not a long: %s", type2name(bt));
+ return Assembler::Q;
+ }
+ }
//=============================================================================
// Float masks come from different places depending on platform.
#ifdef _LP64
*** 1276,1295 ****
--- 1347,1375 ----
static address float_signflip() { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
+ static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
+ static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
+ static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
+ static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
+ static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
+ static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
+ static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
+ static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
//=============================================================================
const bool Matcher::match_rule_supported(int opcode) {
if (!has_match_rule(opcode)) {
return false; // no match rule present
}
switch (opcode) {
case Op_AbsVL:
+ case Op_StoreVectorScatter:
if (UseAVX < 3) {
return false;
}
break;
case Op_PopCountI:
*** 1307,1321 ****
--- 1387,1410 ----
if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
return false;
}
break;
case Op_MulVL:
+ if (UseSSE < 4) { // only with SSE4_1 or AVX
+ return false;
+ }
+ break;
case Op_MulReductionVL:
if (VM_Version::supports_avx512dq() == false) {
return false;
}
break;
+ case Op_AddReductionVL:
+ if (UseSSE < 2) { // requires at least SSE2
+ return false;
+ }
+ break;
case Op_AbsVB:
case Op_AbsVS:
case Op_AbsVI:
case Op_AddReductionVI:
case Op_AndReductionV:
*** 1323,1339 ****
--- 1412,1437 ----
case Op_XorReductionV:
if (UseSSE < 3) { // requires at least SSSE3
return false;
}
break;
+ case Op_VectorLoadShuffle:
+ case Op_VectorRearrange:
case Op_MulReductionVI:
if (UseSSE < 4) { // requires at least SSE4
return false;
}
break;
case Op_SqrtVD:
case Op_SqrtVF:
+ case Op_VectorMaskCmp:
+ case Op_VectorCastB2X:
+ case Op_VectorCastS2X:
+ case Op_VectorCastI2X:
+ case Op_VectorCastL2X:
+ case Op_VectorCastF2X:
+ case Op_VectorCastD2X:
if (UseAVX < 1) { // enabled for AVX only
return false;
}
break;
case Op_CompareAndSwapL:
*** 1344,1354 ****
return false;
}
break;
case Op_CMoveVF:
case Op_CMoveVD:
! if (UseAVX < 1 || UseAVX > 2) {
return false;
}
break;
case Op_StrIndexOf:
if (!UseSSE42Intrinsics) {
--- 1442,1452 ----
return false;
}
break;
case Op_CMoveVF:
case Op_CMoveVD:
! if (UseAVX < 1) { // enabled for AVX only
return false;
}
break;
case Op_StrIndexOf:
if (!UseSSE42Intrinsics) {
*** 1367,1376 ****
--- 1465,1478 ----
break;
case Op_MulVB:
case Op_LShiftVB:
case Op_RShiftVB:
case Op_URShiftVB:
+ case Op_VectorInsert:
+ case Op_VectorLoadMask:
+ case Op_VectorStoreMask:
+ case Op_VectorBlend:
if (UseSSE < 4) {
return false;
}
break;
#ifdef _LP64
*** 1388,1407 ****
--- 1490,1523 ----
case Op_CacheWBPostSync:
if (!VM_Version::supports_data_cache_line_flush()) {
return false;
}
break;
+ case Op_ExtractB:
+ case Op_ExtractL:
+ case Op_ExtractI:
case Op_RoundDoubleMode:
if (UseSSE < 4) {
return false;
}
break;
case Op_RoundDoubleModeV:
if (VM_Version::supports_avx() == false) {
return false; // 128bit vroundpd is not available
}
break;
+ case Op_LoadVectorGather:
+ if (UseAVX < 2) {
+ return false;
+ }
+ break;
+ case Op_FmaVD:
+ case Op_FmaVF:
+ if (!UseFMA) {
+ return false;
+ }
+ break;
case Op_MacroLogicV:
if (UseAVX < 3 || !UseVectorMacroLogic) {
return false;
}
break;
*** 1458,1469 ****
return false; // 512bit vandps and vxorps are not available
}
break;
case Op_AbsVD:
case Op_NegVD:
if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
! return false; // 512bit vandpd and vxorpd are not available
}
break;
case Op_CMoveVF:
if (vlen != 8) {
return false; // implementation limitation (only vcmov8F_reg is present)
--- 1574,1586 ----
return false; // 512bit vandps and vxorps are not available
}
break;
case Op_AbsVD:
case Op_NegVD:
+ case Op_MulVL:
if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
! return false; // 512bit vpmullq, vandpd and vxorpd are not available
}
break;
case Op_CMoveVF:
if (vlen != 8) {
return false; // implementation limitation (only vcmov8F_reg is present)
*** 1478,1487 ****
--- 1595,1740 ----
case Op_CMoveVD:
if (vlen != 4) {
return false; // implementation limitation (only vcmov4D_reg is present)
}
break;
+ case Op_MaxV:
+ case Op_MinV:
+ if (UseSSE < 4 && is_integral_type(bt)) {
+ return false;
+ }
+ if ((bt == T_FLOAT || bt == T_DOUBLE)) {
+ // Float/Double intrinsics are enabled for AVX family currently.
+ if (UseAVX == 0) {
+ return false;
+ }
+ if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
+ return false;
+ }
+ }
+ break;
+ case Op_AddReductionVI:
+ if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
+ return false;
+ }
+ // fallthrough
+ case Op_AndReductionV:
+ case Op_OrReductionV:
+ case Op_XorReductionV:
+ if (is_subword_type(bt) && (UseSSE < 4)) {
+ return false;
+ }
+ #ifndef _LP64
+ if (bt == T_BYTE || bt == T_LONG) {
+ return false;
+ }
+ #endif
+ break;
+ #ifndef _LP64
+ case Op_VectorInsert:
+ if (bt == T_LONG || bt == T_DOUBLE) {
+ return false;
+ }
+ break;
+ #endif
+ case Op_MinReductionV:
+ case Op_MaxReductionV:
+ if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
+ return false;
+ } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
+ return false;
+ }
+ // Float/Double intrinsics enabled for AVX family.
+ if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
+ return false;
+ }
+ if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
+ return false;
+ }
+ #ifndef _LP64
+ if (bt == T_BYTE || bt == T_LONG) {
+ return false;
+ }
+ #endif
+ break;
+ case Op_VectorTest:
+ if (UseSSE < 4) {
+ return false; // Implementation limitation
+ } else if (size_in_bits < 128) {
+ return false; // Implementation limitation
+ } else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) {
+ return false; // Implementation limitation
+ }
+ break;
+ case Op_VectorLoadShuffle:
+ case Op_VectorRearrange:
+ if (vlen == 2) {
+ return false; // Implementation limitation due to how shuffle is loaded
+ } else if (size_in_bits == 256 && UseAVX < 2) {
+ return false; // Implementation limitation
+ } else if (bt == T_BYTE && size_in_bits >= 256 && !VM_Version::supports_avx512_vbmi()) {
+ return false; // Implementation limitation
+ } else if (bt == T_SHORT && size_in_bits >= 256 && !VM_Version::supports_avx512bw()) {
+ return false; // Implementation limitation
+ }
+ break;
+ case Op_VectorLoadMask:
+ if (size_in_bits == 256 && UseAVX < 2) {
+ return false; // Implementation limitation
+ }
+ // fallthrough
+ case Op_VectorStoreMask:
+ if (vlen == 2) {
+ return false; // Implementation limitation
+ }
+ break;
+ case Op_VectorCastB2X:
+ if (size_in_bits == 256 && UseAVX < 2) {
+ return false; // Implementation limitation
+ }
+ break;
+ case Op_VectorCastS2X:
+ if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
+ return false;
+ }
+ break;
+ case Op_VectorCastI2X:
+ if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
+ return false;
+ }
+ break;
+ case Op_VectorCastL2X:
+ if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
+ return false;
+ } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
+ return false;
+ }
+ break;
+ case Op_VectorCastF2X:
+ case Op_VectorCastD2X:
+ if (is_integral_type(bt)) {
+ // Casts from FP to integral types require special fixup logic not easily
+ // implementable with vectors.
+ return false; // Implementation limitation
+ } // fallthrough (no-op for FP types in Op_MulReductionVI check below)
+ case Op_MulReductionVI:
+ if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
+ return false;
+ }
+ break;
+ case Op_StoreVectorScatter:
+ if (bt == T_BYTE || bt == T_SHORT) {
+ return false;
+ } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
+ return false;
+ }
+ // fallthrough
+ case Op_LoadVectorGather:
+ if (size_in_bits == 64) {
+ return false;
+ }
+ break;
}
return true; // Per default match rules are supported.
}
// x86 supports generic vector operands: vec and legVec.
*** 1536,1545 ****
--- 1789,1802 ----
}
}
//------------------------------------------------------------------------
+ bool Matcher::supports_vector_variable_shifts(void) {
+ return (UseAVX >= 2);
+ }
+
const bool Matcher::has_predicated_vectors(void) {
bool ret_value = false;
if (UseAVX > 2) {
ret_value = VM_Version::supports_avx512vl();
}
*** 1819,1862 ****
}
void Compile::reshape_address(AddPNode* addp) {
}
! static inline uint vector_length(const MachNode* n) {
! const TypeVect* vt = n->bottom_type()->is_vect();
! return vt->length();
! }
!
! static inline uint vector_length(const MachNode* use, MachOper* opnd) {
! uint def_idx = use->operand_index(opnd);
! Node* def = use->in(def_idx);
! return def->bottom_type()->is_vect()->length();
! }
!
! static inline uint vector_length_in_bytes(const MachNode* n) {
! const TypeVect* vt = n->bottom_type()->is_vect();
! return vt->length_in_bytes();
! }
!
! static inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) {
! uint def_idx = use->operand_index(opnd);
! Node* def = use->in(def_idx);
! return def->bottom_type()->is_vect()->length_in_bytes();
}
! static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* n) {
! switch(vector_length_in_bytes(n)) {
! case 4: // fall-through
! case 8: // fall-through
! case 16: return Assembler::AVX_128bit;
! case 32: return Assembler::AVX_256bit;
! case 64: return Assembler::AVX_512bit;
!
! default: {
! ShouldNotReachHere();
! return Assembler::AVX_NoVec;
! }
}
}
// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
--- 2076,2107 ----
}
void Compile::reshape_address(AddPNode* addp) {
}
! static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
! switch (bt) {
! case BoolTest::eq: return Assembler::eq;
! case BoolTest::ne: return Assembler::neq;
! case BoolTest::le: return Assembler::le;
! case BoolTest::ge: return Assembler::nlt;
! case BoolTest::lt: return Assembler::lt;
! case BoolTest::gt: return Assembler::nle;
! default : ShouldNotReachHere(); return Assembler::_false;
! }
}
! static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
! switch (bt) {
! case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
! // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
! case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
! case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
! case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
! case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
! case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
! default: ShouldNotReachHere(); return Assembler::FALSE_OS;
}
}
// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
*** 2179,2188 ****
--- 2424,2440 ----
}
%}
%}
+ // Operands for bound floating pointer register arguments
+ operand rxmm0() %{
+ constraint(ALLOC_IN_RC(xmm0_reg));
+ match(VecX);
+ format %{ %}
+ interface(REG_INTER);
+ %}
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.
*** 2945,2957 ****
predicate(UseAVX > 0);
match(Set dst (AbsF src));
ins_cost(150);
format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
ins_encode %{
! int vector_len = 0;
__ vandps($dst$$XMMRegister, $src$$XMMRegister,
! ExternalAddress(float_signmask()), vector_len);
%}
ins_pipe(pipe_slow);
%}
instruct absD_reg(regD dst) %{
--- 3197,3209 ----
predicate(UseAVX > 0);
match(Set dst (AbsF src));
ins_cost(150);
format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
ins_encode %{
! int vlen_enc = Assembler::AVX_128bit;
__ vandps($dst$$XMMRegister, $src$$XMMRegister,
! ExternalAddress(float_signmask()), vlen_enc);
%}
ins_pipe(pipe_slow);
%}
instruct absD_reg(regD dst) %{
*** 2971,2983 ****
match(Set dst (AbsD src));
ins_cost(150);
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
"# abs double by sign masking" %}
ins_encode %{
! int vector_len = 0;
__ vandpd($dst$$XMMRegister, $src$$XMMRegister,
! ExternalAddress(double_signmask()), vector_len);
%}
ins_pipe(pipe_slow);
%}
instruct negF_reg(regF dst) %{
--- 3223,3235 ----
match(Set dst (AbsD src));
ins_cost(150);
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
"# abs double by sign masking" %}
ins_encode %{
! int vlen_enc = Assembler::AVX_128bit;
__ vandpd($dst$$XMMRegister, $src$$XMMRegister,
! ExternalAddress(double_signmask()), vlen_enc);
%}
ins_pipe(pipe_slow);
%}
instruct negF_reg(regF dst) %{
*** 3097,3106 ****
--- 3349,3445 ----
__ sqrtsd($dst$$XMMRegister, $constantaddress($con));
%}
ins_pipe(pipe_slow);
%}
+ // ---------------------------------------- VectorReinterpret ------------------------------------
+
+ instruct reinterpret(vec dst) %{
+ predicate(vector_length_in_bytes(n) == vector_length_in_bytes(n->in(1))); // dst == src
+ match(Set dst (VectorReinterpret dst));
+ ins_cost(125);
+ format %{ "vector_reinterpret $dst\t!" %}
+ ins_encode %{
+ // empty
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{
+ predicate(UseAVX == 0 &&
+ (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst
+ match(Set dst (VectorReinterpret src));
+ ins_cost(125);
+ effect(TEMP dst, TEMP scratch);
+ format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %}
+ ins_encode %{
+ assert(vector_length_in_bytes(this) <= 16, "required");
+ assert(vector_length_in_bytes(this, $src) <= 8, "required");
+
+ int src_vlen_in_bytes = vector_length_in_bytes(this, $src);
+ if (src_vlen_in_bytes == 4) {
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register);
+ } else {
+ assert(src_vlen_in_bytes == 8, "");
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register);
+ }
+ __ pand($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{
+ predicate(UseAVX > 0 &&
+ (vector_length_in_bytes(n->in(1)) == 4) && // src
+ (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst
+ match(Set dst (VectorReinterpret src));
+ ins_cost(125);
+ effect(TEMP scratch);
+ format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %}
+ ins_encode %{
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+
+ instruct vreinterpret_expand(legVec dst, vec src) %{
+ predicate(UseAVX > 0 &&
+ (vector_length_in_bytes(n->in(1)) > 4) && // src
+ (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst
+ match(Set dst (VectorReinterpret src));
+ ins_cost(125);
+ format %{ "vector_reinterpret_expand $dst,$src\t!" %}
+ ins_encode %{
+ switch (vector_length_in_bytes(this, $src)) {
+ case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
+ case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
+ case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
+ default: ShouldNotReachHere();
+ }
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct reinterpret_shrink(vec dst, legVec src) %{
+ predicate(vector_length_in_bytes(n->in(1)) > vector_length_in_bytes(n)); // src > dst
+ match(Set dst (VectorReinterpret src));
+ ins_cost(125);
+ format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
+ ins_encode %{
+ switch (vector_length_in_bytes(this)) {
+ case 4: __ movflt ($dst$$XMMRegister, $src$$XMMRegister); break;
+ case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
+ case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
+ case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
+ default: ShouldNotReachHere();
+ }
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // ----------------------------------------------------------------------------------------------------
#ifdef _LP64
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
match(Set dst (RoundDoubleMode src rmode));
format %{ "roundsd $dst,$src" %}
*** 3134,3179 ****
%}
ins_pipe(pipe_slow);
%}
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
! predicate(n->as_Vector()->length() < 8);
match(Set dst (RoundDoubleModeV src rmode));
format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vector_len = vector_length_encoding(this);
! __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
! predicate(n->as_Vector()->length() == 8);
match(Set dst (RoundDoubleModeV src rmode));
format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
ins_encode %{
assert(UseAVX > 2, "required");
__ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
%}
ins_pipe( pipe_slow );
%}
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
! predicate(n->as_Vector()->length() < 8);
match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vector_len = vector_length_encoding(this);
! __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
! predicate(n->as_Vector()->length() == 8);
match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
ins_encode %{
assert(UseAVX > 2, "required");
__ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
--- 3473,3518 ----
%}
ins_pipe(pipe_slow);
%}
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
! predicate(vector_length(n) < 8);
match(Set dst (RoundDoubleModeV src rmode));
format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vlen_enc = vector_length_encoding(this);
! __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
! predicate(vector_length(n) == 8);
match(Set dst (RoundDoubleModeV src rmode));
format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
ins_encode %{
assert(UseAVX > 2, "required");
__ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
%}
ins_pipe( pipe_slow );
%}
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
! predicate(vector_length(n) < 8);
match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vlen_enc = vector_length_encoding(this);
! __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
! predicate(vector_length(n) == 8);
match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
ins_encode %{
assert(UseAVX > 2, "required");
__ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
*** 3241,3251 ****
ins_pipe( fpu_reg_reg );
%}
// ============================================================================
! // Load vectors
instruct loadV(vec dst, memory mem) %{
match(Set dst (LoadVector mem));
ins_cost(125);
format %{ "load_vector $dst,$mem" %}
ins_encode %{
--- 3580,3590 ----
ins_pipe( fpu_reg_reg );
%}
// ============================================================================
! // Load vectors generic operand pattern
instruct loadV(vec dst, memory mem) %{
match(Set dst (LoadVector mem));
ins_cost(125);
format %{ "load_vector $dst,$mem" %}
ins_encode %{
*** 3277,3286 ****
--- 3616,3700 ----
}
%}
ins_pipe( pipe_slow );
%}
+ // ---------------------------------------- Gather ------------------------------------
+
+ // Gather INT, LONG, FLOAT, DOUBLE
+
+ instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
+ predicate(vector_length_in_bytes(n) <= 32);
+ match(Set dst (LoadVectorGather mem idx));
+ effect(TEMP dst, TEMP tmp, TEMP mask);
+ format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "sanity");
+
+ int vlen_enc = vector_length_encoding(this);
+ BasicType elem_bt = vector_element_basic_type(this);
+
+ assert(vector_length_in_bytes(this) >= 16, "sanity");
+ assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
+
+ if (vlen_enc == Assembler::AVX_128bit) {
+ __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()));
+ } else {
+ __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()));
+ }
+ __ lea($tmp$$Register, $mem$$Address);
+ __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{
+ predicate(vector_length_in_bytes(n) == 64);
+ match(Set dst (LoadVectorGather mem idx));
+ effect(TEMP dst, TEMP tmp);
+ format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and k2 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 2, "sanity");
+
+ int vlen_enc = vector_length_encoding(this);
+ BasicType elem_bt = vector_element_basic_type(this);
+
+ assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
+
+ KRegister ktmp = k2;
+ __ kmovwl(ktmp, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
+ __ lea($tmp$$Register, $mem$$Address);
+ __ evgather(elem_bt, $dst$$XMMRegister, ktmp, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // ====================Scatter=======================================
+
+ // Scatter INT, LONG, FLOAT, DOUBLE
+
+ instruct scatter(memory mem, vec src, vec idx, rRegP tmp) %{
+ match(Set mem (StoreVectorScatter mem (Binary src idx)));
+ effect(TEMP tmp);
+ format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 2, "sanity");
+
+ int vlen_enc = vector_length_encoding(this, $src);
+ BasicType elem_bt = vector_element_basic_type(this, $src);
+
+ assert(vector_length_in_bytes(this, $src) >= 16, "sanity");
+ assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
+
+ KRegister ktmp = k2;
+ __ kmovwl(ktmp, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
+ __ lea($tmp$$Register, $mem$$Address);
+ __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, ktmp, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
// ====================REPLICATE=======================================
// Replicate byte scalar to be vector
instruct ReplB_reg(vec dst, rRegI src) %{
match(Set dst (ReplicateB src));
*** 3310,3321 ****
instruct ReplB_mem(vec dst, memory mem) %{
predicate(VM_Version::supports_avx2());
match(Set dst (ReplicateB (LoadB mem)));
format %{ "replicateB $dst,$mem" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct ReplB_imm(vec dst, immI con) %{
--- 3724,3735 ----
instruct ReplB_mem(vec dst, memory mem) %{
predicate(VM_Version::supports_avx2());
match(Set dst (ReplicateB (LoadB mem)));
format %{ "replicateB $dst,$mem" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct ReplB_imm(vec dst, immI con) %{
*** 3341,3351 ****
%}
ins_pipe( pipe_slow );
%}
// Replicate byte scalar zero to be vector
! instruct ReplB_zero(vec dst, immI0 zero) %{
match(Set dst (ReplicateB zero));
format %{ "replicateB $dst,$zero" %}
ins_encode %{
uint vlen = vector_length(this);
if (vlen <= 16) {
--- 3755,3765 ----
%}
ins_pipe( pipe_slow );
%}
// Replicate byte scalar zero to be vector
! instruct ReplB_zero(vec dst, immI_0 zero) %{
match(Set dst (ReplicateB zero));
format %{ "replicateB $dst,$zero" %}
ins_encode %{
uint vlen = vector_length(this);
if (vlen <= 16) {
*** 3418,3428 ****
}
%}
ins_pipe( fpu_reg_reg );
%}
! instruct ReplS_zero(vec dst, immI0 zero) %{
match(Set dst (ReplicateS zero));
format %{ "replicateS $dst,$zero" %}
ins_encode %{
uint vlen = vector_length(this);
if (vlen <= 8) {
--- 3832,3842 ----
}
%}
ins_pipe( fpu_reg_reg );
%}
! instruct ReplS_zero(vec dst, immI_0 zero) %{
match(Set dst (ReplicateS zero));
format %{ "replicateS $dst,$zero" %}
ins_encode %{
uint vlen = vector_length(this);
if (vlen <= 8) {
*** 3465,3476 ****
if (vlen <= 4) {
__ movdl($dst$$XMMRegister, $mem$$Address);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
} else {
assert(VM_Version::supports_avx2(), "sanity");
! int vector_len = vector_length_encoding(this);
! __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
}
%}
ins_pipe( pipe_slow );
%}
--- 3879,3890 ----
if (vlen <= 4) {
__ movdl($dst$$XMMRegister, $mem$$Address);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
} else {
assert(VM_Version::supports_avx2(), "sanity");
! int vlen_enc = vector_length_encoding(this);
! __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
*** 3485,3504 ****
if (vlen == 4) {
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
}
} else {
assert(VM_Version::supports_avx2(), "sanity");
! int vector_len = vector_length_encoding(this);
__ movq($dst$$XMMRegister, const_addr);
! __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
}
%}
ins_pipe( pipe_slow );
%}
// Replicate integer (4 byte) scalar zero to be vector
! instruct ReplI_zero(vec dst, immI0 zero) %{
match(Set dst (ReplicateI zero));
format %{ "replicateI $dst,$zero" %}
ins_encode %{
uint vlen = vector_length(this);
if (vlen <= 4) {
--- 3899,3918 ----
if (vlen == 4) {
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
}
} else {
assert(VM_Version::supports_avx2(), "sanity");
! int vlen_enc = vector_length_encoding(this);
__ movq($dst$$XMMRegister, const_addr);
! __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
// Replicate integer (4 byte) scalar zero to be vector
! instruct ReplI_zero(vec dst, immI_0 zero) %{
match(Set dst (ReplicateI zero));
format %{ "replicateI $dst,$zero" %}
ins_encode %{
uint vlen = vector_length(this);
if (vlen <= 4) {
*** 3550,3560 ****
ins_pipe( pipe_slow );
%}
#else // _LP64
// Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
! predicate(n->as_Vector()->length() <= 4);
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "replicateL $dst,$src" %}
ins_encode %{
uint vlen = vector_length(this);
--- 3964,3974 ----
ins_pipe( pipe_slow );
%}
#else // _LP64
// Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
! predicate(vector_length(n) <= 4);
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "replicateL $dst,$src" %}
ins_encode %{
uint vlen = vector_length(this);
*** 3562,3576 ****
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
! int vector_len = Assembler::AVX_256bit;
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
! __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
} else {
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
--- 3976,3990 ----
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
! int vlen_enc = Assembler::AVX_256bit;
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
! __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
*** 3579,3589 ****
%}
ins_pipe( pipe_slow );
%}
instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
! predicate(n->as_Vector()->length() == 8);
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "replicateL $dst,$src" %}
ins_encode %{
if (VM_Version::supports_avx512vl()) {
--- 3993,4003 ----
%}
ins_pipe( pipe_slow );
%}
instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
! predicate(vector_length(n) == 8);
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "replicateL $dst,$src" %}
ins_encode %{
if (VM_Version::supports_avx512vl()) {
*** 3592,3606 ****
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
} else {
! int vector_len = Assembler::AVX_512bit;
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
! __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
}
%}
ins_pipe( pipe_slow );
%}
#endif // _LP64
--- 4006,4020 ----
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
} else {
! int vlen_enc = Assembler::AVX_512bit;
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
! __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
#endif // _LP64
*** 3677,3688 ****
ins_encode %{
uint vlen = vector_length(this);
if (vlen <= 4) {
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
} else if (VM_Version::supports_avx2()) {
! int vector_len = vector_length_encoding(this);
! __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
} else {
assert(vlen == 8, "sanity");
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
--- 4091,4102 ----
ins_encode %{
uint vlen = vector_length(this);
if (vlen <= 4) {
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
} else if (VM_Version::supports_avx2()) {
! int vlen_enc = vector_length_encoding(this);
! __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
} else {
assert(vlen == 8, "sanity");
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
*** 3698,3709 ****
if (vlen <= 4) {
__ movdl($dst$$XMMRegister, $mem$$Address);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
} else {
assert(VM_Version::supports_avx(), "sanity");
! int vector_len = vector_length_encoding(this);
! __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
}
%}
ins_pipe( pipe_slow );
%}
--- 4112,4123 ----
if (vlen <= 4) {
__ movdl($dst$$XMMRegister, $mem$$Address);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
} else {
assert(VM_Version::supports_avx(), "sanity");
! int vlen_enc = vector_length_encoding(this);
! __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
*** 3731,3742 ****
ins_encode %{
uint vlen = vector_length(this);
if (vlen == 2) {
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
} else if (VM_Version::supports_avx2()) {
! int vector_len = vector_length_encoding(this);
! __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
} else {
assert(vlen == 4, "sanity");
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
--- 4145,4156 ----
ins_encode %{
uint vlen = vector_length(this);
if (vlen == 2) {
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
} else if (VM_Version::supports_avx2()) {
! int vlen_enc = vector_length_encoding(this);
! __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
} else {
assert(vlen == 4, "sanity");
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
*** 3752,3763 ****
if (vlen == 2) {
__ movq($dst$$XMMRegister, $mem$$Address);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44);
} else {
assert(VM_Version::supports_avx(), "sanity");
! int vector_len = vector_length_encoding(this);
! __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
}
%}
ins_pipe( pipe_slow );
%}
--- 4166,4177 ----
if (vlen == 2) {
__ movq($dst$$XMMRegister, $mem$$Address);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44);
} else {
assert(VM_Version::supports_avx(), "sanity");
! int vlen_enc = vector_length_encoding(this);
! __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
*** 3774,3794 ****
}
%}
ins_pipe( fpu_reg_reg );
%}
// ====================REDUCTION ARITHMETIC=======================================
// =======================Int Reduction==========================================
instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
! predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT &&
! n->in(2)->bottom_type()->is_vect()->length() < 16);
match(Set dst (AddReductionVI src1 src2));
match(Set dst (MulReductionVI src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src2);
--- 4188,4431 ----
}
%}
ins_pipe( fpu_reg_reg );
%}
+ // ====================VECTOR INSERT=======================================
+
+ instruct insert(vec dst, rRegI val, immU8 idx) %{
+ predicate(vector_length_in_bytes(n) < 32);
+ match(Set dst (VectorInsert (Binary dst val) idx));
+ format %{ "vector_insert $dst,$val,$idx" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ assert(vector_length_in_bytes(this) >= 8, "required");
+
+ BasicType elem_bt = vector_element_basic_type(this);
+
+ assert(is_integral_type(elem_bt), "");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
+ predicate(vector_length_in_bytes(n) == 32);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ effect(TEMP vtmp);
+ format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+ ins_encode %{
+ int vlen_enc = Assembler::AVX_256bit;
+ BasicType elem_bt = vector_element_basic_type(this);
+ int elem_per_lane = 16/type2aelembytes(elem_bt);
+ int log2epr = log2(elem_per_lane);
+
+ assert(is_integral_type(elem_bt), "sanity");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ uint x_idx = $idx$$constant & right_n_bits(log2epr);
+ uint y_idx = ($idx$$constant >> log2epr) & 1;
+ __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
+ __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
+ __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
+ predicate(vector_length_in_bytes(n) == 64);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ effect(TEMP vtmp);
+ format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 2, "sanity");
+
+ BasicType elem_bt = vector_element_basic_type(this);
+ int elem_per_lane = 16/type2aelembytes(elem_bt);
+ int log2epr = log2(elem_per_lane);
+
+ assert(is_integral_type(elem_bt), "");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ uint x_idx = $idx$$constant & right_n_bits(log2epr);
+ uint y_idx = ($idx$$constant >> log2epr) & 3;
+ __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
+ __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
+ __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ #ifdef _LP64
+ instruct insert2L(vec dst, rRegL val, immU8 idx) %{
+ predicate(vector_length(n) == 2);
+ match(Set dst (VectorInsert (Binary dst val) idx));
+ format %{ "vector_insert $dst,$val,$idx" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ assert(vector_element_basic_type(this) == T_LONG, "");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
+ predicate(vector_length(n) == 4);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ effect(TEMP vtmp);
+ format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+ ins_encode %{
+ assert(vector_element_basic_type(this) == T_LONG, "");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ uint x_idx = $idx$$constant & right_n_bits(1);
+ uint y_idx = ($idx$$constant >> 1) & 1;
+ int vlen_enc = Assembler::AVX_256bit;
+ __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
+ __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
+ __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
+ predicate(vector_length(n) == 8);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ effect(TEMP vtmp);
+ format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+ ins_encode %{
+ assert(vector_element_basic_type(this) == T_LONG, "sanity");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ uint x_idx = $idx$$constant & right_n_bits(1);
+ uint y_idx = ($idx$$constant >> 1) & 3;
+ __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
+ __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
+ __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+ #endif
+
+ instruct insertF(vec dst, regF val, immU8 idx) %{
+ predicate(vector_length(n) < 8);
+ match(Set dst (VectorInsert (Binary dst val) idx));
+ format %{ "vector_insert $dst,$val,$idx" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "sanity");
+
+ assert(vector_element_basic_type(this) == T_FLOAT, "sanity");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ __ insertps($dst$$XMMRegister, $val$$XMMRegister, $idx$$constant);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
+ predicate(vector_length(n) >= 8);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ effect(TEMP vtmp);
+ format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+ ins_encode %{
+ assert(vector_element_basic_type(this) == T_FLOAT, "sanity");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ int vlen = vector_length(this);
+ uint x_idx = $idx$$constant & right_n_bits(2);
+ if (vlen == 8) {
+ uint y_idx = ($idx$$constant >> 2) & 1;
+ int vlen_enc = Assembler::AVX_256bit;
+ __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
+ __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx);
+ __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
+ } else {
+ assert(vlen == 16, "sanity");
+ uint y_idx = ($idx$$constant >> 2) & 3;
+ __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
+ __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx);
+ __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
+ }
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ #ifdef _LP64
+ instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
+ predicate(vector_length(n) == 2);
+ match(Set dst (VectorInsert (Binary dst val) idx));
+ effect(TEMP tmp);
+ format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "sanity");
+ assert(vector_element_basic_type(this) == T_DOUBLE, "sanity");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ __ movq($tmp$$Register, $val$$XMMRegister);
+ __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
+ predicate(vector_length(n) == 4);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ effect(TEMP vtmp, TEMP tmp);
+ format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
+ ins_encode %{
+ assert(vector_element_basic_type(this) == T_DOUBLE, "sanity");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ uint x_idx = $idx$$constant & right_n_bits(1);
+ uint y_idx = ($idx$$constant >> 1) & 1;
+ int vlen_enc = Assembler::AVX_256bit;
+ __ movq($tmp$$Register, $val$$XMMRegister);
+ __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
+ __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
+ __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
+ predicate(vector_length(n) == 8);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ effect(TEMP tmp, TEMP vtmp);
+ format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+ ins_encode %{
+ assert(vector_element_basic_type(this) == T_DOUBLE, "sanity");
+ assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+ uint x_idx = $idx$$constant & right_n_bits(1);
+ uint y_idx = ($idx$$constant >> 1) & 3;
+ __ movq($tmp$$Register, $val$$XMMRegister);
+ __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
+ __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
+ __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+ #endif
+
// ====================REDUCTION ARITHMETIC=======================================
+
// =======================Int Reduction==========================================
instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
! predicate(vector_element_basic_type(n->in(2)) == T_INT &&
! vector_length(n->in(2)) < 16); // src2
match(Set dst (AddReductionVI src1 src2));
match(Set dst (MulReductionVI src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src2);
*** 3796,3833 ****
%}
ins_pipe( pipe_slow );
%}
instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
! predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT &&
! n->in(2)->bottom_type()->is_vect()->length() == 16);
match(Set dst (AddReductionVI src1 src2));
match(Set dst (MulReductionVI src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src2);
__ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
! %}
ins_pipe( pipe_slow );
%}
// =======================Long Reduction==========================================
#ifdef _LP64
instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
! predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
! n->in(2)->bottom_type()->is_vect()->length() < 8);
match(Set dst (AddReductionVL src1 src2));
match(Set dst (MulReductionVL src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src2);
--- 4433,4474 ----
%}
ins_pipe( pipe_slow );
%}
instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
! predicate(vector_element_basic_type(n->in(2)) == T_INT &&
! vector_length(n->in(2)) == 16); // src2
match(Set dst (AddReductionVI src1 src2));
match(Set dst (MulReductionVI src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src2);
__ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
! %}
ins_pipe( pipe_slow );
%}
// =======================Long Reduction==========================================
#ifdef _LP64
instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
! predicate(vector_element_basic_type(n->in(2)) == T_LONG &&
! vector_length(n->in(2)) < 8); // src2
match(Set dst (AddReductionVL src1 src2));
match(Set dst (MulReductionVL src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src2);
*** 3835,3851 ****
%}
ins_pipe( pipe_slow );
%}
instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
! predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
! n->in(2)->bottom_type()->is_vect()->length() == 8);
match(Set dst (AddReductionVL src1 src2));
match(Set dst (MulReductionVL src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src2);
--- 4476,4494 ----
%}
ins_pipe( pipe_slow );
%}
instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
! predicate(vector_element_basic_type(n->in(2)) == T_LONG &&
! vector_length(n->in(2)) == 8); // src2
match(Set dst (AddReductionVL src1 src2));
match(Set dst (MulReductionVL src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src2);
*** 3856,3880 ****
#endif // _LP64
// =======================Float Reduction==========================================
instruct reductionF128(regF dst, vec src, vec vtmp) %{
! predicate(n->in(2)->bottom_type()->is_vect()->length() <= 4);
match(Set dst (AddReductionVF dst src));
match(Set dst (MulReductionVF dst src));
effect(TEMP dst, TEMP vtmp);
! format %{ "vector_reduction_fp $dst,$src ; using $vtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src);
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
! predicate(n->in(2)->bottom_type()->is_vect()->length() == 8);
match(Set dst (AddReductionVF dst src));
match(Set dst (MulReductionVF dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
--- 4499,4523 ----
#endif // _LP64
// =======================Float Reduction==========================================
instruct reductionF128(regF dst, vec src, vec vtmp) %{
! predicate(vector_length(n->in(2)) <= 4); // src
match(Set dst (AddReductionVF dst src));
match(Set dst (MulReductionVF dst src));
effect(TEMP dst, TEMP vtmp);
! format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src);
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
! predicate(vector_length(n->in(2)) == 8); // src
match(Set dst (AddReductionVF dst src));
match(Set dst (MulReductionVF dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
*** 3884,3894 ****
%}
ins_pipe( pipe_slow );
%}
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
! predicate(n->in(2)->bottom_type()->is_vect()->length() == 16);
match(Set dst (AddReductionVF dst src));
match(Set dst (MulReductionVF dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
--- 4527,4537 ----
%}
ins_pipe( pipe_slow );
%}
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
! predicate(vector_length(n->in(2)) == 16); // src
match(Set dst (AddReductionVF dst src));
match(Set dst (MulReductionVF dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
*** 3900,3924 ****
%}
// =======================Double Reduction==========================================
instruct reduction2D(regD dst, vec src, vec vtmp) %{
! predicate(n->in(2)->bottom_type()->is_vect()->length() == 2);
match(Set dst (AddReductionVD dst src));
match(Set dst (MulReductionVD dst src));
effect(TEMP dst, TEMP vtmp);
format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src);
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
! %}
ins_pipe( pipe_slow );
%}
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
! predicate(n->in(2)->bottom_type()->is_vect()->length() == 4);
match(Set dst (AddReductionVD dst src));
match(Set dst (MulReductionVD dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
--- 4543,4567 ----
%}
// =======================Double Reduction==========================================
instruct reduction2D(regD dst, vec src, vec vtmp) %{
! predicate(vector_length(n->in(2)) == 2); // src
match(Set dst (AddReductionVD dst src));
match(Set dst (MulReductionVD dst src));
effect(TEMP dst, TEMP vtmp);
format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src);
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
! %}
ins_pipe( pipe_slow );
%}
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
! predicate(vector_length(n->in(2)) == 4); // src
match(Set dst (AddReductionVD dst src));
match(Set dst (MulReductionVD dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
*** 3928,3938 ****
%}
ins_pipe( pipe_slow );
%}
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
! predicate(n->in(2)->bottom_type()->is_vect()->length() == 8);
match(Set dst (AddReductionVD dst src));
match(Set dst (MulReductionVD dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
--- 4571,4581 ----
%}
ins_pipe( pipe_slow );
%}
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
! predicate(vector_length(n->in(2)) == 8); // src
match(Set dst (AddReductionVD dst src));
match(Set dst (MulReductionVD dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
*** 3941,3950 ****
--- 4584,4877 ----
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+ // =======================Byte Reduction==========================================
+
+ #ifdef _LP64
+ // Reduce a byte vector of <= 32 lanes (up to 256 bits) together with the scalar
+ // accumulator src1 into the GPR dst, using two vector temporaries. Covers
+ // add/and/or/xor/min/max; byte multiply is handled by the separate
+ // mul_reductionB rules. 64-bit VM only (enclosing #ifdef _LP64).
+ instruct reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_BYTE &&
+ vector_length(n->in(2)) <= 32); // src2
+ match(Set dst (AddReductionVI src1 src2));
+ match(Set dst (AndReductionV src1 src2));
+ match(Set dst ( OrReductionV src1 src2));
+ match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
+ effect(TEMP vtmp1, TEMP vtmp2);
+ format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
+ ins_encode %{
+ // The reduction flavor is recovered from the matched ideal opcode.
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src2);
+ __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 64-lane (512-bit) byte reduction. Same reduction set as reductionB, but with
+ // legVec operands (presumably restricted to legacy-encodable XMM registers --
+ // confirm against the legVec operand class definition).
+ instruct reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_BYTE &&
+ vector_length(n->in(2)) == 64); // src2
+ match(Set dst (AddReductionVI src1 src2));
+ match(Set dst (AndReductionV src1 src2));
+ match(Set dst ( OrReductionV src1 src2));
+ match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
+ effect(TEMP vtmp1, TEMP vtmp2);
+ format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src2);
+ __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+ #endif
+
+ // =======================Short Reduction==========================================
+
+ // Reduce a short/char vector of <= 16 lanes (up to 256 bits) with the scalar
+ // accumulator src1 into GPR dst. Unlike the byte rules, MulReductionVI is
+ // matched here directly as well.
+ instruct reductionS(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_SHORT &&
+ vector_length(n->in(2)) <= 16); // src2
+ match(Set dst (AddReductionVI src1 src2));
+ match(Set dst (MulReductionVI src1 src2));
+ match(Set dst (AndReductionV src1 src2));
+ match(Set dst ( OrReductionV src1 src2));
+ match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
+ effect(TEMP vtmp1, TEMP vtmp2);
+ format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src2);
+ __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 32-lane (512-bit) short/char reduction; legVec variant of reductionS.
+ instruct reduction32S(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_SHORT &&
+ vector_length(n->in(2)) == 32); // src2
+ match(Set dst (AddReductionVI src1 src2));
+ match(Set dst (MulReductionVI src1 src2));
+ match(Set dst (AndReductionV src1 src2));
+ match(Set dst ( OrReductionV src1 src2));
+ match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
+ effect(TEMP vtmp1, TEMP vtmp2);
+ format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src2);
+ __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // =======================Mul Reduction==========================================
+
+ // Byte multiply reduction (<= 32 lanes), split out from reductionB. Note that
+ // dst is also in the TEMP effect list here, unlike the generic byte rule.
+ instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_BYTE &&
+ vector_length(n->in(2)) <= 32); // src2
+ match(Set dst (MulReductionVI src1 src2));
+ effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
+ format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src2);
+ __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 64-lane (512-bit) byte multiply reduction; legVec variant of mul_reductionB.
+ instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_BYTE &&
+ vector_length(n->in(2)) == 64); // src2
+ match(Set dst (MulReductionVI src1 src2));
+ effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
+ format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src2);
+ __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ //--------------------Min/Max Float Reduction --------------------
+ // Float Min Reduction
+ // 2-lane float min/max reduction. The predicate only accepts the form where the
+ // scalar input src1 is the identity of the reduction (+Inf for min, -Inf for
+ // max), so src1 is never read in the encoding; the `false` argument tells
+ // reduceFloatMinMax there is no live accumulator to merge (cf. the *_av rules
+ // below, which pass true).
+ instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp,
+ legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
+ ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
+ (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
+ vector_length(n->in(2)) == 2);
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
+ effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
+ format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src2);
+ __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
+ $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // Float min/max reduction for 4 or more lanes; same identity-value constraint
+ // on src1 as minmax_reduction2F, plus an extra xmm_0 temporary.
+ instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
+ legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
+ ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
+ (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
+ vector_length(n->in(2)) >= 4);
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
+ effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
+ format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src2);
+ __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
+ $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // Accumulator variant: dst carries a live partial result (matches
+ // (Min/MaxReductionV dst src)), so `true` is passed to reduceFloatMinMax.
+ instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp,
+ legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
+ vector_length(n->in(2)) == 2);
+ match(Set dst (MinReductionV dst src));
+ match(Set dst (MaxReductionV dst src));
+ effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
+ format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src);
+ __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
+ $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+
+ // Accumulator variant for 4 or more float lanes (dst holds a live partial
+ // result); extra xmm_0 temporary, `true` passed to reduceFloatMinMax.
+ instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp,
+ legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
+ vector_length(n->in(2)) >= 4);
+ match(Set dst (MinReductionV dst src));
+ match(Set dst (MaxReductionV dst src));
+ effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
+ format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src);
+ __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
+ $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+
+ //--------------------Min Double Reduction --------------------
+ // 2-lane double min/max reduction; mirrors the float rules. src1 must be the
+ // identity of the reduction (TypeD::POS_INF for min, TypeD::NEG_INF for max),
+ // so it is ignored and `false` (no live accumulator) is passed to the stub.
+ instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2,
+ legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
+ rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
+ ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
+ (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
+ vector_length(n->in(2)) == 2);
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+ format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src2);
+ __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // Double min/max reduction for 4 or more lanes; same identity-value constraint
+ // on src1 as minmax_reduction2D, plus a fifth vector temporary.
+ instruct minmax_reductionD(legRegD dst, immD src1, legVec src2,
+ legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
+ rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
+ ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
+ (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
+ vector_length(n->in(2)) >= 4);
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
+ format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src2);
+ __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+
+ // Accumulator variant: dst carries a live partial result (matches
+ // (Min/MaxReductionV dst src)), so `true` is passed to reduceDoubleMinMax.
+ instruct minmax_reduction2D_av(legRegD dst, legVec src,
+ legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
+ rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
+ vector_length(n->in(2)) == 2);
+ match(Set dst (MinReductionV dst src));
+ match(Set dst (MaxReductionV dst src));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+ format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src);
+ __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // Accumulator variant for 4 or more double lanes (dst holds a live partial
+ // result); fifth vector temporary, `true` passed to reduceDoubleMinMax.
+ instruct minmax_reductionD_av(legRegD dst, legVec src,
+ legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
+ rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
+ vector_length(n->in(2)) >= 4);
+ match(Set dst (MinReductionV dst src));
+ match(Set dst (MaxReductionV dst src));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
+ format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+
+ int opcode = this->ideal_Opcode();
+ int vlen = vector_length(this, $src);
+ __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
// ====================VECTOR ARITHMETIC=======================================
// --------------------------------- ADD --------------------------------------
// Bytes vector add
*** 3961,3983 ****
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vaddB_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Shorts/Chars vector add
--- 4888,4910 ----
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vaddB_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Shorts/Chars vector add
*** 3994,4016 ****
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vaddS_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Integers vector add
--- 4921,4943 ----
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vaddS_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Integers vector add
*** 4027,4050 ****
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVI src1 src2));
format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vaddI_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVI src (LoadVector mem)));
format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Longs vector add
--- 4954,4977 ----
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVI src1 src2));
format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vaddI_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVI src (LoadVector mem)));
format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Longs vector add
*** 4061,4083 ****
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVL src1 src2));
format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vaddL_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVL src (LoadVector mem)));
format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Floats vector add
--- 4988,5010 ----
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVL src1 src2));
format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vaddL_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVL src (LoadVector mem)));
format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Floats vector add
*** 4094,4116 ****
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVF src1 src2));
format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vaddF_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVF src (LoadVector mem)));
format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Doubles vector add
--- 5021,5043 ----
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVF src1 src2));
format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vaddF_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVF src (LoadVector mem)));
format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Doubles vector add
*** 4127,4149 ****
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVD src1 src2));
format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vaddD_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVD src (LoadVector mem)));
format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- SUB --------------------------------------
--- 5054,5076 ----
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (AddVD src1 src2));
format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vaddD_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (AddVD src (LoadVector mem)));
format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- SUB --------------------------------------
*** 4162,4184 ****
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsubB_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Shorts/Chars vector sub
--- 5089,5111 ----
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vsubB_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Shorts/Chars vector sub
*** 4196,4218 ****
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsubS_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Integers vector sub
--- 5123,5145 ----
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vsubS_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Integers vector sub
*** 4229,4251 ****
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVI src1 src2));
format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsubI_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVI src (LoadVector mem)));
format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Longs vector sub
--- 5156,5178 ----
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVI src1 src2));
format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vsubI_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVI src (LoadVector mem)));
format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Longs vector sub
*** 4262,4285 ****
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVL src1 src2));
format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsubL_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVL src (LoadVector mem)));
format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Floats vector sub
--- 5189,5212 ----
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVL src1 src2));
format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vsubL_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVL src (LoadVector mem)));
format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Floats vector sub
*** 4296,4318 ****
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVF src1 src2));
format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsubF_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVF src (LoadVector mem)));
format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Doubles vector sub
--- 5223,5245 ----
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVF src1 src2));
format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vsubF_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVF src (LoadVector mem)));
format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Doubles vector sub
*** 4329,4361 ****
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVD src1 src2));
format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsubD_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVD src (LoadVector mem)));
format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- MUL --------------------------------------
// Byte vector mul
instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
! predicate(n->as_Vector()->length() == 4 ||
! n->as_Vector()->length() == 8);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
ins_encode %{
assert(UseSSE > 3, "required");
--- 5256,5288 ----
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (SubVD src1 src2));
format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vsubD_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (SubVD src (LoadVector mem)));
format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- MUL --------------------------------------
// Byte vector mul
instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
! predicate(vector_length(n) == 4 ||
! vector_length(n) == 8);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
ins_encode %{
assert(UseSSE > 3, "required");
*** 4368,4378 ****
%}
ins_pipe( pipe_slow );
%}
instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
! predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
ins_encode %{
assert(UseSSE > 3, "required");
--- 5295,5305 ----
%}
ins_pipe( pipe_slow );
%}
instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
! predicate(vector_length(n) == 16 && UseAVX <= 1);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
ins_encode %{
assert(UseSSE > 3, "required");
*** 4391,4466 ****
%}
ins_pipe( pipe_slow );
%}
instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
! predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
ins_encode %{
! int vector_len = Assembler::AVX_256bit;
! __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
! __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
! __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
! __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
__ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister);
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0);
%}
ins_pipe( pipe_slow );
%}
instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
! predicate(n->as_Vector()->length() == 32);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
ins_encode %{
assert(UseAVX > 1, "required");
! int vector_len = Assembler::AVX_256bit;
__ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
__ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister);
! __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
! __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
! __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
! __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
! __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len);
! __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
! predicate(n->as_Vector()->length() == 64);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2\n\t" %}
ins_encode %{
assert(UseAVX > 2, "required");
! int vector_len = Assembler::AVX_512bit;
__ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister);
__ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister);
! __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
! __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
! __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
! __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
! __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
! __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
! __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Shorts/Chars vector mul
--- 5318,5393 ----
%}
ins_pipe( pipe_slow );
%}
instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
! predicate(vector_length(n) == 16 && UseAVX > 1);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
ins_encode %{
! int vlen_enc = Assembler::AVX_256bit;
! __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
! __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc);
! __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
! __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
__ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister);
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0);
%}
ins_pipe( pipe_slow );
%}
instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
! predicate(vector_length(n) == 32);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
ins_encode %{
assert(UseAVX > 1, "required");
! int vlen_enc = Assembler::AVX_256bit;
__ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
__ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister);
! __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
! __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
! __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc);
! __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
! __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
! __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
! predicate(vector_length(n) == 64);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2\n\t" %}
ins_encode %{
assert(UseAVX > 2, "required");
! int vlen_enc = Assembler::AVX_512bit;
__ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister);
__ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister);
! __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
! __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
! __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc);
! __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
! __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
! __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register);
! __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Shorts/Chars vector mul
*** 4477,4499 ****
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vmulS_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Integers vector mul
--- 5404,5426 ----
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vmulS_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Integers vector mul
*** 4511,4556 ****
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (MulVI src1 src2));
format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vmulI_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (MulVI src (LoadVector mem)));
format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Longs vector mul
instruct vmulL_reg(vec dst, vec src1, vec src2) %{
match(Set dst (MulVL src1 src2));
format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %}
ins_encode %{
assert(UseAVX > 2, "required");
! int vector_len = vector_length_encoding(this);
! __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vmulL_mem(vec dst, vec src, memory mem) %{
match(Set dst (MulVL src (LoadVector mem)));
format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %}
ins_encode %{
assert(UseAVX > 2, "required");
! int vector_len = vector_length_encoding(this);
! __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Floats vector mul
--- 5438,5536 ----
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (MulVI src1 src2));
format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vmulI_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (MulVI src (LoadVector mem)));
format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Longs vector mul
instruct vmulL_reg(vec dst, vec src1, vec src2) %{
+ predicate(VM_Version::supports_avx512dq());
match(Set dst (MulVL src1 src2));
format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %}
ins_encode %{
assert(UseAVX > 2, "required");
! int vlen_enc = vector_length_encoding(this);
! __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vmulL_mem(vec dst, vec src, memory mem) %{
+ predicate(VM_Version::supports_avx512dq());
match(Set dst (MulVL src (LoadVector mem)));
format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %}
ins_encode %{
assert(UseAVX > 2, "required");
! int vlen_enc = vector_length_encoding(this);
! __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! instruct mul2L_reg(vec dst, vec src2, vec tmp) %{
! predicate(vector_length(n) == 2 && !VM_Version::supports_avx512dq());
! match(Set dst (MulVL dst src2));
! effect(TEMP dst, TEMP tmp);
! format %{ "pshufd $tmp,$src2, 177\n\t"
! "pmulld $tmp,$dst\n\t"
! "phaddd $tmp,$tmp\n\t"
! "pmovzxdq $tmp,$tmp\n\t"
! "psllq $tmp, 32\n\t"
! "pmuludq $dst,$src2\n\t"
! "paddq $dst,$tmp\n\t! mul packed2L" %}
!
! ins_encode %{
! assert(VM_Version::supports_sse4_1(), "required");
! int vlen_enc = Assembler::AVX_128bit;
! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177);
! __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister);
! __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
! __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister);
! __ psllq($tmp$$XMMRegister, 32);
! __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister);
! __ paddq($dst$$XMMRegister, $tmp$$XMMRegister);
! %}
! ins_pipe( pipe_slow );
! %}
!
! instruct vmul4L_reg_avx(vec dst, vec src1, vec src2, vec tmp, vec tmp1) %{
! predicate(vector_length(n) == 4 && !VM_Version::supports_avx512dq());
! match(Set dst (MulVL src1 src2));
! effect(TEMP tmp1, TEMP tmp);
! format %{ "vpshufd $tmp,$src2,177\n\t"
! "vpmulld $tmp,$src1,$tmp\n\t"
! "vphaddd $tmp,$tmp,$tmp\n\t"
! "vpmovzxdq $tmp,$tmp\n\t"
! "vpsllq $tmp,$tmp,32\n\t"
! "vpmuludq $tmp1,$src1,$src2\n\t"
! "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %}
! ins_encode %{
! int vlen_enc = Assembler::AVX_256bit;
! __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vlen_enc);
! __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
! __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister);
! __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
! __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
! __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vlen_enc);
! __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
! __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Floats vector mul
*** 4567,4589 ****
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (MulVF src1 src2));
format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vmulF_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (MulVF src (LoadVector mem)));
format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Doubles vector mul
--- 5547,5569 ----
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (MulVF src1 src2));
format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vmulF_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (MulVF src (LoadVector mem)));
format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Doubles vector mul
*** 4600,4654 ****
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (MulVD src1 src2));
format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vmulD_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (MulVD src (LoadVector mem)));
format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
! predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
effect(TEMP dst, USE src1, USE src2);
format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
"blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
%}
ins_encode %{
! int vector_len = 1;
int cond = (Assembler::Condition)($copnd$$cmpcode);
! __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
! __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
! predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
effect(TEMP dst, USE src1, USE src2);
format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
! "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
%}
ins_encode %{
! int vector_len = 1;
int cond = (Assembler::Condition)($copnd$$cmpcode);
! __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
! __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- DIV --------------------------------------
--- 5580,5638 ----
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (MulVD src1 src2));
format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vmulD_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (MulVD src (LoadVector mem)));
format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
! predicate(vector_length(n) == 8);
match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
effect(TEMP dst, USE src1, USE src2);
format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
"blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
%}
ins_encode %{
! assert(UseAVX > 0, "required");
!
! int vlen_enc = Assembler::AVX_256bit;
int cond = (Assembler::Condition)($copnd$$cmpcode);
! __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc);
! __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
! predicate(vector_length(n) == 4);
match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
effect(TEMP dst, USE src1, USE src2);
format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
! "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
%}
ins_encode %{
! assert(UseAVX > 0, "required");
!
! int vlen_enc = Assembler::AVX_256bit;
int cond = (Assembler::Condition)($copnd$$cmpcode);
! __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc);
! __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- DIV --------------------------------------
*** 4667,4689 ****
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (DivVF src1 src2));
format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vdivF_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (DivVF src (LoadVector mem)));
format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Doubles vector div
--- 5651,5673 ----
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (DivVF src1 src2));
format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vdivF_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (DivVF src (LoadVector mem)));
format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Doubles vector div
*** 4700,4722 ****
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (DivVD src1 src2));
format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vdivD_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (DivVD src (LoadVector mem)));
format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Sqrt --------------------------------------
--- 5684,5843 ----
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (DivVD src1 src2));
format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vdivD_mem(vec dst, vec src, memory mem) %{
predicate(UseAVX > 0);
match(Set dst (DivVD src (LoadVector mem)));
format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // ------------------------------ MinMax ---------------------------------------
!
! // Byte, Short, Int vector Min/Max
! instruct minmax_reg_sse(vec dst, vec src) %{
! predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
! UseAVX == 0);
! match(Set dst (MinV dst src));
! match(Set dst (MaxV dst src));
! format %{ "vector_minmax $dst,$src\t! " %}
! ins_encode %{
! assert(UseSSE >= 4, "required");
!
! int opcode = this->ideal_Opcode();
! BasicType elem_bt = vector_element_basic_type(this);
! __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
! %}
! ins_pipe( pipe_slow );
! %}
!
! instruct vminmax_reg(vec dst, vec src1, vec src2) %{
! predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
! UseAVX > 0);
! match(Set dst (MinV src1 src2));
! match(Set dst (MaxV src1 src2));
! format %{ "vector_minmax $dst,$src1,$src2\t! " %}
! ins_encode %{
! int opcode = this->ideal_Opcode();
! int vlen_enc = vector_length_encoding(this);
! BasicType elem_bt = vector_element_basic_type(this);
!
! __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Long vector Min/Max
! instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
! predicate(vector_length_in_bytes(n) == 16 && vector_element_basic_type(n) == T_LONG &&
! UseAVX == 0);
! match(Set dst (MinV dst src));
! match(Set dst (MaxV src dst));
! effect(TEMP dst, TEMP tmp);
! format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
! ins_encode %{
! assert(UseSSE >= 4, "required");
!
! int opcode = this->ideal_Opcode();
! BasicType elem_bt = vector_element_basic_type(this);
! assert(elem_bt == T_LONG, "sanity");
!
! __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
! %}
! ins_pipe( pipe_slow );
! %}
!
! instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
! predicate(vector_length_in_bytes(n) <= 32 && vector_element_basic_type(n) == T_LONG &&
! UseAVX > 0 && !VM_Version::supports_avx512vl());
! match(Set dst (MinV src1 src2));
! match(Set dst (MaxV src1 src2));
! effect(TEMP dst);
! format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! int opcode = this->ideal_Opcode();
! BasicType elem_bt = vector_element_basic_type(this);
! assert(elem_bt == T_LONG, "sanity");
!
! __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
! predicate((vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
! vector_element_basic_type(n) == T_LONG);
! match(Set dst (MinV src1 src2));
! match(Set dst (MaxV src1 src2));
! format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
! ins_encode %{
! assert(UseAVX > 2, "required");
!
! int vlen_enc = vector_length_encoding(this);
! int opcode = this->ideal_Opcode();
! BasicType elem_bt = vector_element_basic_type(this);
! assert(elem_bt == T_LONG, "sanity");
!
! __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Float/Double vector Min/Max
! instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
! predicate(vector_length_in_bytes(n) <= 32 &&
! is_floating_point_type(vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
! UseAVX > 0);
! match(Set dst (MinV a b));
! match(Set dst (MaxV a b));
! effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
! format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
! ins_encode %{
! assert(UseAVX > 0, "required");
!
! int opcode = this->ideal_Opcode();
! int vlen_enc = vector_length_encoding(this);
! BasicType elem_bt = vector_element_basic_type(this);
!
! __ vminmax_fp(opcode, elem_bt,
! $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
! $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp) %{
! predicate(vector_length_in_bytes(n) == 64 &&
! is_floating_point_type(vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
! match(Set dst (MinV a b));
! match(Set dst (MaxV a b));
! effect(USE a, USE b, TEMP atmp, TEMP btmp);
! format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
! ins_encode %{
! assert(UseAVX > 2, "required");
!
! int opcode = this->ideal_Opcode();
! int vlen_enc = vector_length_encoding(this);
! BasicType elem_bt = vector_element_basic_type(this);
!
! KRegister ktmp = k1;
! __ evminmax_fp(opcode, elem_bt,
! $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
! ktmp, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Sqrt --------------------------------------
*** 4724,4769 ****
instruct vsqrtF_reg(vec dst, vec src) %{
match(Set dst (SqrtVF src));
format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vector_len = vector_length_encoding(this);
! __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsqrtF_mem(vec dst, memory mem) %{
match(Set dst (SqrtVF (LoadVector mem)));
format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vector_len = vector_length_encoding(this);
! __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Floating point vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
match(Set dst (SqrtVD src));
format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vector_len = vector_length_encoding(this);
! __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsqrtD_mem(vec dst, memory mem) %{
match(Set dst (SqrtVD (LoadVector mem)));
format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vector_len = vector_length_encoding(this);
! __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// ------------------------------ Shift ---------------------------------------
--- 5845,5890 ----
instruct vsqrtF_reg(vec dst, vec src) %{
match(Set dst (SqrtVF src));
format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vlen_enc = vector_length_encoding(this);
! __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vsqrtF_mem(vec dst, memory mem) %{
match(Set dst (SqrtVF (LoadVector mem)));
format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vlen_enc = vector_length_encoding(this);
! __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Floating point vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
match(Set dst (SqrtVD src));
format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vlen_enc = vector_length_encoding(this);
! __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vsqrtD_mem(vec dst, memory mem) %{
match(Set dst (SqrtVD (LoadVector mem)));
format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
! int vlen_enc = vector_length_encoding(this);
! __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// ------------------------------ Shift ---------------------------------------
*** 4780,4909 ****
ins_pipe( pipe_slow );
%}
// Byte vector shift
instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
! predicate(n->as_Vector()->length() <= 8);
! match(Set dst (LShiftVB src shift));
! match(Set dst (RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseSSE > 3, "required");
int opcode = this->ideal_Opcode();
! __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
__ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
__ pand($dst$$XMMRegister, $tmp$$XMMRegister);
__ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
! predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
! match(Set dst (LShiftVB src shift));
! match(Set dst (RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseSSE > 3, "required");
int opcode = this->ideal_Opcode();
!
! __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
__ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
! __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
__ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
__ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
__ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
__ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
! predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
! match(Set dst (LShiftVB src shift));
! match(Set dst (RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
int opcode = this->ideal_Opcode();
! int vector_len = Assembler::AVX_256bit;
! __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
! __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
! __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
__ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
__ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
%}
ins_pipe( pipe_slow );
%}
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
! predicate(n->as_Vector()->length() == 32);
! match(Set dst (LShiftVB src shift));
! match(Set dst (RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseAVX > 1, "required");
int opcode = this->ideal_Opcode();
! int vector_len = Assembler::AVX_256bit;
__ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
! __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
! __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
! __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
! __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
! __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
! __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
! __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
! predicate(n->as_Vector()->length() == 64);
! match(Set dst (LShiftVB src shift));
match(Set dst (RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseAVX > 2, "required");
int opcode = this->ideal_Opcode();
! int vector_len = Assembler::AVX_512bit;
__ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
! __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
! __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
! __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
! __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
! __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
! __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
! __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
! __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int
// with sign extension before a shift. But char vectors are fine since
// chars are unsigned values.
// Shorts/Chars vector left shift
instruct vshiftS(vec dst, vec src, vec shift) %{
! match(Set dst (LShiftVS src shift));
! match(Set dst (RShiftVS src shift));
match(Set dst (URShiftVS src shift));
effect(TEMP dst, USE src, USE shift);
format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
ins_encode %{
int opcode = this->ideal_Opcode();
--- 5901,6037 ----
ins_pipe( pipe_slow );
%}
// Byte vector shift
instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
! predicate(vector_length(n) <= 8 && VectorNode::is_vshift_cnt(n->in(2)));
! match(Set dst ( LShiftVB src shift));
! match(Set dst ( RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseSSE > 3, "required");
int opcode = this->ideal_Opcode();
! bool sign = (opcode != Op_URShiftVB);
! __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
__ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
__ pand($dst$$XMMRegister, $tmp$$XMMRegister);
__ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
! predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) &&
! UseAVX <= 1);
! match(Set dst ( LShiftVB src shift));
! match(Set dst ( RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseSSE > 3, "required");
int opcode = this->ideal_Opcode();
! bool sign = (opcode != Op_URShiftVB);
! __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
__ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
! __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
__ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
__ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
__ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
__ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
! predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) &&
! UseAVX > 1);
! match(Set dst ( LShiftVB src shift));
! match(Set dst ( RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
int opcode = this->ideal_Opcode();
! bool sign = (opcode != Op_URShiftVB);
! int vlen_enc = Assembler::AVX_256bit;
! __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
! __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
! __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
__ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
__ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
%}
ins_pipe( pipe_slow );
%}
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
! predicate(vector_length(n) == 32 && VectorNode::is_vshift_cnt(n->in(2)));
! match(Set dst ( LShiftVB src shift));
! match(Set dst ( RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseAVX > 1, "required");
int opcode = this->ideal_Opcode();
! bool sign = (opcode != Op_URShiftVB);
! int vlen_enc = Assembler::AVX_256bit;
__ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
! __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
! __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
! __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
! __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
! __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
! __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
! predicate(vector_length(n) == 64 && VectorNode::is_vshift_cnt(n->in(2)));
! match(Set dst ( LShiftVB src shift));
match(Set dst (RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseAVX > 2, "required");
int opcode = this->ideal_Opcode();
! bool sign = (opcode != Op_URShiftVB);
! int vlen_enc = Assembler::AVX_512bit;
__ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
! __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
! __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
! __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
! __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
! __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
! __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register);
! __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int
// with sign extension before a shift. But char vectors are fine since
// chars are unsigned values.
// Shorts/Chars vector left shift
instruct vshiftS(vec dst, vec src, vec shift) %{
! predicate(VectorNode::is_vshift_cnt(n->in(2)));
! match(Set dst ( LShiftVS src shift));
! match(Set dst ( RShiftVS src shift));
match(Set dst (URShiftVS src shift));
effect(TEMP dst, USE src, USE shift);
format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
ins_encode %{
int opcode = this->ideal_Opcode();
*** 4928,4947 ****
ins_pipe( pipe_slow );
%}
// Integers vector left shift
instruct vshiftI(vec dst, vec src, vec shift) %{
! match(Set dst (LShiftVI src shift));
! match(Set dst (RShiftVI src shift));
match(Set dst (URShiftVI src shift));
effect(TEMP dst, USE src, USE shift);
format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
ins_encode %{
int opcode = this->ideal_Opcode();
if (UseAVX > 0) {
! int vector_len = vector_length_encoding(this);
! __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
} else {
int vlen = vector_length(this);
if (vlen == 2) {
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
__ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
--- 6056,6076 ----
ins_pipe( pipe_slow );
%}
// Integers vector left shift
instruct vshiftI(vec dst, vec src, vec shift) %{
! predicate(VectorNode::is_vshift_cnt(n->in(2)));
! match(Set dst ( LShiftVI src shift));
! match(Set dst ( RShiftVI src shift));
match(Set dst (URShiftVI src shift));
effect(TEMP dst, USE src, USE shift);
format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
ins_encode %{
int opcode = this->ideal_Opcode();
if (UseAVX > 0) {
! int vlen_enc = vector_length_encoding(this);
! __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
} else {
int vlen = vector_length(this);
if (vlen == 2) {
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
__ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
*** 4955,4973 ****
ins_pipe( pipe_slow );
%}
// Longs vector shift
instruct vshiftL(vec dst, vec src, vec shift) %{
! match(Set dst (LShiftVL src shift));
match(Set dst (URShiftVL src shift));
effect(TEMP dst, USE src, USE shift);
format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
ins_encode %{
int opcode = this->ideal_Opcode();
if (UseAVX > 0) {
! int vector_len = vector_length_encoding(this);
! __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
} else {
assert(vector_length(this) == 2, "");
__ movdqu($dst$$XMMRegister, $src$$XMMRegister);
__ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
}
--- 6084,6103 ----
ins_pipe( pipe_slow );
%}
// Longs vector shift
instruct vshiftL(vec dst, vec src, vec shift) %{
! predicate(VectorNode::is_vshift_cnt(n->in(2)));
! match(Set dst ( LShiftVL src shift));
match(Set dst (URShiftVL src shift));
effect(TEMP dst, USE src, USE shift);
format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
ins_encode %{
int opcode = this->ideal_Opcode();
if (UseAVX > 0) {
! int vlen_enc = vector_length_encoding(this);
! __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
} else {
assert(vector_length(this) == 2, "");
__ movdqu($dst$$XMMRegister, $src$$XMMRegister);
__ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
}
*** 4976,4986 ****
%}
// -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
! predicate(UseAVX <= 2);
match(Set dst (RShiftVL src shift));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{ "vshiftq $dst,$src,$shift" %}
ins_encode %{
uint vlen = vector_length(this);
--- 6106,6116 ----
%}
// -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
! predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX <= 2);
match(Set dst (RShiftVL src shift));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{ "vshiftq $dst,$src,$shift" %}
ins_encode %{
uint vlen = vector_length(this);
*** 4993,5149 ****
__ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
__ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
} else {
assert(vlen == 4, "sanity");
assert(UseAVX > 1, "required");
! int vector_len = Assembler::AVX_256bit;
! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
__ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
! __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
! __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
}
%}
ins_pipe( pipe_slow );
%}
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
! predicate(UseAVX > 2);
match(Set dst (RShiftVL src shift));
format %{ "vshiftq $dst,$src,$shift" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
! // --------------------------------- AND --------------------------------------
!
! instruct vand(vec dst, vec src) %{
! predicate(UseAVX == 0);
! match(Set dst (AndV dst src));
! format %{ "pand $dst,$src\t! and vectors" %}
ins_encode %{
! __ pand($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
! instruct vand_reg(vec dst, vec src1, vec src2) %{
! predicate(UseAVX > 0);
! match(Set dst (AndV src1 src2));
! format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
! instruct vand_mem(vec dst, vec src, memory mem) %{
! predicate(UseAVX > 0);
! match(Set dst (AndV src (LoadVector mem)));
! format %{ "vpand $dst,$src,$mem\t! and vectors" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
! // --------------------------------- OR ---------------------------------------
!
! instruct vor(vec dst, vec src) %{
! predicate(UseAVX == 0);
! match(Set dst (OrV dst src));
! format %{ "por $dst,$src\t! or vectors" %}
ins_encode %{
! __ por($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
! instruct vor_reg(vec dst, vec src1, vec src2) %{
! predicate(UseAVX > 0);
! match(Set dst (OrV src1 src2));
! format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
! instruct vor_mem(vec dst, vec src, memory mem) %{
! predicate(UseAVX > 0);
! match(Set dst (OrV src (LoadVector mem)));
! format %{ "vpor $dst,$src,$mem\t! or vectors" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
! // --------------------------------- XOR --------------------------------------
!
! instruct vxor(vec dst, vec src) %{
! predicate(UseAVX == 0);
! match(Set dst (XorV dst src));
! format %{ "pxor $dst,$src\t! xor vectors" %}
ins_encode %{
! __ pxor($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
! instruct vxor_reg(vec dst, vec src1, vec src2) %{
! predicate(UseAVX > 0);
! match(Set dst (XorV src1 src2));
! format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
! instruct vxor_mem(vec dst, vec src, memory mem) %{
! predicate(UseAVX > 0);
! match(Set dst (XorV src (LoadVector mem)));
! format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
! // --------------------------------- ABS --------------------------------------
! // a = |a|
! instruct vabsB_reg(vec dst, vec src) %{
! match(Set dst (AbsVB src));
! format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
ins_encode %{
! uint vlen = vector_length(this);
! if (vlen <= 16) {
! __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
! } else {
int vlen_enc = vector_length_encoding(this);
! __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! }
%}
ins_pipe( pipe_slow );
%}
! instruct vabsS_reg(vec dst, vec src) %{
! match(Set dst (AbsVS src));
! format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
ins_encode %{
! uint vlen = vector_length(this);
! if (vlen <= 8) {
__ pabsw($dst$$XMMRegister, $src$$XMMRegister);
} else {
int vlen_enc = vector_length_encoding(this);
__ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
}
--- 6123,7142 ----
__ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
__ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
} else {
assert(vlen == 4, "sanity");
assert(UseAVX > 1, "required");
! int vlen_enc = Assembler::AVX_256bit;
! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
__ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
! __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
! __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
! predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX > 2);
match(Set dst (RShiftVL src shift));
format %{ "vshiftq $dst,$src,$shift" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
! // ------------------- Variable Shift -----------------------------
! // Byte variable shift
! instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{
! predicate(vector_length(n) <= 8 &&
! !VectorNode::is_vshift_cnt(n->in(2)) &&
! !VM_Version::supports_avx512bw());
! match(Set dst ( LShiftVB src shift));
! match(Set dst ( RShiftVB src shift));
! match(Set dst (URShiftVB src shift));
! effect(TEMP dst, TEMP vtmp, TEMP scratch);
! format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %}
ins_encode %{
! assert(UseAVX >= 2, "required");
!
! int opcode = this->ideal_Opcode();
! int vlen_enc = Assembler::AVX_128bit;
! __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register);
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
%}
ins_pipe( pipe_slow );
%}
! instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{
! predicate(vector_length(n) == 16 &&
! !VectorNode::is_vshift_cnt(n->in(2)) &&
! !VM_Version::supports_avx512bw());
! match(Set dst ( LShiftVB src shift));
! match(Set dst ( RShiftVB src shift));
! match(Set dst (URShiftVB src shift));
! effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
! format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %}
ins_encode %{
! assert(UseAVX >= 2, "required");
!
! int opcode = this->ideal_Opcode();
! int vlen_enc = Assembler::AVX_128bit;
! // Shift lower half and get word result in dst
! __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register);
!
! // Shift upper half and get word result in vtmp1
! __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
! __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
! __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
!
! // Merge and down convert the two word results to byte in dst
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
%}
ins_pipe( pipe_slow );
%}
! instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{
! predicate(vector_length(n) == 32 &&
! !VectorNode::is_vshift_cnt(n->in(2)) &&
! !VM_Version::supports_avx512bw());
! match(Set dst ( LShiftVB src shift));
! match(Set dst ( RShiftVB src shift));
! match(Set dst (URShiftVB src shift));
! effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP scratch);
! format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 and $scratch as TEMP" %}
ins_encode %{
! assert(UseAVX >= 2, "required");
!
! int opcode = this->ideal_Opcode();
! int vlen_enc = Assembler::AVX_128bit;
! // Process lower 128 bits and get result in dst
! __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register);
! __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
! __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
! __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
!
! // Process higher 128 bits and get result in vtmp3
! __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
! __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
! __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister, $scratch$$Register);
! __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
! __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
! __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
! __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
!
! // Merge the two results in dst
! __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
! instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{
! predicate(vector_length(n) <= 32 &&
! !VectorNode::is_vshift_cnt(n->in(2)) &&
! VM_Version::supports_avx512bw());
! match(Set dst ( LShiftVB src shift));
! match(Set dst ( RShiftVB src shift));
! match(Set dst (URShiftVB src shift));
! effect(TEMP dst, TEMP vtmp, TEMP scratch);
! format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %}
ins_encode %{
! assert(UseAVX > 2, "required");
!
! int opcode = this->ideal_Opcode();
! int vlen_enc = vector_length_encoding(this);
! __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register);
%}
ins_pipe( pipe_slow );
%}
! instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{
! predicate(vector_length(n) == 64 &&
! !VectorNode::is_vshift_cnt(n->in(2)) &&
! VM_Version::supports_avx512bw());
! match(Set dst ( LShiftVB src shift));
! match(Set dst ( RShiftVB src shift));
! match(Set dst (URShiftVB src shift));
! effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
! format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %}
ins_encode %{
! assert(UseAVX > 2, "required");
!
! int opcode = this->ideal_Opcode();
! int vlen_enc = Assembler::AVX_256bit;
! __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register);
! __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
! __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
! __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
! __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
! // Short variable shift
! instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{
! predicate(vector_length(n) <= 8 &&
! !VectorNode::is_vshift_cnt(n->in(2)) &&
! !VM_Version::supports_avx512bw());
! match(Set dst ( LShiftVS src shift));
! match(Set dst ( RShiftVS src shift));
! match(Set dst (URShiftVS src shift));
! effect(TEMP dst, TEMP vtmp, TEMP scratch);
! format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
ins_encode %{
! assert(UseAVX >= 2, "required");
!
! int opcode = this->ideal_Opcode();
! bool sign = (opcode != Op_URShiftVS);
! int vlen_enc = Assembler::AVX_256bit;
! __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
! __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
! __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
! __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
! __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
! __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
%}
ins_pipe( pipe_slow );
%}
! instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{
! predicate(vector_length(n) == 16 &&
! !VectorNode::is_vshift_cnt(n->in(2)) &&
! !VM_Version::supports_avx512bw());
! match(Set dst ( LShiftVS src shift));
! match(Set dst ( RShiftVS src shift));
! match(Set dst (URShiftVS src shift));
! effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
! format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
ins_encode %{
! assert(UseAVX >= 2, "required");
!
! int opcode = this->ideal_Opcode();
! bool sign = (opcode != Op_URShiftVS);
! int vlen_enc = Assembler::AVX_256bit;
! // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
! __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
! __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
! __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
! __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
!
! // Shift upper half, with result in dst using vtmp1 as TEMP
! __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
! __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
! __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
! __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
! __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
!
! // Merge lower and upper half result into dst
! __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
! instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
! predicate(!VectorNode::is_vshift_cnt(n->in(2)) &&
! VM_Version::supports_avx512bw());
! match(Set dst ( LShiftVS src shift));
! match(Set dst ( RShiftVS src shift));
! match(Set dst (URShiftVS src shift));
! format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
ins_encode %{
! assert(UseAVX > 2, "required");
!
! int opcode = this->ideal_Opcode();
! int vlen_enc = vector_length_encoding(this);
! if (!VM_Version::supports_avx512vl()) {
! vlen_enc = Assembler::AVX_512bit;
! }
! __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
! // Integer variable shift
! instruct vshiftI_var(vec dst, vec src, vec shift) %{
! predicate(!VectorNode::is_vshift_cnt(n->in(2)));
! match(Set dst ( LShiftVI src shift));
! match(Set dst ( RShiftVI src shift));
! match(Set dst (URShiftVI src shift));
! format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
ins_encode %{
! assert(UseAVX >= 2, "required");
!
! int opcode = this->ideal_Opcode();
! int vlen_enc = vector_length_encoding(this);
! __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
! // Long variable shift
! instruct vshiftL_var(vec dst, vec src, vec shift) %{
! predicate(!VectorNode::is_vshift_cnt(n->in(2)));
! match(Set dst ( LShiftVL src shift));
! match(Set dst (URShiftVL src shift));
! format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
ins_encode %{
! assert(UseAVX >= 2, "required");
!
! int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this);
! __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
! // Long variable arithmetic right shift on AVX2 (<= 4 elements); the vtmp register is
! // consumed by the varshiftq emulation sequence.
! instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
! predicate(vector_length(n) <= 4 &&
! !VectorNode::is_vshift_cnt(n->in(2)) &&
! UseAVX == 2);
! match(Set dst (RShiftVL src shift));
! effect(TEMP dst, TEMP vtmp);
! format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
ins_encode %{
! int opcode = this->ideal_Opcode();
! int vlen_enc = vector_length_encoding(this);
! __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
! $vtmp$$XMMRegister);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Long variable arithmetic right shift on AVX-512 (native vpsravq, no temp needed).
! instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
! predicate(!VectorNode::is_vshift_cnt(n->in(2)) &&
! UseAVX > 2);
! match(Set dst (RShiftVL src shift));
! format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
! ins_encode %{
! int opcode = this->ideal_Opcode();
! int vlen_enc = vector_length_encoding(this);
! __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // --------------------------------- AND --------------------------------------
!
! // SSE: dst &= src (two-operand form, dst is both input and output).
! instruct vand(vec dst, vec src) %{
! predicate(UseAVX == 0);
! match(Set dst (AndV dst src));
! format %{ "pand $dst,$src\t! and vectors" %}
! ins_encode %{
! __ pand($dst$$XMMRegister, $src$$XMMRegister);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // AVX: three-operand dst = src1 & src2.
! instruct vand_reg(vec dst, vec src1, vec src2) %{
! predicate(UseAVX > 0);
! match(Set dst (AndV src1 src2));
! format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // AVX with a folded memory operand: dst = src & [mem].
! instruct vand_mem(vec dst, vec src, memory mem) %{
! predicate(UseAVX > 0);
! match(Set dst (AndV src (LoadVector mem)));
! format %{ "vpand $dst,$src,$mem\t! and vectors" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // --------------------------------- OR ---------------------------------------
!
! // SSE: dst |= src (two-operand form, dst is both input and output).
! instruct vor(vec dst, vec src) %{
! predicate(UseAVX == 0);
! match(Set dst (OrV dst src));
! format %{ "por $dst,$src\t! or vectors" %}
! ins_encode %{
! __ por($dst$$XMMRegister, $src$$XMMRegister);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // AVX: three-operand dst = src1 | src2.
! instruct vor_reg(vec dst, vec src1, vec src2) %{
! predicate(UseAVX > 0);
! match(Set dst (OrV src1 src2));
! format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // AVX with a folded memory operand: dst = src | [mem].
! instruct vor_mem(vec dst, vec src, memory mem) %{
! predicate(UseAVX > 0);
! match(Set dst (OrV src (LoadVector mem)));
! format %{ "vpor $dst,$src,$mem\t! or vectors" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // --------------------------------- XOR --------------------------------------
!
! // SSE: dst ^= src (two-operand form, dst is both input and output).
! instruct vxor(vec dst, vec src) %{
! predicate(UseAVX == 0);
! match(Set dst (XorV dst src));
! format %{ "pxor $dst,$src\t! xor vectors" %}
! ins_encode %{
! __ pxor($dst$$XMMRegister, $src$$XMMRegister);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // AVX: three-operand dst = src1 ^ src2.
! instruct vxor_reg(vec dst, vec src1, vec src2) %{
! predicate(UseAVX > 0);
! match(Set dst (XorV src1 src2));
! format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // AVX with a folded memory operand: dst = src ^ [mem].
! instruct vxor_mem(vec dst, vec src, memory mem) %{
! predicate(UseAVX > 0);
! match(Set dst (XorV src (LoadVector mem)));
! format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // --------------------------------- VectorCast --------------------------------------
!
! // Byte -> {short,int,float,long,double}: sign-extend with vpmovsxb*, then convert
! // through int for the floating-point targets.
! instruct vcastBtoX(vec dst, vec src) %{
! match(Set dst (VectorCastB2X src));
! format %{ "vector_cast_b2x $dst,$src\t!" %}
! ins_encode %{
! assert(UseAVX > 0, "required");
!
! BasicType to_elem_bt = vector_element_basic_type(this);
! int vlen_enc = vector_length_encoding(this);
! switch (to_elem_bt) {
! case T_SHORT:
! __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! break;
! case T_INT:
! __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! break;
! case T_FLOAT:
! __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! break;
! case T_LONG:
! __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! break;
! case T_DOUBLE:
! __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! break;
!
! default: assert(false, "%s", type2name(to_elem_bt));
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Short -> byte, narrow source (<= 8 elements): mask off high bytes, then pack.
! instruct castStoX(vec dst, vec src, rRegP scratch) %{
! predicate(UseAVX <= 2 &&
! vector_length(n->in(1)) <= 8 && // src
! vector_element_basic_type(n) == T_BYTE);
! effect(TEMP scratch);
! match(Set dst (VectorCastS2X src));
! format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %}
! ins_encode %{
! assert(UseAVX > 0, "required");
!
! __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register);
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Short -> byte, 16-element (256-bit) source: mask, extract the upper 128-bit lane
! // into vtmp, then pack both lanes into the 128-bit result.
! instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{
! predicate(UseAVX <= 2 &&
! vector_length(n->in(1)) == 16 && // src
! vector_element_basic_type(n) == T_BYTE);
! effect(TEMP dst, TEMP vtmp, TEMP scratch);
! match(Set dst (VectorCastS2X src));
! format %{ "vector_cast_s2x $dst,$src\t! using $vtmp, $scratch as TEMP" %}
! ins_encode %{
! assert(UseAVX > 0, "required");
!
! int vlen_enc = vector_length_encoding(vector_length_in_bytes(this, $src));
! __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
! __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Short -> X: EVEX down-convert for byte, vpmovsxw* sign-extension for the widening
! // targets (int/float/long/double).
! instruct vcastStoX_evex(vec dst, vec src) %{
! predicate(UseAVX > 2 ||
! (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src
! match(Set dst (VectorCastS2X src));
! format %{ "vector_cast_s2x $dst,$src\t!" %}
! ins_encode %{
! BasicType to_elem_bt = vector_element_basic_type(this);
! int src_vlen_enc = vector_length_encoding(this, $src);
! int vlen_enc = vector_length_encoding(this);
! switch (to_elem_bt) {
! case T_BYTE:
! // NOTE(review): evpmovwb below uses src_vlen_enc; this writes vlen_enc —
! // presumably retained for symmetry with the other EVEX cast rules.
! if (!VM_Version::supports_avx512vl()) {
! vlen_enc = Assembler::AVX_512bit;
! }
! __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
! break;
! case T_INT:
! __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! break;
! case T_FLOAT:
! __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! break;
! case T_LONG:
! __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! break;
! case T_DOUBLE:
! __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! break;
! default:
! ShouldNotReachHere();
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Int -> narrower (byte/short), source <= 128 bits: mask off high parts, then pack.
! instruct castItoX(vec dst, vec src, rRegP scratch) %{
! predicate(UseAVX <= 2 &&
! (vector_length_in_bytes(n->in(1)) <= 16) &&
! (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src
! match(Set dst (VectorCastI2X src));
! format %{ "vector_cast_i2x $dst,$src\t! using $scratch as TEMP" %}
! effect(TEMP scratch);
! ins_encode %{
! assert(UseAVX > 0, "required");
!
! BasicType to_elem_bt = vector_element_basic_type(this);
! int vlen_enc = vector_length_encoding(this, $src);
!
! if (to_elem_bt == T_BYTE) {
! __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register);
! __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! } else {
! assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
! __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
! __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Int -> narrower (byte/short), 256-bit source: mask into vtmp, split lanes with
! // vextracti128, then pack the two halves.
! instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{
! predicate(UseAVX <= 2 &&
! (vector_length_in_bytes(n->in(1)) == 32) &&
! (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src
! match(Set dst (VectorCastI2X src));
! format %{ "vector_cast_i2x $dst,$src\t! using $vtmp and $scratch as TEMP" %}
! effect(TEMP dst, TEMP vtmp, TEMP scratch);
! ins_encode %{
! assert(UseAVX > 0, "required");
!
! BasicType to_elem_bt = vector_element_basic_type(this);
! int vlen_enc = vector_length_encoding(this, $src);
!
! if (to_elem_bt == T_BYTE) {
! __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register);
! __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
! __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
! } else {
! assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
! __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
! __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
! __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Int -> X: EVEX down-convert (evpmovdb/evpmovdw) for byte/short, vcvtdq2ps/pd for
! // float/double, vpmovsxdq for long.
! instruct vcastItoX_evex(vec dst, vec src) %{
! predicate(UseAVX > 2 ||
! (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src
! match(Set dst (VectorCastI2X src));
! format %{ "vector_cast_i2x $dst,$src\t!" %}
! ins_encode %{
! assert(UseAVX > 0, "required");
!
! BasicType dst_elem_bt = vector_element_basic_type(this);
! int src_vlen_enc = vector_length_encoding(this, $src);
! int dst_vlen_enc = vector_length_encoding(this);
! switch (dst_elem_bt) {
! case T_BYTE:
! if (!VM_Version::supports_avx512vl()) {
! src_vlen_enc = Assembler::AVX_512bit;
! }
! __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
! break;
! case T_SHORT:
! if (!VM_Version::supports_avx512vl()) {
! src_vlen_enc = Assembler::AVX_512bit;
! }
! __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
! break;
! case T_FLOAT:
! // Convert from $src, not $dst: dst is not an input here and held no
! // defined value before this instruction.
! __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
! break;
! case T_LONG:
! __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
! break;
! case T_DOUBLE:
! // Convert from $src, not $dst (same reason as T_FLOAT above).
! __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
! break;
! default:
! ShouldNotReachHere();
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Long -> byte/short on AVX/AVX2: shuffle the low dwords of each long together,
! // mask, and pack down (extra vpackuswb step for the byte target).
! instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{
! predicate((vector_element_basic_type(n) == T_BYTE || vector_element_basic_type(n) == T_SHORT) &&
! UseAVX <= 2);
! match(Set dst (VectorCastL2X src));
! effect(TEMP scratch);
! format %{ "vector_cast_l2x $dst,$src\t! using $scratch as TEMP" %}
! ins_encode %{
! assert(UseAVX > 0, "required");
!
! int vlen = vector_length_in_bytes(this, $src);
! BasicType to_elem_bt = vector_element_basic_type(this);
! AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
! : ExternalAddress(vector_int_to_short_mask());
! if (vlen <= 16) {
! __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
! __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register);
! __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
! } else {
! assert(vlen <= 32, "required");
! __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
! __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
! __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register);
! __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
! }
! if (to_elem_bt == T_BYTE) {
! __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Long -> X with EVEX down-converts; the float/double targets additionally require
! // AVX512DQ (evcvtqq2ps/evcvtqq2pd), asserted below.
! instruct vcastLtoX_evex(vec dst, vec src) %{
! predicate(UseAVX > 2 ||
! (vector_element_basic_type(n) == T_INT ||
! vector_element_basic_type(n) == T_FLOAT ||
! vector_element_basic_type(n) == T_DOUBLE));
! match(Set dst (VectorCastL2X src));
! format %{ "vector_cast_l2x $dst,$src\t!" %}
! ins_encode %{
! BasicType to_elem_bt = vector_element_basic_type(this);
! int vlen = vector_length_in_bytes(this, $src);
! int vlen_enc = vector_length_encoding(this, $src);
! switch (to_elem_bt) {
! case T_BYTE:
! if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
! vlen_enc = Assembler::AVX_512bit;
! }
! __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! break;
! case T_SHORT:
! if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
! vlen_enc = Assembler::AVX_512bit;
! }
! __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! break;
! case T_INT:
! if (vlen == 8) {
! // One long -> one int: a 4-byte move suffices.
! if ($dst$$XMMRegister != $src$$XMMRegister) {
! __ movflt($dst$$XMMRegister, $src$$XMMRegister);
! }
! } else if (vlen == 16) {
! __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
! } else if (vlen == 32) {
! if (UseAVX > 2) {
! if (!VM_Version::supports_avx512vl()) {
! vlen_enc = Assembler::AVX_512bit;
! }
! __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! } else {
! // AVX2 fallback: gather the low dwords via permute.
! __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
! __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
! }
! } else { // vlen == 64
! __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! }
! break;
! case T_FLOAT:
! assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
! __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! break;
! case T_DOUBLE:
! assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
! __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! break;
!
! default: assert(false, "%s", type2name(to_elem_bt));
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Float -> double: widening, so the encoding is sized by the (larger) destination.
! instruct vcastFtoD_reg(vec dst, vec src) %{
! predicate(vector_element_basic_type(n) == T_DOUBLE);
! match(Set dst (VectorCastF2X src));
! format %{ "vector_cast_f2x $dst,$src\t!" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Double -> float: narrowing, so the encoding is sized by the (larger) source.
! instruct vcastDtoF_reg(vec dst, vec src) %{
! predicate(vector_element_basic_type(n) == T_FLOAT);
! match(Set dst (VectorCastD2X src));
! format %{ "vector_cast_d2x $dst,$src\t!" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this, $src);
! __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // --------------------------------- VectorMaskCmp --------------------------------------
!
! // FP compare, <= 256-bit: vcmpps/vcmppd write the all-ones/all-zeros mask directly.
! instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
! predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
! vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
! is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
! match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
! format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this, $src1);
! Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
! if (vector_element_basic_type(this, $src1) == T_FLOAT) {
! __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
! } else {
! __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! // FP compare, 512-bit: compare into a k-register, then materialize the vector mask
! // by a masked load of all-ones.
! instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{
! predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
! is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
! match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
! effect(TEMP scratch);
! format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
! ins_encode %{
! int vlen_enc = Assembler::AVX_512bit;
! Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
! KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
! KRegister mask = k0; // The comparison itself is not being masked.
! if (vector_element_basic_type(this, $src1) == T_FLOAT) {
! __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
! __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
! } else {
! __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
! __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Integral compare, <= 256-bit: vpcmpCCW dispatches on element width.
! instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{
! predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
! vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
! is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1
! match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
! effect(TEMP scratch);
! format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this, $src1);
! Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
! Assembler::Width ww = widthForType(vector_element_basic_type(this, $src1));
! __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Integral compare, 512-bit: per-element-type evpcmp* into k2, then materialize the
! // vector mask from all-ones via a masked move.
! instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{
! predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
! is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1
! match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
! effect(TEMP scratch);
! format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
! ins_encode %{
! assert(UseAVX > 2, "required");
!
! int vlen_enc = Assembler::AVX_512bit;
! Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
! KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
! KRegister mask = k0; // The comparison itself is not being masked.
! bool merge = false;
! BasicType src1_elem_bt = vector_element_basic_type(this, $src1);
!
! switch (src1_elem_bt) {
! case T_BYTE: {
! __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
! __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
! break;
! }
! case T_SHORT: {
! __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
! __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
! break;
! }
! case T_INT: {
! __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
! __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
! break;
! }
! case T_LONG: {
! __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
! __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
! break;
! }
!
! default: assert(false, "%s", type2name(src1_elem_bt));
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Extract
!
! // Extract an int/short/byte element from a <= 128-bit vector directly.
! instruct extractI(rRegI dst, legVec src, immU8 idx) %{
! predicate(vector_length_in_bytes(n->in(1)) <= 16); // src
! match(Set dst (ExtractI src idx));
! match(Set dst (ExtractS src idx));
! #ifdef _LP64
! match(Set dst (ExtractB src idx));
! #endif
! ins_encode %{
! assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
!
! BasicType elem_bt = vector_element_basic_type(this, $src);
! __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Extract from a 256/512-bit vector: first isolate the 128-bit lane holding the
! // element into vtmp, then extract from that lane.
! instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
! predicate(vector_length_in_bytes(n->in(1)) == 32 || // src
! vector_length_in_bytes(n->in(1)) == 64); // src
! match(Set dst (ExtractI src idx));
! match(Set dst (ExtractS src idx));
! #ifdef _LP64
! match(Set dst (ExtractB src idx));
! #endif
! effect(TEMP vtmp);
! ins_encode %{
! assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
!
! BasicType elem_bt = vector_element_basic_type(this, $src);
! XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
! __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
! %}
! ins_pipe( pipe_slow );
! %}
!
! #ifdef _LP64
! // Extract a long from a <= 128-bit vector (pextrq path; needs SSE4.1+).
! instruct extractL(rRegL dst, legVec src, immU8 idx) %{
! predicate(vector_length(n->in(1)) <= 2); // src
! match(Set dst (ExtractL src idx));
! ins_encode %{
! assert(UseSSE >= 4, "required");
! assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
!
! __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Extract a long from a 256/512-bit vector via lane isolation.
! instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
! predicate(vector_length(n->in(1)) == 4 || // src
! vector_length(n->in(1)) == 8); // src
! match(Set dst (ExtractL src idx));
! effect(TEMP vtmp);
! ins_encode %{
! assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
!
! XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
! __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
! %}
! ins_pipe( pipe_slow );
! %}
! #endif
!
! // Extract a float from a <= 128-bit vector.
! instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{
! predicate(vector_length(n->in(1)) <= 4);
! match(Set dst (ExtractF src idx));
! effect(TEMP dst, TEMP tmp, TEMP vtmp);
! ins_encode %{
! assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
!
! __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Extract a float from a 256/512-bit vector via lane isolation.
! instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{
! predicate(vector_length(n->in(1)/*src*/) == 8 ||
! vector_length(n->in(1)/*src*/) == 16);
! match(Set dst (ExtractF src idx));
! effect(TEMP tmp, TEMP vtmp);
! ins_encode %{
! assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
!
! XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
! __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Extract a double from a 128-bit vector.
! instruct extractD(legRegD dst, legVec src, immU8 idx) %{
! predicate(vector_length(n->in(1)) == 2); // src
! match(Set dst (ExtractD src idx));
! ins_encode %{
! assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
!
! __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // Extract a double from a 256/512-bit vector via lane isolation.
! instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
! predicate(vector_length(n->in(1)) == 4 || // src
! vector_length(n->in(1)) == 8); // src
! match(Set dst (ExtractD src idx));
! effect(TEMP vtmp);
! ins_encode %{
! assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
!
! XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
! __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // --------------------------------- Vector Blend --------------------------------------
!
! // SSE4.1 blend: pblendvb implicitly reads its mask from xmm0, hence the rxmm0 temp
! // the mask is copied into first.
! instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
! predicate(UseAVX == 0);
! match(Set dst (VectorBlend (Binary dst src) mask));
! format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
! effect(TEMP tmp);
! ins_encode %{
! assert(UseSSE >= 4, "required");
!
! if ($mask$$XMMRegister != $tmp$$XMMRegister) {
! __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
! }
! __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
! %}
! ins_pipe( pipe_slow );
! %}
!
! // AVX blend for integral elements, <= 256-bit: byte-granular vpblendvb.
! instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
! predicate(UseAVX > 0 &&
! vector_length_in_bytes(n) <= 32 &&
! is_integral_type(vector_element_basic_type(n)));
! match(Set dst (VectorBlend (Binary src1 src2) mask));
! format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // AVX blend for FP elements, <= 256-bit: vblendvps.
! instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
! predicate(UseAVX > 0 &&
! vector_length_in_bytes(n) <= 32 &&
! !is_integral_type(vector_element_basic_type(n)));
! match(Set dst (VectorBlend (Binary src1 src2) mask));
! format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
! ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // 512-bit blend: turn the vector mask into a k-register with evpcmp (compare against
! // all-ones), then a merge-masked evpblend.
! instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch) %{
! predicate(vector_length_in_bytes(n) == 64);
! match(Set dst (VectorBlend (Binary src1 src2) mask));
! format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %}
! effect(TEMP scratch);
! ins_encode %{
! int vlen_enc = Assembler::AVX_512bit;
! BasicType elem_bt = vector_element_basic_type(this);
! KRegister ktmp = k2;
! __ evpcmp(elem_bt, ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register);
! __ evpblend(elem_bt, $dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
! %}
! ins_pipe( pipe_slow );
! %}
!
! // --------------------------------- ABS --------------------------------------
! // a = |a|
! // Byte absolute value: pabsb for <= 128-bit vectors, vpabsb otherwise.
! instruct vabsB_reg(vec dst, vec src) %{
! match(Set dst (AbsVB src));
! format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
! ins_encode %{
! uint vlen = vector_length(this);
! if (vlen <= 16) {
! __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
! } else {
! int vlen_enc = vector_length_encoding(this);
! __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
! }
! %}
! ins_pipe( pipe_slow );
! %}
!
! instruct vabsS_reg(vec dst, vec src) %{
! match(Set dst (AbsVS src));
! format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
! ins_encode %{
! uint vlen = vector_length(this);
! if (vlen <= 8) {
__ pabsw($dst$$XMMRegister, $src$$XMMRegister);
} else {
int vlen_enc = vector_length_encoding(this);
__ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
}
*** 5169,5188 ****
instruct vabsL_reg(vec dst, vec src) %{
match(Set dst (AbsVL src));
format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
ins_encode %{
assert(UseAVX > 2, "required");
! int vector_len = vector_length_encoding(this);
! __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- ABSNEG --------------------------------------
instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
! predicate(n->as_Vector()->length() != 4); // handled by 1-operand instruction vabsneg4F
match(Set dst (AbsVF src));
match(Set dst (NegVF src));
effect(TEMP scratch);
format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
ins_cost(150);
--- 7162,7184 ----
instruct vabsL_reg(vec dst, vec src) %{
match(Set dst (AbsVL src));
format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
ins_encode %{
assert(UseAVX > 2, "required");
! int vlen_enc = vector_length_encoding(this);
! if (!VM_Version::supports_avx512vl()) {
! vlen_enc = Assembler::AVX_512bit;
! }
! __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- ABSNEG --------------------------------------
instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
! predicate(vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
match(Set dst (AbsVF src));
match(Set dst (NegVF src));
effect(TEMP scratch);
format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
ins_cost(150);
*** 5199,5209 ****
%}
ins_pipe( pipe_slow );
%}
instruct vabsneg4F(vec dst, rRegI scratch) %{
! predicate(n->as_Vector()->length() == 4);
match(Set dst (AbsVF dst));
match(Set dst (NegVF dst));
effect(TEMP scratch);
format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
ins_cost(150);
--- 7195,7205 ----
%}
ins_pipe( pipe_slow );
%}
instruct vabsneg4F(vec dst, rRegI scratch) %{
! predicate(vector_length(n) == 4);
match(Set dst (AbsVF dst));
match(Set dst (NegVF dst));
effect(TEMP scratch);
format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
ins_cost(150);
*** 5231,5297 ****
}
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- FMA --------------------------------------
// a * b + c
instruct vfmaF_reg(vec a, vec b, vec c) %{
match(Set c (FmaVF c (Binary a b)));
format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
! int vector_len = vector_length_encoding(this);
! __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vfmaF_mem(vec a, memory b, vec c) %{
match(Set c (FmaVF c (Binary a (LoadVector b))));
format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
! int vector_len = vector_length_encoding(this);
! __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vfmaD_reg(vec a, vec b, vec c) %{
match(Set c (FmaVD c (Binary a b)));
format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
! int vector_len = vector_length_encoding(this);
! __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vfmaD_mem(vec a, memory b, vec c) %{
match(Set c (FmaVD c (Binary a (LoadVector b))));
format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
! int vector_len = vector_length_encoding(this);
! __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Vector Multiply Add --------------------------------------
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
predicate(UseAVX == 0);
match(Set dst (MulAddVS2VI dst src1));
! format %{ "pmaddwd $dst,$dst,$src1\t! muladd packedStoI" %}
ins_encode %{
__ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
--- 7227,7791 ----
}
%}
ins_pipe( pipe_slow );
%}
+ //------------------------------------- VectorTest --------------------------------------------
+
+ #ifdef _LP64
+ // All-true flavor: selected when the VectorTest node's predicate is
+ // BoolTest::overflow. The result is derived from the carry flag produced by
+ // (v)ptest / kortest (setb(carrySet)), so $dst becomes 1 only when the test
+ // reports "all lanes set" for the src1/src2 pair; movzbl widens the byte to int.
+ instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
+ match(Set dst (VectorTest src1 src2 ));
+ effect(KILL cr);
+ format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %}
+ ins_encode %{
+ int vlen = vector_length_in_bytes(this, $src1);
+ int vlen_enc = vector_length_encoding(vlen);
+ if (vlen <= 32) {
+ if (UseAVX == 0) {
+ // SSE ptest handles at most a 128-bit (16-byte) operand.
+ assert(vlen <= 16, "required");
+ __ ptest($src1$$XMMRegister, $src2$$XMMRegister);
+ } else {
+ __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
+ }
+ } else {
+ // 64-byte vectors: compare into a mask register and test it instead,
+ // since there is no 512-bit (v)ptest.
+ KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
+ __ evpcmpeqb(ktmp, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
+ __ kortestql(ktmp, ktmp);
+ }
+ __ setb(Assembler::carrySet, $dst$$Register);
+ __ movzbl($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // Any-true flavor: selected when the node's predicate is BoolTest::ne.
+ // Same instruction selection as above, but the result is taken from the
+ // zero flag (setb(notZero)), so $dst becomes 1 if any tested bit survives.
+ instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
+ match(Set dst (VectorTest src1 src2 ));
+ effect(KILL cr);
+ format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %}
+ ins_encode %{
+ int vlen = vector_length_in_bytes(this, $src1);
+ int vlen_enc = vector_length_encoding(vlen);
+ if (vlen <= 32) {
+ if (UseAVX == 0) {
+ assert(vlen <= 16, "required");
+ __ ptest($src1$$XMMRegister, $src2$$XMMRegister);
+ } else {
+ __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
+ }
+ } else {
+ KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
+ __ evpcmpeqb(ktmp, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
+ __ ktestql(ktmp, ktmp);
+ }
+ __ setb(Assembler::notZero, $dst$$Register);
+ __ movzbl($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+ #endif
+
+ //------------------------------------- LoadMask --------------------------------------------
+
+ // Expands a boolean (byte) vector into a full vector mask sized by the
+ // destination's element type. The actual expansion sequence is delegated to
+ // MacroAssembler::load_vector_mask, parameterized by the destination length
+ // in bytes and element basic type. TEMP dst: dst may not alias src.
+ instruct loadMask(vec dst, vec src) %{
+ match(Set dst (VectorLoadMask src));
+ effect(TEMP dst);
+ format %{ "vector_loadmask_byte $dst,$src\n\t" %}
+ ins_encode %{
+ int vlen_in_bytes = vector_length_in_bytes(this);
+ BasicType elem_bt = vector_element_basic_type(this);
+
+ __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ //------------------------------------- StoreMask --------------------------------------------
+
+ // VectorStoreMask narrows a vector mask (lanes presumed to be 0 or -1 —
+ // TODO confirm against VectorLoadMask producers) down to one byte per lane
+ // holding 0 or 1: lanes are packed down to byte width with signed-saturating
+ // packs, then pabsb/vpabsb maps -1 to 1. "size" is the element size in bytes
+ // of the input mask; the variants below are split by lane size, vector
+ // length, and available ISA level (SSE / AVX2 / AVX-512 down-converts).
+
+ // 1-byte lanes: already byte-sized, just take the absolute value.
+ instruct storeMask1B(vec dst, vec src, immI_1 size) %{
+ predicate(vector_length(n) < 64 || VM_Version::supports_avx512vlbw());
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ assert(UseSSE >= 3, "required");
+ if (vector_length_in_bytes(this) <= 16) {
+ __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
+ } else {
+ assert(UseAVX >= 2, "required");
+ int src_vlen_enc = vector_length_encoding(this, $src);
+ __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
+ }
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 2-byte lanes, <= 8 elements: |x| then pack words to bytes.
+ instruct storeMask2B(vec dst, vec src, immI_2 size) %{
+ predicate(vector_length(n) <= 8);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\n\t" %}
+ ins_encode %{
+ assert(UseSSE >= 3, "required");
+ __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
+ __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 2-byte lanes, 16 elements (256-bit src) without AVX512BW: fold the upper
+ // 128-bit half onto the lower via extract + pack, then abs.
+ instruct vstoreMask2B(vec dst, vec src, immI_2 size) %{
+ predicate(vector_length(n) == 16 && !VM_Version::supports_avx512bw());
+ match(Set dst (VectorStoreMask src size));
+ effect(TEMP dst);
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ int vlen_enc = Assembler::AVX_128bit;
+ __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
+ __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister,vlen_enc);
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 2-byte lanes with AVX512BW: single word->byte down-convert.
+ instruct vstoreMask2B_evex(vec dst, vec src, immI_2 size) %{
+ predicate(VM_Version::supports_avx512bw());
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ int src_vlen_enc = vector_length_encoding(this, $src);
+ int dst_vlen_enc = vector_length_encoding(this);
+ __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 4-byte lanes, <= 4 elements, pre-AVX512: |x|, pack dwords->words->bytes.
+ instruct storeMask4B(vec dst, vec src, immI_4 size) %{
+ predicate (vector_length(n) <= 4 && UseAVX <= 2);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ assert(UseSSE >= 3, "required");
+ __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
+ __ packssdw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 4-byte lanes, 8 elements (256-bit src), pre-AVX512: fold upper half then
+ // pack twice and abs.
+ instruct vstoreMask4B(vec dst, vec src, immI_4 size) %{
+ predicate(vector_length(n) == 8 && UseAVX <= 2);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ effect(TEMP dst);
+ ins_encode %{
+ int vlen_enc = Assembler::AVX_128bit;
+ __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
+ __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 4-byte lanes with AVX-512: dword->byte down-convert. Without AVX512VL the
+ // down-convert must use the full 512-bit encoding.
+ instruct vstoreMask4B_evex(vec dst, vec src, immI_4 size) %{
+ predicate(UseAVX > 2);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ int src_vlen_enc = vector_length_encoding(this, $src);
+ int dst_vlen_enc = vector_length_encoding(this);
+ if (!VM_Version::supports_avx512vl()) {
+ src_vlen_enc = Assembler::AVX_512bit;
+ }
+ __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 8-byte lanes, 2 elements: pshufd(0x8) compresses the two quadword lanes
+ // into the low dwords, then pack down and abs.
+ instruct storeMask8B(vec dst, vec src, immI_8 size) %{
+ predicate(vector_length(n) == 2 && UseAVX <= 2);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ assert(UseSSE >= 3, "required");
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
+ __ packssdw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pabsb($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 8-byte lanes, 4 elements (256-bit src), pre-AVX512: gather the even
+ // dwords of each quadword (vpshufps 0x88), merge the upper half via
+ // extract + blend, then pack down and abs.
+ instruct storeMask8B_avx(vec dst, vec src, immI_8 size, legVec vtmp) %{
+ predicate(vector_length(n) == 4 && UseAVX <= 2);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t! using $vtmp as TEMP" %}
+ effect(TEMP dst, TEMP vtmp);
+ ins_encode %{
+ int vlen_enc = Assembler::AVX_128bit;
+ __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
+ __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
+ __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
+ __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // 8-byte lanes with AVX-512: quadword->byte down-convert; same AVX512VL
+ // fallback to the 512-bit encoding as the 4-byte variant.
+ instruct vstoreMask8B_evex(vec dst, vec src, immI_8 size) %{
+ predicate(UseAVX > 2);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ int src_vlen_enc = vector_length_encoding(this, $src);
+ int dst_vlen_enc = vector_length_encoding(this);
+ if (!VM_Version::supports_avx512vl()) {
+ src_vlen_enc = Assembler::AVX_512bit;
+ }
+ __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ //-------------------------------- Load Iota Indices ----------------------------------
+
+ // Loads the constant iota (0,1,2,...) byte-index vector from a constant
+ // table via MacroAssembler::load_iota_indices, sized by the destination's
+ // length in bytes. Only matched for T_BYTE element vectors; $scratch holds
+ // the table address.
+ instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{
+ predicate(vector_element_basic_type(n) == T_BYTE);
+ match(Set dst (VectorLoadConst src));
+ effect(TEMP scratch);
+ format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
+ ins_encode %{
+ int vlen_in_bytes = vector_length_in_bytes(this);
+ __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ //-------------------------------- Rearrange ----------------------------------
+
+ // VectorLoadShuffle converts a byte vector of lane indices into whatever
+ // shuffle format the matching VectorRearrange variant consumes. For byte
+ // lanes the indices are used as-is; for wider lanes they are zero-extended
+ // and, on platforms without a native per-lane permute, scaled/duplicated
+ // into per-byte (or per-dword) indices. Each loadShuffle* predicate is
+ // deliberately aligned with its rearrange* counterpart.
+
+ // LoadShuffle/Rearrange for Byte
+
+ // Byte lanes: pshufb/vpermb take byte indices directly, so no conversion.
+ instruct loadShuffleB(vec dst) %{
+ predicate(vector_element_basic_type(n) == T_BYTE);
+ match(Set dst (VectorLoadShuffle dst));
+ format %{ "vector_load_shuffle $dst, $dst" %}
+ ins_encode %{
+ // empty
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct rearrangeB(vec dst, vec shuffle) %{
+ predicate(vector_element_basic_type(n) == T_BYTE &&
+ vector_length(n) < 32);
+ match(Set dst (VectorRearrange dst shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $dst" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // NOTE(review): 256-bit vpshufb shuffles within each 128-bit lane only, so
+ // this variant presumably relies on indices staying in-lane — confirm
+ // against the shuffle producers.
+ instruct rearrangeB_avx(vec dst, vec src, vec shuffle) %{
+ predicate(vector_element_basic_type(n) == T_BYTE &&
+ vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ __ vpshufb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, Assembler::AVX_256bit);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // With AVX512_VBMI, vpermb performs a full cross-lane byte permute.
+ instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{
+ predicate(vector_element_basic_type(n) == T_BYTE &&
+ vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // LoadShuffle/Rearrange for Short
+
+ instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{
+ predicate(vector_element_basic_type(n) == T_SHORT &&
+ vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
+ match(Set dst (VectorLoadShuffle src));
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
+ ins_encode %{
+ // Create a byte shuffle mask from short shuffle mask
+ // only byte shuffle instruction available on these platforms
+
+ // Multiply each shuffle by two to get byte index
+ __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
+ __ psllw($vtmp$$XMMRegister, 1);
+
+ // Duplicate to create 2 copies of byte index
+ __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
+ __ psllw($dst$$XMMRegister, 8);
+ __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
+
+ // Add one to get alternate byte index
+ __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register);
+ __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // Consumes the per-byte indices prepared by loadShuffleS above.
+ instruct rearrangeS(vec dst, vec shuffle) %{
+ predicate(vector_element_basic_type(n) == T_SHORT &&
+ vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
+ match(Set dst (VectorRearrange dst shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $dst" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // With AVX512BW, vpermw takes word indices directly; just zero-extend the
+ // byte indices to words. Without AVX512VL the 512-bit encoding is required.
+ instruct loadShuffleS_evex(vec dst, vec src) %{
+ predicate(vector_element_basic_type(n) == T_SHORT &&
+ VM_Version::supports_avx512bw());
+ match(Set dst (VectorLoadShuffle src));
+ format %{ "vector_load_shuffle $dst, $src" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ if (!VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit;
+ }
+ __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
+ predicate(vector_element_basic_type(n) == T_SHORT &&
+ VM_Version::supports_avx512bw());
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ if (!VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit;
+ }
+ __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // LoadShuffle/Rearrange for Integer and Float
+
+ instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{
+ predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
+ vector_length(n) == 4 && UseAVX < 2);
+ match(Set dst (VectorLoadShuffle src));
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+
+ // Create a byte shuffle mask from int shuffle mask
+ // only byte shuffle instruction available on these platforms
+
+ // Duplicate and multiply each shuffle by 4
+ __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
+ __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
+ __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
+ __ psllw($vtmp$$XMMRegister, 2);
+
+ // Duplicate again to create 4 copies of byte index
+ __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
+ __ psllw($dst$$XMMRegister, 8);
+ __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
+
+ // Add 3,2,1,0 to get alternate byte index
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register);
+ __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // Consumes the per-byte indices prepared by loadShuffleI above.
+ instruct rearrangeI(vec dst, vec shuffle) %{
+ predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
+ vector_length(n) == 4 && UseAVX < 2);
+ match(Set dst (VectorRearrange dst shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $dst" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // AVX2+: vpermd takes dword indices; zero-extend byte indices to dwords.
+ instruct loadShuffleI_avx(vec dst, vec src) %{
+ predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
+ UseAVX >= 2);
+ match(Set dst (VectorLoadShuffle src));
+ format %{ "vector_load_shuffle $dst, $src" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
+ predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
+ UseAVX >= 2);
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ // vpermd has no 128-bit form; promote to the 256-bit encoding.
+ if (vlen_enc == Assembler::AVX_128bit) {
+ vlen_enc = Assembler::AVX_256bit;
+ }
+ __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // LoadShuffle/Rearrange for Long and Double
+
+ instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{
+ predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
+ vector_length(n) < 8 && !VM_Version::supports_avx512vl());
+ match(Set dst (VectorLoadShuffle src));
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+
+ int vlen_enc = vector_length_encoding(this);
+ // Create a double word shuffle mask from long shuffle mask
+ // only double word shuffle instruction available on these platforms
+
+ // Multiply each shuffle by two to get double word index
+ __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);
+
+ // Duplicate each double word shuffle
+ __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
+ __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
+
+ // Add one to get alternate double word index
+ __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // Consumes the doubled dword indices from loadShuffleL: each quadword lane
+ // is moved as a pair of dwords via vpermd.
+ instruct rearrangeL(vec dst, vec src, vec shuffle) %{
+ predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
+ vector_length(n) < 8 && !VM_Version::supports_avx512vl());
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+
+ int vlen_enc = vector_length_encoding(this);
+ __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ // AVX-512 (with VL, or 8-element 512-bit vectors): vpermq takes quadword
+ // indices; zero-extend the byte indices to quadwords.
+ instruct loadShuffleL_evex(vec dst, vec src) %{
+ predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
+ (vector_length(n) == 8 || VM_Version::supports_avx512vl()));
+ match(Set dst (VectorLoadShuffle src));
+ format %{ "vector_load_shuffle $dst, $src" %}
+ ins_encode %{
+ assert(UseAVX > 2, "required");
+
+ int vlen_enc = vector_length_encoding(this);
+ __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
+ instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
+ predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
+ (vector_length(n) == 8 || VM_Version::supports_avx512vl()))
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ assert(UseAVX > 2, "required");
+
+ int vlen_enc = vector_length_encoding(this);
+ // Variable vpermq has no 128-bit form; promote to the 256-bit encoding.
+ if (vlen_enc == Assembler::AVX_128bit) {
+ vlen_enc = Assembler::AVX_256bit;
+ }
+ __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+ %}
+
// --------------------------------- FMA --------------------------------------
// a * b + c
instruct vfmaF_reg(vec a, vec b, vec c) %{
match(Set c (FmaVF c (Binary a b)));
format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
! int vlen_enc = vector_length_encoding(this);
! __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vfmaF_mem(vec a, memory b, vec c) %{
match(Set c (FmaVF c (Binary a (LoadVector b))));
format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
! int vlen_enc = vector_length_encoding(this);
! __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vfmaD_reg(vec a, vec b, vec c) %{
match(Set c (FmaVD c (Binary a b)));
format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
! int vlen_enc = vector_length_encoding(this);
! __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vfmaD_mem(vec a, memory b, vec c) %{
match(Set c (FmaVD c (Binary a (LoadVector b))));
format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
! int vlen_enc = vector_length_encoding(this);
! __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Vector Multiply Add --------------------------------------
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
predicate(UseAVX == 0);
match(Set dst (MulAddVS2VI dst src1));
! format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
ins_encode %{
__ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
*** 5299,5310 ****
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (MulAddVS2VI src1 src2));
format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
ins_encode %{
! int vector_len = vector_length_encoding(this);
! __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Vector Multiply Add Add ----------------------------------
--- 7793,7804 ----
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
predicate(UseAVX > 0);
match(Set dst (MulAddVS2VI src1 src2));
format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
ins_encode %{
! int vlen_enc = vector_length_encoding(this);
! __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Vector Multiply Add Add ----------------------------------
*** 5313,5324 ****
predicate(VM_Version::supports_avx512_vnni());
match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
ins_encode %{
assert(UseAVX > 2, "required");
! int vector_len = vector_length_encoding(this);
! __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
ins_cost(10);
%}
--- 7807,7818 ----
predicate(VM_Version::supports_avx512_vnni());
match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
ins_encode %{
assert(UseAVX > 2, "required");
! int vlen_enc = vector_length_encoding(this);
! __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
ins_cost(10);
%}
*** 5328,5339 ****
match(Set dst (PopCountVI src));
format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %}
ins_encode %{
assert(UsePopCountInstruction, "not enabled");
! int vector_len = vector_length_encoding(this);
! __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Bitwise Ternary Logic ----------------------------------
--- 7822,7833 ----
match(Set dst (PopCountVI src));
format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %}
ins_encode %{
assert(UsePopCountInstruction, "not enabled");
! int vlen_enc = vector_length_encoding(this);
! __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Bitwise Ternary Logic ----------------------------------
< prev index next >