--- old/src/cpu/aarch64/vm/aarch64.ad 2015-11-07 22:06:56.795446700 -0800 +++ new/src/cpu/aarch64/vm/aarch64.ad 2015-11-07 22:06:56.578446700 -0800 @@ -1079,10 +1079,10 @@ // and for a volatile write we need // // stlr - // + // // Alternatively, we can implement them by pairing a normal // load/store with a memory barrier. For a volatile read we need - // + // // ldr // dmb ishld // @@ -1240,7 +1240,7 @@ // Alternatively, we can elide generation of the dmb instructions // and plant the alternative CompareAndSwap macro-instruction // sequence (which uses ldaxr). - // + // // Of course, the above only applies when we see these signature // configurations. We still want to plant dmb instructions in any // other cases where we may see a MemBarAcquire, MemBarRelease or @@ -1367,7 +1367,7 @@ opcode = parent->Opcode(); return opcode == Op_MemBarRelease; } - + // 2) card mark detection helper // helper predicate which can be used to detect a volatile membar @@ -1383,7 +1383,7 @@ // true // // iii) the node's Mem projection feeds a StoreCM node. - + bool is_card_mark_membar(const MemBarNode *barrier) { if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) { @@ -1402,7 +1402,7 @@ return true; } } - + return false; } @@ -1430,7 +1430,7 @@ // where // || and \\ represent Ctl and Mem feeds via Proj nodes // | \ and / indicate further routing of the Ctl and Mem feeds - // + // // this is the graph we see for non-object stores. however, for a // volatile Object store (StoreN/P) we may see other nodes below the // leading membar because of the need for a GC pre- or post-write @@ -1592,7 +1592,7 @@ // ordering but neither will a releasing store (stlr). The latter // guarantees that the object put is visible but does not guarantee // that writes by other threads have also been observed. - // + // // So, returning to the task of translating the object put and the // leading/trailing membar nodes: what do the non-normal node graph // look like for these 2 special cases? and how can we determine the @@ -1731,7 +1731,7 @@ // | | | | // C | M | M | M | // \ | | / - // . . . + // . . . // (post write subtree elided) // . . . // C \ M / @@ -1812,12 +1812,12 @@ // | | | / / // | Region . . . Phi[M] _____/ // | / | / - // | | / + // | | / // | . . . . . . | / // | / | / // Region | | Phi[M] // | | | / Bot - // \ MergeMem + // \ MergeMem // \ / // MemBarVolatile // @@ -1858,7 +1858,7 @@ // to a trailing barrier via a MergeMem. That feed is either direct // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier // memory flow (for G1). - // + // // The predicates controlling generation of instructions for store // and barrier nodes employ a few simple helper functions (described // below) which identify the presence or absence of all these @@ -2112,8 +2112,8 @@ x = x->in(MemNode::Memory); } else { // the merge should get its Bottom mem feed from the leading membar - x = mm->in(Compile::AliasIdxBot); - } + x = mm->in(Compile::AliasIdxBot); + } // ensure this is a non control projection if (!x->is_Proj() || x->is_CFG()) { @@ -2190,12 +2190,12 @@ // . . . // | // MemBarVolatile (card mark) - // | | + // | | // | StoreCM // | | // | . . . - // Bot | / - // MergeMem + // Bot | / + // MergeMem // | // | // MemBarVolatile {trailing} @@ -2203,10 +2203,10 @@ // 2) // MemBarRelease/CPUOrder (leading) // | - // | + // | // |\ . . . - // | \ | - // | \ MemBarVolatile (card mark) + // | \ | + // | \ MemBarVolatile (card mark) // | \ | | // \ \ | StoreCM . . . // \ \ | @@ -2231,7 +2231,7 @@ // | \ \ | StoreCM . . . // | \ \ | // \ \ Phi - // \ \ / + // \ \ / // \ Phi // \ / // Phi . . . @@ -2506,7 +2506,7 @@ return (x->is_Load() && x->as_Load()->is_acquire()); } - + // now check for an unsafe volatile get // need to check for @@ -2644,7 +2644,7 @@ } membar = child_membar(membar); - + if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) { return false; } @@ -2703,7 +2703,7 @@ // first we check if this is part of a card mark. if so then we have // to generate a StoreLoad barrier - + if (is_card_mark_membar(mbvol)) { return false; } @@ -2769,7 +2769,7 @@ if (!is_card_mark_membar(mbvol)) { return true; } - + // we found a card mark -- just make sure we have a trailing barrier return (card_mark_to_trailing(mbvol) != NULL); @@ -2808,7 +2808,7 @@ assert(barrier->Opcode() == Op_MemBarCPUOrder, "CAS not fed by cpuorder membar!"); - + MemBarNode *b = parent_membar(barrier); assert ((b != NULL && b->Opcode() == Op_MemBarRelease), "CAS not fed by cpuorder+release membar pair!"); @@ -3463,6 +3463,17 @@ return true; // Per default match rules are supported. } +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + // Add rules here. + + return ret_value; // Per default match rules are supported. +} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -4663,7 +4674,7 @@ call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf); } if (call == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } @@ -4671,7 +4682,7 @@ // Emit stub for static call address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); if (stub == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } } @@ -4681,7 +4692,7 @@ MacroAssembler _masm(&cbuf); address call = __ ic_call((address)$meth$$method); if (call == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } %} @@ -4706,7 +4717,7 @@ if (cb) { address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); if (call == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } } else { --- old/src/cpu/aarch64/vm/c2_globals_aarch64.hpp 2015-11-07 22:06:58.693446700 -0800 +++ new/src/cpu/aarch64/vm/c2_globals_aarch64.hpp 2015-11-07 22:06:58.482446700 -0800 @@ -73,6 +73,7 @@ define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); --- old/src/cpu/ppc/vm/c2_globals_ppc.hpp 2015-11-07 22:07:00.067446700 -0800 +++ new/src/cpu/ppc/vm/c2_globals_ppc.hpp 2015-11-07 22:06:59.858446700 -0800 @@ -61,6 +61,7 @@ define_pd_global(bool, UseCISCSpill, false); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); // GL: // Detected a problem with unscaled compressed oops and // narrow_oop_use_complex_address() == false. --- old/src/cpu/ppc/vm/ppc.ad 2015-11-07 22:07:01.429446700 -0800 +++ new/src/cpu/ppc/vm/ppc.ad 2015-11-07 22:07:01.210446700 -0800 @@ -2064,6 +2064,17 @@ return true; // Per default match rules are supported. } +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + // Add rules here. + + return ret_value; // Per default match rules are supported. +} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -3416,7 +3427,7 @@ // The stub for call to interpreter. address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); if (stub == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } } @@ -3465,7 +3476,7 @@ // The stub for call to interpreter. address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); if (stub == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } @@ -6912,7 +6923,7 @@ n_compare->_opnds[0] = op_crx; n_compare->_opnds[1] = op_src; n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR); - + decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode(); n2->add_req(n_region, n_src, n1); n2->_opnds[0] = op_dst; @@ -10589,7 +10600,7 @@ instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{ // Needs matchrule, see cmpDUnordered. - match(Set crx (CmpF src1 src2)); + match(Set crx (CmpF src1 src2)); // no match-rule, false predicate predicate(false); @@ -10698,13 +10709,13 @@ %} instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{ - // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the - // node right before the conditional move using it. + // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the + // node right before the conditional move using it. // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7, // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle // crashed in register allocation where the flags Reg between cmpDUnoredered and a // conditional move was supposed to be spilled. - match(Set crx (CmpD src1 src2)); + match(Set crx (CmpD src1 src2)); // False predicate, shall not be matched. predicate(false); --- old/src/cpu/sparc/vm/c2_globals_sparc.hpp 2015-11-07 22:07:03.169446700 -0800 +++ new/src/cpu/sparc/vm/c2_globals_sparc.hpp 2015-11-07 22:07:02.958446700 -0800 @@ -65,6 +65,7 @@ define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoScheduling, true); define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); #ifdef _LP64 // We need to make sure that all generated code is within --- old/src/cpu/sparc/vm/sparc.ad 2015-11-07 22:07:04.543446700 -0800 +++ new/src/cpu/sparc/vm/sparc.ad 2015-11-07 22:07:04.324446700 -0800 @@ -1860,6 +1860,17 @@ return true; // Per default match rules are supported. } +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + // Add rules here. + + return ret_value; // Per default match rules are supported. +} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -1905,7 +1916,7 @@ } // Current (2013) SPARC platforms need to read original key -// to construct decryption expanded key +// to construct decryption expanded key const bool Matcher::pass_original_key_for_aes() { return true; } @@ -2612,7 +2623,7 @@ if (stub == NULL && !(TraceJumps && Compile::current()->in_scratch_emit_size())) { ciEnv::current()->record_failure("CodeCache is full"); return; - } + } } %} @@ -3132,10 +3143,10 @@ // AVOID_NONE - instruction can be placed anywhere // AVOID_BEFORE - instruction cannot be placed after an // instruction with MachNode::AVOID_AFTER -// AVOID_AFTER - the next instruction cannot be the one +// AVOID_AFTER - the next instruction cannot be the one // with MachNode::AVOID_BEFORE -// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at -// the same time +// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at +// the same time ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE); ins_attrib ins_short_branch(0); // Required flag: is this instruction a --- old/src/cpu/x86/vm/assembler_x86.cpp 2015-11-07 22:07:06.235446700 -0800 +++ new/src/cpu/x86/vm/assembler_x86.cpp 2015-11-07 22:07:06.010446700 -0800 @@ -313,7 +313,7 @@ switch (cur_tuple_type) { case EVEX_FV: if ((cur_encoding & VEX_W) == VEX_W) { - mod_idx += 2 + ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; + mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2; } else { mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; } @@ -394,25 +394,27 @@ int mod_idx = 0; // We will test if the displacement fits the compressed format and if so // apply the compression to the displacment iff the result is8bit. - if (VM_Version::supports_evex() && _is_evex_instruction) { - switch (_tuple_type) { + if (VM_Version::supports_evex() && (_attributes != NULL) && _attributes->is_evex_instruction()) { + int evex_encoding = _attributes->get_evex_encoding(); + int tuple_type = _attributes->get_tuple_type(); + switch (tuple_type) { case EVEX_FV: - if ((_evex_encoding & VEX_W) == VEX_W) { - mod_idx += 2 + ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; + if ((evex_encoding & VEX_W) == VEX_W) { + mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2; } else { - mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; + mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; } break; case EVEX_HV: - mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; + mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; break; case EVEX_FVM: break; case EVEX_T1S: - switch (_input_size_in_bits) { + switch (_attributes->get_input_size()) { case EVEX_8bit: break; @@ -433,7 +435,7 @@ case EVEX_T1F: case EVEX_T2: case EVEX_T4: - mod_idx = (_input_size_in_bits == EVEX_64bit) ? 1 : 0; + mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0; break; case EVEX_T8: @@ -459,8 +461,9 @@ break; } - if (_avx_vector_len >= AVX_128bit && _avx_vector_len <= AVX_512bit) { - int disp_factor = tuple_table[_tuple_type + mod_idx][_avx_vector_len]; + int vector_len = _attributes->get_vector_len(); + if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) { + int disp_factor = tuple_table[tuple_type + mod_idx][vector_len]; if ((disp % disp_factor) == 0) { int new_disp = disp / disp_factor; if (is8bit(new_disp)) { @@ -591,7 +594,6 @@ emit_data(disp, rspec, disp32_operand); } } - _is_evex_instruction = false; } void Assembler::emit_operand(XMMRegister reg, Register base, Register index, @@ -770,7 +772,7 @@ case 0x55: // andnps case 0x56: // orps case 0x57: // xorps - case 0x59: //mulpd + case 0x59: // mulpd case 0x6E: // movd case 0x7E: // movd case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush @@ -1234,51 +1236,53 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x58, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::addsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x58, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::addss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x58, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::addss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x58, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::aesdec(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDE); emit_operand(dst, src); } void Assembler::aesdec(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDE); emit_int8(0xC0 | encode); } @@ -1286,16 +1290,16 @@ void Assembler::aesdeclast(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDF); emit_operand(dst, src); } void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDF); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1303,16 +1307,16 @@ void Assembler::aesenc(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDC); emit_operand(dst, src); } void Assembler::aesenc(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDC); emit_int8(0xC0 | encode); } @@ -1320,16 +1324,16 @@ void Assembler::aesenclast(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDD); emit_operand(dst, src); } void Assembler::aesenclast(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDD); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1361,15 +1365,17 @@ void Assembler::andnl(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF2); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andnl(Register dst, Register src1, Address src2) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_legacy(dst, src1, src2); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF2); emit_operand(dst, src2); } @@ -1396,45 +1402,51 @@ void Assembler::blsil(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsil(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_legacy(rbx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rbx, src); } void Assembler::blsmskl(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsmskl(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_legacy(rdx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rdx, src); } void Assembler::blsrl(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsrl(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_legacy(rcx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rcx, src); } @@ -1581,36 +1593,38 @@ // NOTE: dbx seems to decode this as comiss even though the // 0x66 is there. Strangly ucomisd comes out correct NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } else { - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);; + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x2F); + emit_operand(dst, src); } void Assembler::comisd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } else { - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x2F); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::comiss(XMMRegister dst, Address src) { - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x2F); + emit_operand(dst, src); } void Assembler::comiss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x2F); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cpuid() { @@ -1699,100 +1713,113 @@ void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE6); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5B); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5A); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsd2ss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1F; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5A); + emit_operand(dst, src); } void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VM_Version::supports_evex()); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); - } else { - emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x2A); + emit_operand(dst, src); } void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x2A); + emit_operand(dst, src); } void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5A); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtss2sd(XMMRegister dst, Address src) { - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5A); + emit_operand(dst, src); } void Assembler::cvttsd2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2C); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvttss2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2C); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1807,36 +1834,38 @@ void Assembler::divsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::divsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::divss(XMMRegister dst, Address src) { - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::divss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::emms() { @@ -2082,36 +2111,26 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_avx512novl()) { - int vector_len = AVX_512bit; - int dst_enc = dst->encoding(); - int src_enc = src->encoding(); - int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); - emit_int8(0x28); - emit_int8((unsigned char)(0xC0 | encode)); - } else if (VM_Version::supports_evex()) { - emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66); - } + int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x28); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movaps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_avx512novl()) { - int vector_len = AVX_512bit; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, vector_len); - emit_int8(0x28); - emit_int8((unsigned char)(0xC0 | encode)); - } else { - emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE); - } + int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x28); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movlhps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8(0x16); emit_int8((unsigned char)(0xC0 | encode)); } @@ -2125,39 +2144,36 @@ } void Assembler::movddup(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse3(), "")); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false, VEX_OPCODE_0F, - /* rex_w */ VM_Version::supports_evex(), AVX_128bit, /* legacy_mode */ false); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x12); emit_int8(0xC0 | encode); - } void Assembler::kmovql(KRegister dst, KRegister src) { NOT_LP64(assert(VM_Version::supports_evex(), "")); - int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, - /* no_mask_reg */ true, VEX_OPCODE_0F, /* rex_w */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0x90); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::kmovql(KRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_evex(), "")); - int dst_enc = dst->encoding(); - int nds_enc = 0; - vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE, - VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0x90); emit_operand((Register)dst, src); } void Assembler::kmovql(Address dst, KRegister src) { NOT_LP64(assert(VM_Version::supports_evex(), "")); - int src_enc = src->encoding(); - int nds_enc = 0; - vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE, - VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0x90); emit_operand((Register)src, dst); } @@ -2165,8 +2181,8 @@ void Assembler::kmovql(KRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_evex(), "")); VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE; - int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* legacy_mode */ !_legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_bw, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0x92); emit_int8((unsigned char)(0xC0 | encode)); } @@ -2174,14 +2190,16 @@ void Assembler::kmovdl(KRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_evex(), "")); VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE; - int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0x92); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::kmovwl(KRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_evex(), "")); - int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0x92); emit_int8((unsigned char)(0xC0 | encode)); } @@ -2205,190 +2223,174 @@ void Assembler::movdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x6E); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdl(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // swap src/dst to get correct prefix - int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, /* no_mask_reg */ true); + int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x7E); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x6E); emit_operand(dst, src); } void Assembler::movdl(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x7E); emit_operand(src, dst); } void Assembler::movdqa(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); + int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdqa(XMMRegister dst, Address src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_operand(dst, src); } void Assembler::movdqu(XMMRegister dst, Address src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_operand(dst, src); } void Assembler::movdqu(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdqu(Address dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } // Move Unaligned 256bit Vector void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; assert(UseAVX > 0, ""); - int vector_len = AVX_256bit; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len); + InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmovdqu(XMMRegister dst, Address src) { - _instruction_uses_vl = true; assert(UseAVX > 0, ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } InstructionMark im(this); - int vector_len = AVX_256bit; - vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len); + InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } void Assembler::vmovdqu(Address dst, XMMRegister src) { - _instruction_uses_vl = true; assert(UseAVX > 0, ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } InstructionMark im(this); - int vector_len = AVX_256bit; + InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); // swap src<->dst for encoding assert(src != xnoreg, "sanity"); - vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } // Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64) void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 0, ""); - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 0, ""); + assert(VM_Version::supports_evex(), ""); InstructionMark im(this); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 0, ""); - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - // swap src<->dst for encoding - vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 0, ""); - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 2, ""); + assert(VM_Version::supports_evex(), ""); InstructionMark im(this); - _tuple_type = EVEX_FVM; - vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 2, ""); - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); - _tuple_type = EVEX_FVM; - // swap src<->dst for encoding - vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } @@ -2434,13 +2436,12 @@ // The selection is done in MacroAssembler::movdbl() and movflt(). void Assembler::movlpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - emit_simd_arith_q(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } else { - emit_simd_arith(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x12); + emit_operand(dst, src); } void Assembler::movq( MMXRegister dst, Address src ) { @@ -2466,13 +2467,9 @@ void Assembler::movq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, /* no_mask_reg */ true); - } else { - simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7E); emit_operand(dst, src); } @@ -2480,14 +2477,9 @@ void Assembler::movq(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - simd_prefix(src, xnoreg, dst, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* rex_w */ true); - } else { - simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xD6); emit_operand(src, dst); } @@ -2510,60 +2502,56 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); - } else { - emit_simd_arith(0x10, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x10); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); - } else { - emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x10); + emit_operand(dst, src); } void Assembler::movsd(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2); - } else { - simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, /* no_mask_reg */ false); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x11); emit_operand(src, dst); } void Assembler::movss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x10); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x10); + emit_operand(dst, src); } void Assembler::movss(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x11); emit_operand(src, dst); } @@ -2655,36 +2643,38 @@ void Assembler::mulsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::mulsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::mulss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::mulss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::negl(Register dst) { @@ -2985,28 +2975,35 @@ void Assembler::packuswb(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x67); + emit_operand(dst, src); } void Assembler::packuswb(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x67); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "some form of AVX must be enabled"); - emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x67); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx2(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_3A, /* rex_w */ true, vector_len); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x00); emit_int8(0xC0 | encode); emit_int8(imm8); @@ -3020,8 +3017,8 @@ void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); InstructionMark im(this); - simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_3A, - /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x61); emit_operand(dst, src); emit_int8(imm8); @@ -3029,8 +3026,8 @@ void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x61); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3038,37 +3035,42 @@ void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x75, dst, src, VEX_SIMD_66, - false, (VM_Version::supports_avx512dq() == false)); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x75); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - assert(UseAVX > 0, "some form of AVX must be enabled"); - emit_vex_arith(0x75, dst, nds, src, VEX_SIMD_66, vector_len, - false, (VM_Version::supports_avx512dq() == false)); + assert(VM_Version::supports_avx(), ""); + assert(!VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x75); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pmovmskb(Register dst, XMMRegister src) { assert(VM_Version::supports_sse2(), ""); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F, - false, AVX_128bit, (VM_Version::supports_avx512dq() == false)); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xD7); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmovmskb(Register dst, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); - int vector_len = AVX_256bit; - int encode = vex_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, - vector_len, VEX_OPCODE_0F, true, false); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xD7); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pextrd(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x16); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3076,8 +3078,8 @@ void Assembler::pextrq(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x16); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3085,8 +3087,8 @@ void Assembler::pextrw(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse2(), ""); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xC5); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3094,8 +3096,8 @@ void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x22); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3103,8 +3105,8 @@ void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x22); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3112,8 +3114,8 @@ void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse2(), ""); - int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xC4); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3121,18 +3123,18 @@ void Assembler::pmovzxbw(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_HVM; - } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x30); emit_operand(dst, src); } void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x30); emit_int8((unsigned char)(0xC0 | encode)); } @@ -3140,10 +3142,10 @@ void Assembler::vpmovzxbw(XMMRegister dst, Address src) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this); - bool vector256 = true; assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x30); emit_operand(dst, src); } @@ -3246,43 +3248,41 @@ void Assembler::pshufb(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_ssse3(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x00); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pshufb(XMMRegister dst, Address src) { assert(VM_Version::supports_ssse3(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x00); emit_operand(dst, src); } void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { - _instruction_uses_vl = true; assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x70); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(mode & 0xFF); } void Assembler::pshufd(XMMRegister dst, Address src, int mode) { - _instruction_uses_vl = true; assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x70); emit_operand(dst, src); emit_int8(mode & 0xFF); @@ -3291,7 +3291,10 @@ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x70); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(mode & 0xFF); } @@ -3299,12 +3302,10 @@ assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } InstructionMark im(this); - simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x70); emit_operand(dst, src); emit_int8(mode & 0xFF); @@ -3313,9 +3314,9 @@ void Assembler::psrldq(XMMRegister dst, int shift) { // Shift left 128 bit value in dst XMMRegister by shift number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); // XMM3 is for /3 encoding: 66 0F 73 /3 ib - int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift); @@ -3324,9 +3325,9 @@ void Assembler::pslldq(XMMRegister dst, int shift) { // Shift left 128 bit value in dst XMMRegister by shift number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); // XMM7 is for /7 encoding: 66 0F 73 /7 ib - int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift); @@ -3336,16 +3337,16 @@ assert(VM_Version::supports_sse4_1(), ""); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); - simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x17); emit_operand(dst, src); } void Assembler::ptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x17); emit_int8((unsigned char)(0xC0 | encode)); } @@ -3353,20 +3354,18 @@ void Assembler::vptest(XMMRegister dst, Address src) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this); - int vector_len = AVX_256bit; + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); // swap src<->dst for encoding - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* rex_w */ false, - vector_len, /* legacy_mode */ true, /* no_mask_reg */ false); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x17); emit_operand(dst, src); } void Assembler::vptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x17); emit_int8((unsigned char)(0xC0 | encode)); } @@ -3374,42 +3373,47 @@ void Assembler::punpcklbw(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x60); + emit_operand(dst, src); } void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x60); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::punpckldq(XMMRegister dst, Address src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x62, dst, src, VEX_SIMD_66); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x62); + emit_operand(dst, src); } void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x62, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x62); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x6C, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x6C, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x6C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::push(int32_t imm32) { @@ -3454,16 +3458,18 @@ void Assembler::rcpps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8(0x53); - emit_int8(0xC0 | encode); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::rcpss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x53); - emit_int8(0xC0 | encode); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::rdtsc() { @@ -3622,27 +3628,28 @@ void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::sqrtsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_operand(dst, src); } void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::std() { @@ -3651,11 +3658,12 @@ void Assembler::sqrtss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_operand(dst, src); } void Assembler::stmxcsr( Address dst) { @@ -3705,38 +3713,38 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::subsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - } - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::subss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::subss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::testb(Register dst, int imm8) { @@ -3765,7 +3773,7 @@ emit_arith(0x85, 0xC0, dst, src); } -void Assembler::testl(Register dst, Address src) { +void Assembler::testl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); emit_int8((unsigned char)0x85); @@ -3792,36 +3800,38 @@ void Assembler::ucomisd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } else { - emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x2E); + emit_operand(dst, src); } void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } else { - emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x2E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::ucomiss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x2E); + emit_operand(dst, src); } void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x2E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::xabort(int8_t imm8) { @@ -3903,138 +3913,162 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } -} - + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); +} + void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } //====================VECTOR ARITHMETIC===================================== @@ -4042,414 +4076,433 @@ // Float-point vector arithmetic void Assembler::addpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x58, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::addps(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::subpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x5C, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::subps(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::mulpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x59, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::mulpd(XMMRegister dst, Address src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x59, dst, src, VEX_SIMD_66); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::mulps(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::divpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x5E, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::divps(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_operand(dst, src); } void Assembler::andpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_avx512dq()) { - emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andps(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_operand(dst, src); } void Assembler::andpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_avx512dq()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_operand(dst, src); } void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_avx512dq()) { - emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_avx512dq()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_operand(dst, src); } void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_operand(dst, src); } void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x15, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x15); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x14, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x14); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::xorpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_avx512dq()) { - emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::xorps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::xorpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_avx512dq()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_operand(dst, src); } void Assembler::xorps(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_operand(dst, src); } void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_avx512dq()) { - emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_avx512dq()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_operand(dst, src); } void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_operand(dst, src); } // Integer vector arithmetic void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx() && (vector_len == 0) || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x01); emit_int8((unsigned char)(0xC0 | encode)); } @@ -4457,280 +4510,324 @@ void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx() && (vector_len == 0) || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x02); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::paddb(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xFC, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFC); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::paddw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xFD, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFD); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::paddd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xFE, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFE); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::paddq(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD4); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::phaddw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse3(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x01); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::phaddd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse3(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x02); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFC); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFD); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFE); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD4); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFC); + emit_operand(dst, src); } void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFD); + emit_operand(dst, src); } void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFE); + emit_operand(dst, src); } void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD4); + emit_operand(dst, src); } void Assembler::psubb(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xF8, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF8); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psubw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xF9, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF9); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psubd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xFA, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFA); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psubq(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF8); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF9); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFA); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF8); + emit_operand(dst, src); } void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF9); + emit_operand(dst, src); } void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFA); + emit_operand(dst, src); } void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFB); + emit_operand(dst, src); } void Assembler::pmullw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xD5, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD5); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pmulld(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, - /* no_mask_reg */ false, VEX_OPCODE_0F_38); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x40); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD5); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x40); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 2, "requires some form of AVX"); - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); int nds_enc = nds->is_valid() ? nds->encoding() : 0; - int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x40); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD5); + emit_operand(dst, src); } void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } InstructionMark im(this); - int dst_enc = dst->encoding(); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); int nds_enc = nds->is_valid() ? nds->encoding() : 0; - vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, - VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x40); emit_operand(dst, src); } void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - } InstructionMark im(this); - int dst_enc = dst->encoding(); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); int nds_enc = nds->is_valid() ? nds->encoding() : 0; - vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, - VEX_OPCODE_0F_38, /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x40); emit_operand(dst, src); } @@ -4738,29 +4835,29 @@ // Shift packed integers left by specified number of bits. void Assembler::psllw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 71 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, - /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x71); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::pslld(XMMRegister dst, int shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 72 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false); + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x72); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::psllq(XMMRegister dst, int shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 73 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ true); + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -4768,102 +4865,111 @@ void Assembler::psllw(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pslld(XMMRegister dst, XMMRegister shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF2); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psllq(XMMRegister dst, XMMRegister shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66); - } else { - emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 71 /6 ib - emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x71); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 72 /6 ib - emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len); + int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x72); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 73 /6 ib - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len); - } + int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x73); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF2); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); } // Shift packed integers logically right by specified number of bits. void Assembler::psrlw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 71 /2 ib - int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x71); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::psrld(XMMRegister dst, int shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 72 /2 ib - int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x72); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::psrlq(XMMRegister dst, int shift) { - _instruction_uses_vl = true; // Do not confuse it with psrldq SSE2 instruction which // shifts 128 bit value in xmm register by number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 73 /2 ib - int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F, /* rex_w */ VM_Version::supports_evex()); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -4871,89 +4977,98 @@ void Assembler::psrlw(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psrld(XMMRegister dst, XMMRegister shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD2); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psrlq(XMMRegister dst, XMMRegister shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66); - } else { - emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD3); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 71 /2 ib - emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x71); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 72 /2 ib - emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len); + int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x72); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 73 /2 ib - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len); - } + int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x73); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD2); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD3); + emit_int8((unsigned char)(0xC0 | encode)); } // Shift packed integers arithmetically right by specified number of bits. void Assembler::psraw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM4 is for /4 encoding: 66 0F 71 /4 ib - int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x71); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::psrad(XMMRegister dst, int shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM4 is for /4 encoding: 66 0F 72 /4 ib - int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false); + int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x72); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -4961,128 +5076,157 @@ void Assembler::psraw(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psrad(XMMRegister dst, XMMRegister shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE2); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM4 is for /4 encoding: 66 0F 71 /4 ib - emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x71); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM4 is for /4 encoding: 66 0F 71 /4 ib - emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len); + int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x72); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE2); + emit_int8((unsigned char)(0xC0 | encode)); } // logical operations packed integers void Assembler::pand(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xDB, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDB); + emit_operand(dst, src); } void Assembler::pandn(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66); - } - else { - emit_simd_arith(0xDF, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::por(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xEB, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEB); + emit_operand(dst, src); } void Assembler::pxor(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xEF, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEF); + emit_operand(dst, src); } void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - vector_len = AVX_512bit; - } - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits @@ -5090,45 +5234,38 @@ emit_int8(0x01); } -void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) { +void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? nds->encoding() : 0; - int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1A); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 256 bits - emit_int8(0x01); + emit_int8(value & 0x01); } -void Assembler::vinsertf64x4h(XMMRegister dst, Address src) { +void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) { assert(VM_Version::supports_evex(), ""); - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_64bit; - InstructionMark im(this); - int vector_len = AVX_512bit; assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ true, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1A); emit_operand(dst, src); + // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 128 bits - emit_int8(0x01); + emit_int8(value & 0x01); } void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); + InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? nds->encoding() : 0; - int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into q0 128 bits (0..127) @@ -5139,15 +5276,14 @@ } void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) { - assert(VM_Version::supports_evex(), ""); - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - InstructionMark im(this); - int vector_len = AVX_512bit; + assert(VM_Version::supports_avx(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_operand(dst, src); // 0x00 - insert into q0 128 bits (0..127) @@ -5159,17 +5295,13 @@ void Assembler::vinsertf128h(XMMRegister dst, Address src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - vector_len = AVX_512bit; - } - InstructionMark im(this); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_operand(dst, src); // 0x01 - insert into upper 128 bits @@ -5178,11 +5310,9 @@ void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - vector_len = AVX_512bit; - } - int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits @@ -5192,16 +5322,12 @@ void Assembler::vextractf128h(Address dst, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - vector_len = AVX_512bit; - } - InstructionMark im(this); assert(src != xnoreg, "sanity"); - int src_enc = src->encoding(); - vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_operand(src, dst); // 0x01 - extract from upper 128 bits @@ -5210,11 +5336,10 @@ void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - vector_len = AVX_512bit; - } - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x38); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits @@ -5222,34 +5347,27 @@ emit_int8(0x01); } -void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) { +void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? nds->encoding() : 0; - int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_reg_mask */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x38); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 256 bits - emit_int8(0x01); + emit_int8(value & 0x01); } void Assembler::vinserti128h(XMMRegister dst, Address src) { assert(VM_Version::supports_avx2(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - vector_len = AVX_512bit; - } - InstructionMark im(this); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x38); emit_operand(dst, src); // 0x01 - insert into upper 128 bits @@ -5258,11 +5376,9 @@ void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - vector_len = AVX_512bit; - } - int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits @@ -5272,48 +5388,33 @@ void Assembler::vextracti128h(Address dst, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - vector_len = AVX_512bit; - } - InstructionMark im(this); assert(src != xnoreg, "sanity"); - int src_enc = src->encoding(); - vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_operand(src, dst); // 0x01 - extract from upper 128 bits emit_int8(0x01); } -void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) { +void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x3B); emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(0x01); + emit_int8(value & 0x01); } void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode; - if (VM_Version::supports_avx512dq()) { - encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); - } else { - encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ false, vector_len, /* legacy_mode */ true, /* no_mask_reg */ false); - } + InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_int8((unsigned char)(0xC0 | encode)); // 0x01 - extract from bits 255:128 @@ -5322,42 +5423,36 @@ emit_int8(value & 0x3); } -void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) { +void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1B); emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(0x01); + emit_int8(value & 0x1); } -void Assembler::vextractf64x4h(Address dst, XMMRegister src) { +void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_64bit; - InstructionMark im(this); - int vector_len = AVX_512bit; assert(src != xnoreg, "sanity"); - int src_enc = src->encoding(); - vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len); + InstructionMark im(this); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1B); emit_operand(src, dst); + // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(0x01); + emit_int8(value & 0x01); } void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) { - assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + assert(VM_Version::supports_avx(), ""); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - extract from bits 127:0 @@ -5369,13 +5464,11 @@ void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - InstructionMark im(this); - int vector_len = AVX_512bit; assert(src != xnoreg, "sanity"); - int src_enc = src->encoding(); - vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + InstructionMark im(this); + InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_operand(src, dst); // 0x00 - extract from bits 127:0 @@ -5387,11 +5480,8 @@ void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ !_legacy_mode_dq, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); // 0x01 - extract from bits 255:128 @@ -5402,10 +5492,9 @@ // duplicate 4-bytes integer data from src into 8 locations in dest void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int vector_len = AVX_256bit; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); + assert(VM_Version::supports_avx2(), ""); + InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x58); emit_int8((unsigned char)(0xC0 | encode)); } @@ -5413,189 +5502,170 @@ // duplicate 2-bytes integer data from src into 16 locations in dest void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); - bool vector_len = AVX_256bit; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, - vector_len, VEX_OPCODE_0F_38, false); + InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x79); emit_int8((unsigned char)(0xC0 | encode)); } // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x78); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_8bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x78); emit_operand(dst, src); } // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x79); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_16bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x79); emit_operand(dst, src); } // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x58); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x58); emit_operand(dst, src); } // duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x59); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x59); emit_operand(dst, src); } // duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x18); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) { - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x18); emit_operand(dst, src); } // duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /*vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); // swap src<->dst for encoding - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x19); emit_operand(dst, src); } // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /*vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x7A); emit_int8((unsigned char)(0xC0 | encode)); } // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x7B); emit_int8((unsigned char)(0xC0 | encode)); } // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x7C); emit_int8((unsigned char)(0xC0 | encode)); } // duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x7C); emit_int8((unsigned char)(0xC0 | encode)); } @@ -5603,8 +5673,8 @@ // Carry-Less Multiplication Quadword void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) { assert(VM_Version::supports_clmul(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x44); emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)mask); @@ -5613,8 +5683,9 @@ // Carry-Less Multiplication Quadword void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) { assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), ""); - int vector_len = AVX_128bit; - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x44); emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)mask); @@ -5622,11 +5693,9 @@ void Assembler::vzeroupper() { assert(VM_Version::supports_avx(), ""); - if (UseAVX < 3) - { - (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); - emit_int8(0x77); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x77); } @@ -6130,8 +6199,7 @@ if (pre > 0) { emit_int8(simd_pre[pre]); } - int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : - prefix_and_encode(dst_enc, src_enc); + int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc); if (opc > 0) { emit_int8(0x0F); int opc2 = simd_opc[opc]; @@ -6143,7 +6211,9 @@ } -void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) { +void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) { + int vector_len = _attributes->get_vector_len(); + bool vex_w = _attributes->is_rex_vex_w(); if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { prefix(VEX_3bytes); @@ -6167,13 +6237,13 @@ } // This is a 4 byte encoding -void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v, - int nds_enc, VexSimdPrefix pre, VexOpcode opc, - bool is_extended_context, bool is_merge_context, - int vector_len, bool no_mask_reg ){ +void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){ // EVEX 0x62 prefix prefix(EVEX_4bytes); - _evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0); + bool vex_w = _attributes->is_rex_vex_w(); + int evex_encoding = (vex_w ? VEX_W : 0); + // EVEX.b is not currently used for broadcast of single element or data rounding modes + _attributes->set_evex_encoding(evex_encoding); // P0: byte 2, initialized to RXBR`00mm // instead of not'd @@ -6195,214 +6265,127 @@ emit_int8(byte3); // P2: byte 4 as zL'Lbv'aaa - int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now) + int byte4 = (_attributes->is_no_reg_mask()) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now) // EVEX.v` for extending EVEX.vvvv or VIDX byte4 |= (evex_v ? 0: EVEX_V); // third EXEC.b for broadcast actions - byte4 |= (is_extended_context ? EVEX_Rb : 0); + byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0); // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024 - byte4 |= ((vector_len) & 0x3) << 5; + byte4 |= ((_attributes->get_vector_len())& 0x3) << 5; // last is EVEX.z for zero/merge actions - byte4 |= (is_merge_context ? EVEX_Z : 0); + byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0); emit_int8(byte4); } -void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, - VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) { +void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) { bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0; bool vex_b = adr.base_needs_rex(); bool vex_x = adr.index_needs_rex(); - _avx_vector_len = vector_len; + set_attributes(attributes); + attributes->set_current_assembler(this); // if vector length is turned off, revert to AVX for vectors smaller than 512-bit - if (_legacy_mode_vl && _instruction_uses_vl) { - switch (vector_len) { + if ((UseAVX > 2) && _legacy_mode_vl && attributes->uses_vl()) { + switch (attributes->get_vector_len()) { case AVX_128bit: case AVX_256bit: - legacy_mode = true; + attributes->set_is_legacy_mode(); break; } } - if ((UseAVX > 2) && (legacy_mode == false)) + if ((UseAVX > 2) && !attributes->is_legacy_mode()) { bool evex_r = (xreg_enc >= 16); bool evex_v = (nds_enc >= 16); - _is_evex_instruction = true; - evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg); + attributes->set_is_evex_instruction(); + evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc); } else { - vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len); + vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc); } - _instruction_uses_vl = false; } -int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, - bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) { +int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) { bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0; bool vex_b = ((src_enc & 8) == 8) ? 1 : 0; bool vex_x = false; - _avx_vector_len = vector_len; + set_attributes(attributes); + attributes->set_current_assembler(this); // if vector length is turned off, revert to AVX for vectors smaller than 512-bit - if (_legacy_mode_vl && _instruction_uses_vl) { - switch (vector_len) { + if ((UseAVX > 2) && _legacy_mode_vl && attributes->uses_vl()) { + switch (attributes->get_vector_len()) { case AVX_128bit: case AVX_256bit: - legacy_mode = true; + if ((dst_enc >= 16) | (nds_enc >= 16) | (src_enc >= 16)) { + // up propagate arithmetic instructions to meet RA requirements + attributes->set_vector_len(AVX_512bit); + } else { + attributes->set_is_legacy_mode(); + } break; } } - if ((UseAVX > 2) && (legacy_mode == false)) + if ((UseAVX > 2) && !attributes->is_legacy_mode()) { bool evex_r = (dst_enc >= 16); bool evex_v = (nds_enc >= 16); // can use vex_x as bank extender on rm encoding vex_x = (src_enc >= 16); - evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg); + attributes->set_is_evex_instruction(); + evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc); } else { - vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len); + vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc); } - _instruction_uses_vl = false; - // return modrm byte components for operands return (((dst_enc & 7) << 3) | (src_enc & 7)); } void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, - bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) { + VexOpcode opc, InstructionAttr *attributes) { if (UseAVX > 0) { int xreg_enc = xreg->encoding(); - int nds_enc = nds->is_valid() ? nds->encoding() : 0; - vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes); } else { assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); - rex_prefix(adr, xreg, pre, opc, rex_w); + rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w()); } } int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, - bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) { + VexOpcode opc, InstructionAttr *attributes) { int dst_enc = dst->encoding(); int src_enc = src->encoding(); if (UseAVX > 0) { int nds_enc = nds->is_valid() ? nds->encoding() : 0; - return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg); + return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes); } else { assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); - return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); + return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w()); } } int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre, - bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) { - int dst_enc = dst->encoding(); - int src_enc = src->encoding(); + VexOpcode opc, InstructionAttr *attributes) { int nds_enc = nds->is_valid() ? nds->encoding() : 0; - return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg); + return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes); } int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre, - bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) { - int dst_enc = dst->encoding(); - int src_enc = src->encoding(); - int nds_enc = nds->is_valid() ? nds->encoding() : 0; - return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg); -} - -void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) { - InstructionMark im(this); - simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) { - InstructionMark im(this); - simd_prefix_q(dst, dst, src, pre, no_mask_reg); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) { - int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); -} - -void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) { - int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); -} - -// Versions with no second source register (non-destructive source). -void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) { - InstructionMark im(this); - simd_prefix(dst, xnoreg, src, pre, opNoRegMask); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) { - InstructionMark im(this); - simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) { - int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); -} - -void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) { - int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); -} - -// 3-operands AVX instructions -void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src, - VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) { - InstructionMark im(this); - vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, - Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) { - InstructionMark im(this); - vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, - VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) { - int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, legacy_mode, no_mask_reg); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); -} - -void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, - VexSimdPrefix pre, int vector_len, bool no_mask_reg) { - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); + VexOpcode opc, InstructionAttr *attributes) { int nds_enc = nds->is_valid() ? nds->encoding() : 0; - int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); + return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes); } void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) { assert(VM_Version::supports_avx(), ""); assert(!VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F, /* no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xC2); emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xF & cop)); @@ -6411,7 +6394,9 @@ void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) { assert(VM_Version::supports_avx(), ""); assert(!VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst, nds, src1, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8((unsigned char)0x4B); emit_int8((unsigned char)(0xC0 | encode)); int src2_enc = src2->encoding(); @@ -6430,7 +6415,7 @@ leal(dst, src); } -void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { +void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { InstructionMark im(this); emit_int8((unsigned char)0xC7); emit_operand(rax, dst); @@ -6948,15 +6933,17 @@ void Assembler::andnq(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF2); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andnq(Register dst, Register src1, Address src2) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q_legacy(dst, src1, src2); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF2); emit_operand(dst, src2); } @@ -6983,45 +6970,51 @@ void Assembler::blsiq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsiq(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q_legacy(rbx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rbx, src); } void Assembler::blsmskq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsmskq(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q_legacy(rdx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rdx, src); } void Assembler::blsrq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsrq(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q_legacy(rcx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rcx, src); } @@ -7095,45 +7088,44 @@ void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } InstructionMark im(this); - simd_prefix_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_operand(dst, src); } void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } InstructionMark im(this); - simd_prefix_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_operand(dst, src); } void Assembler::cvttsd2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2C); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvttss2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2C); emit_int8((unsigned char)(0xC0 | encode)); } @@ -7316,7 +7308,8 @@ void Assembler::movdq(XMMRegister dst, Register src) { // table D-1 says MMX/SSE2 NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x6E); emit_int8((unsigned char)(0xC0 | encode)); } @@ -7324,8 +7317,9 @@ void Assembler::movdq(Register dst, XMMRegister src) { // table D-1 says MMX/SSE2 NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // swap src/dst to get correct prefix - int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, /* no_mask_reg */ true); + int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x7E); emit_int8((unsigned char)(0xC0 | encode)); } @@ -7458,8 +7452,8 @@ void Assembler::mulxq(Register dst1, Register dst2, Register src) { assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported"); - int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, - /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF6); emit_int8((unsigned char)(0xC0 | encode)); } @@ -7621,8 +7615,8 @@ void Assembler::rorxq(Register dst, Register src, int imm8) { assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported"); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, - /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes); emit_int8((unsigned char)0xF0); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); --- old/src/cpu/x86/vm/assembler_x86.hpp 2015-11-07 22:07:08.372446700 -0800 +++ new/src/cpu/x86/vm/assembler_x86.hpp 2015-11-07 22:07:08.160446700 -0800 @@ -438,6 +438,8 @@ }; +class InstructionAttr; + // 64-bit refect the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes // See fxsave and xsave(EVEX enabled) documentation for layout const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize); @@ -568,7 +570,8 @@ EVEX_8bit = 0, EVEX_16bit = 1, EVEX_32bit = 2, - EVEX_64bit = 3 + EVEX_64bit = 3, + EVEX_NObit = 4 }; enum WhichOperand { @@ -598,16 +601,12 @@ private: - int _evex_encoding; - int _input_size_in_bits; - int _avx_vector_len; - int _tuple_type; - bool _is_evex_instruction; bool _legacy_mode_bw; bool _legacy_mode_dq; bool _legacy_mode_vl; bool _legacy_mode_vlbw; - bool _instruction_uses_vl; + + class InstructionAttr *_attributes; // 64bit prefixes int prefix_and_encode(int reg_enc, bool byteinst = false); @@ -637,181 +636,30 @@ int rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w); - void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, - int nds_enc, VexSimdPrefix pre, VexOpcode opc, - int vector_len); - - void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v, - int nds_enc, VexSimdPrefix pre, VexOpcode opc, - bool is_extended_context, bool is_merge_context, - int vector_len, bool no_mask_reg ); + void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc); + + void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, + int nds_enc, VexSimdPrefix pre, VexOpcode opc); void vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, - bool vex_w, int vector_len, - bool legacy_mode = false, bool no_mask_reg = false); - - void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, - VexSimdPrefix pre, int vector_len = AVX_128bit, - bool no_mask_reg = false, bool legacy_mode = false) { - int dst_enc = dst->encoding(); - int nds_enc = nds->is_valid() ? nds->encoding() : 0; - vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg); - } - - void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src, - VexSimdPrefix pre, int vector_len = AVX_128bit, - bool no_mask_reg = false) { - int dst_enc = dst->encoding(); - int nds_enc = nds->is_valid() ? nds->encoding() : 0; - vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg); - } - - void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) { - bool vex_w = false; - int vector_len = AVX_128bit; - vex_prefix(src, nds->encoding(), dst->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, - vector_len, no_mask_reg); - } - - void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) { - bool vex_w = false; - int vector_len = AVX_128bit; - vex_prefix(src, nds->encoding(), dst->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, - vector_len, true, no_mask_reg); - } - - void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) { - bool vex_w = true; - int vector_len = AVX_128bit; - vex_prefix(src, nds->encoding(), dst->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, - vector_len, no_mask_reg); - } - - void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) { - bool vex_w = true; - int vector_len = AVX_128bit; - vex_prefix(src, nds->encoding(), dst->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, - vector_len, true, no_mask_reg); - } + InstructionAttr *attributes); int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, - bool vex_w, int vector_len, - bool legacy_mode, bool no_mask_reg); - - int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) { - bool vex_w = false; - int vector_len = AVX_128bit; - return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, - false, no_mask_reg); - } + InstructionAttr *attributes); - int vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) { - bool vex_w = false; - int vector_len = AVX_128bit; - return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, - true, no_mask_reg); - } - - int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) { - bool vex_w = true; - int vector_len = AVX_128bit; - return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, - false, no_mask_reg); - } + void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, + VexOpcode opc, InstructionAttr *attributes); - int vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) { - bool vex_w = true; - int vector_len = AVX_128bit; - return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, - true, no_mask_reg); - } + int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, + VexOpcode opc, InstructionAttr *attributes); - int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, - VexSimdPrefix pre, int vector_len = AVX_128bit, - VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false, - bool no_mask_reg = false) { - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int nds_enc = nds->is_valid() ? nds->encoding() : 0; - return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg); - } + int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre, + VexOpcode opc, InstructionAttr *attributes); - void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, - VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F, - bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false); - - void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre, - bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) { - simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc); - } - - void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) { - simd_prefix(src, dst, pre, no_mask_reg); - } - void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src, - VexSimdPrefix pre, bool no_mask_reg = false) { - bool rex_w = true; - simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w); - } - - int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, - VexSimdPrefix pre, bool no_mask_reg, - VexOpcode opc = VEX_OPCODE_0F, - bool rex_w = false, int vector_len = AVX_128bit, - bool legacy_mode = false); - - int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, - VexSimdPrefix pre, bool no_mask_reg, - VexOpcode opc = VEX_OPCODE_0F, - bool rex_w = false, int vector_len = AVX_128bit); - - int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, - VexSimdPrefix pre, bool no_mask_reg, - VexOpcode opc = VEX_OPCODE_0F, - bool rex_w = false, int vector_len = AVX_128bit); - - // Move/convert 32-bit integer value. - int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, - VexSimdPrefix pre, bool no_mask_reg) { - // It is OK to cast from Register to XMMRegister to pass argument here - // since only encoding is used in simd_prefix_and_encode() and number of - // Gen and Xmm registers are the same. - return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F); - } - int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) { - return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg); - } - int simd_prefix_and_encode(Register dst, XMMRegister src, - VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, - bool no_mask_reg = false) { - return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc); - } - - // Move/convert 64-bit integer value. - int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src, - VexSimdPrefix pre, bool no_mask_reg = false) { - bool rex_w = true; - return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w); - } - int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) { - return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg); - } - int simd_prefix_and_encode_q(Register dst, XMMRegister src, - VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, - bool no_mask_reg = false) { - bool rex_w = true; - return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w); - } + int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre, + VexOpcode opc, InstructionAttr *attributes); // Helper functions for groups of instructions void emit_arith_b(int op1, int op2, Register dst, int imm8); @@ -821,27 +669,6 @@ void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32); void emit_arith(int op1, int op2, Register dst, Register src); - void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false); - void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false); - void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false); - void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false); - void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false); - void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false); - void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false); - void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false); - void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, - Address src, VexSimdPrefix pre, int vector_len, - bool no_mask_reg = false, bool legacy_mode = false); - void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, - Address src, VexSimdPrefix pre, int vector_len, - bool no_mask_reg = false); - void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, - XMMRegister src, VexSimdPrefix pre, int vector_len, - bool no_mask_reg = false, bool legacy_mode = false); - void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, - XMMRegister src, VexSimdPrefix pre, int vector_len, - bool no_mask_reg = false); - bool emit_compressed_disp_byte(int &disp); void emit_operand(Register reg, @@ -986,18 +813,16 @@ // belong in macro assembler but there is no need for both varieties to exist void init_attributes(void) { - _evex_encoding = 0; - _input_size_in_bits = 0; - _avx_vector_len = AVX_NoVec; - _tuple_type = EVEX_ETUP; - _is_evex_instruction = false; _legacy_mode_bw = (VM_Version::supports_avx512bw() == false); _legacy_mode_dq = (VM_Version::supports_avx512dq() == false); _legacy_mode_vl = (VM_Version::supports_avx512vl() == false); _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false); - _instruction_uses_vl = false; + _attributes = NULL; } + void set_attributes(InstructionAttr *attributes) { _attributes = attributes; } + void clear_attributes(void) { _attributes = NULL; } + void lea(Register dst, Address src); void mov(Register dst, Register src); @@ -2106,12 +1931,12 @@ void vextracti128h(Address dst, XMMRegister src); // Copy low 256bit into high 256bit of ZMM registers. - void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vextracti64x4h(XMMRegister dst, XMMRegister src); - void vextractf64x4h(XMMRegister dst, XMMRegister src); - void vextractf64x4h(Address dst, XMMRegister src); - void vinsertf64x4h(XMMRegister dst, Address src); + void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); + void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); + void vextracti64x4h(XMMRegister dst, XMMRegister src, int value); + void vextractf64x4h(XMMRegister dst, XMMRegister src, int value); + void vextractf64x4h(Address dst, XMMRegister src, int value); + void vinsertf64x4h(XMMRegister dst, Address src, int value); // Copy targeted 128bit segments of the ZMM registers void vextracti64x2h(XMMRegister dst, XMMRegister src, int value); @@ -2173,4 +1998,95 @@ }; +// The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions. +// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction +// are applied. +class InstructionAttr { +public: + InstructionAttr( + int vector_len, + bool rex_vex_w, + bool legacy_mode, + bool no_reg_mask, + bool uses_vl) + : + _avx_vector_len(vector_len), + _rex_vex_w(rex_vex_w), + _legacy_mode(legacy_mode), + _no_reg_mask(no_reg_mask), + _uses_vl(uses_vl), + _tuple_type(Assembler::EVEX_ETUP), + _input_size_in_bits(Assembler::EVEX_NObit), + _is_evex_instruction(false), + _evex_encoding(0), + _is_clear_context(false), + _is_extended_context(false), + _current_assembler(NULL) { + if (UseAVX < 3) _legacy_mode = true; + } + + ~InstructionAttr() { + if (_current_assembler != NULL) { + _current_assembler->clear_attributes(); + } + _current_assembler = NULL; + } + +private: + int _avx_vector_len; + bool _rex_vex_w; + bool _legacy_mode; + bool _no_reg_mask; + bool _uses_vl; + int _tuple_type; + int _input_size_in_bits; + bool _is_evex_instruction; + int _evex_encoding; + bool _is_clear_context; + bool _is_extended_context; + + Assembler *_current_assembler; + +public: + // query functions for field accessors + int get_vector_len(void) const { return _avx_vector_len; } + bool is_rex_vex_w(void) const { return _rex_vex_w; } + bool is_legacy_mode(void) const { return _legacy_mode; } + bool is_no_reg_mask(void) const { return _no_reg_mask; } + bool uses_vl(void) const { return _uses_vl; } + int get_tuple_type(void) const { return _tuple_type; } + int get_input_size(void) const { return _input_size_in_bits; } + int is_evex_instruction(void) const { return _is_evex_instruction; } + int get_evex_encoding(void) const { return _evex_encoding; } + bool is_clear_context(void) const { return _is_clear_context; } + bool is_extended_context(void) const { return _is_extended_context; } + + // Set the vector len manually + void set_vector_len(int vector_len) { _avx_vector_len = vector_len; } + + // Set the instruction to be encoded in AVX mode + void set_is_legacy_mode(void) { _legacy_mode = true; } + + // Set the current instuction to be encoded as an EVEX instuction + void set_is_evex_instruction(void) { _is_evex_instruction = true; } + + // Internal encoding data used in compressed immediate offset programming + void set_evex_encoding(int value) { _evex_encoding = value; } + + // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components + void set_is_clear_context(void) { _is_clear_context = true; } + + // Map back to current asembler so that we can manage object level assocation + void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; } + + // Address modifiers used for compressed displacement calculation + void set_address_attributes(int tuple_type, int input_size_in_bits) { + if (VM_Version::supports_evex()) { + _tuple_type = tuple_type; + _input_size_in_bits = input_size_in_bits; + } + } + +}; + #endif // CPU_X86_VM_ASSEMBLER_X86_HPP --- old/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp 2015-11-07 22:07:09.824446700 -0800 +++ new/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp 2015-11-07 22:07:09.604446700 -0800 @@ -3714,7 +3714,7 @@ if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) { __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg()); } - if (UseAVX > 1) { + if (UseAVX > 0) { __ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(), ExternalAddress((address)float_signflip_pool)); } else { @@ -3725,7 +3725,7 @@ if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) { __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg()); } - if (UseAVX > 1) { + if (UseAVX > 0) { __ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(), ExternalAddress((address)double_signflip_pool)); } else { --- old/src/cpu/x86/vm/c2_globals_x86.hpp 2015-11-07 22:07:11.319446700 -0800 +++ new/src/cpu/x86/vm/c2_globals_x86.hpp 2015-11-07 22:07:11.105446700 -0800 @@ -84,6 +84,7 @@ define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoRegScheduling, true); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); --- old/src/cpu/x86/vm/c2_init_x86.cpp 2015-11-07 22:07:12.670446700 -0800 +++ new/src/cpu/x86/vm/c2_init_x86.cpp 2015-11-07 22:07:12.453446700 -0800 @@ -58,6 +58,4 @@ OptoReg::invalidate(i); } } - - SuperWordLoopUnrollAnalysis = true; } --- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-11-07 22:07:14.055446700 -0800 +++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-11-07 22:07:13.841446700 -0800 @@ -3651,12 +3651,71 @@ LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); } +void MacroAssembler::movdqu(Address dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) { + Assembler::vextractf32x4h(dst, src, 0); + } else { + Assembler::movdqu(dst, src); + } +} + +void MacroAssembler::movdqu(XMMRegister dst, Address src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) { + Assembler::vinsertf32x4h(dst, src, 0); + } else { + Assembler::movdqu(dst, src); + } +} + +void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { + Assembler::evmovdqul(dst, src, Assembler::AVX_512bit); + } else { + Assembler::movdqu(dst, src); + } +} + void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - Assembler::movdqu(dst, as_Address(src)); + movdqu(dst, as_Address(src)); } else { lea(rscratch1, src); - Assembler::movdqu(dst, Address(rscratch1, 0)); + movdqu(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vmovdqu(Address dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) { + Assembler::vextractf64x4h(dst, src, 0); + } else { + Assembler::vmovdqu(dst, src); + } +} + +void MacroAssembler::vmovdqu(XMMRegister dst, Address src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) { + Assembler::vinsertf64x4h(dst, src, 0); + } else { + Assembler::vmovdqu(dst, src); + } +} + +void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { + Assembler::evmovdqul(dst, src, Assembler::AVX_512bit); + } + else { + Assembler::vmovdqu(dst, src); + } +} + +void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + vmovdqu(dst, as_Address(src)); + } + else { + lea(rscratch1, src); + vmovdqu(dst, Address(rscratch1, 0)); } } @@ -3726,6 +3785,10 @@ call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); } +#ifdef _LP64 +#define XSTATE_BV 0x200 +#endif + void MacroAssembler::pop_CPU_state() { pop_FPU_state(); pop_IU_state(); @@ -3735,27 +3798,7 @@ #ifndef _LP64 frstor(Address(rsp, 0)); #else - // AVX will continue to use the fxsave area. - // EVEX needs to utilize the xsave area, which is under different - // management. - if(VM_Version::supports_evex()) { - // EDX:EAX describe the XSAVE header and - // are obtained while fetching info for XCR0 via cpuid. - // These two registers make up 64-bits in the header for which bits - // 62:10 are currently reserved for future implementations and unused. Bit 63 - // is unused for our implementation as we do not utilize - // compressed XSAVE areas. Bits 9..8 are currently ignored as we do not use - // the functionality for PKRU state and MSR tracing. - // Ergo we are primarily concerned with bits 7..0, which define - // which ISA extensions and features are enabled for a given machine and are - // defined in XemXcr0Eax and is used to map the XSAVE area - // for restoring registers as described via XCR0. - movl(rdx,VM_Version::get_xsave_header_upper_segment()); - movl(rax,VM_Version::get_xsave_header_lower_segment()); - xrstor(Address(rsp, 0)); - } else { - fxrstor(Address(rsp, 0)); - } + fxrstor(Address(rsp, 0)); #endif addptr(rsp, FPUStateSizeInWords * wordSize); } @@ -3773,49 +3816,13 @@ push_FPU_state(); } -#ifdef _LP64 -#define XSTATE_BV 0x200 -#endif - void MacroAssembler::push_FPU_state() { subptr(rsp, FPUStateSizeInWords * wordSize); #ifndef _LP64 fnsave(Address(rsp, 0)); fwait(); #else - // AVX will continue to use the fxsave area. - // EVEX needs to utilize the xsave area, which is under different - // management. - if(VM_Version::supports_evex()) { - // Save a copy of EAX and EDX - push(rax); - push(rdx); - // EDX:EAX describe the XSAVE header and - // are obtained while fetching info for XCR0 via cpuid. - // These two registers make up 64-bits in the header for which bits - // 62:10 are currently reserved for future implementations and unused. Bit 63 - // is unused for our implementation as we do not utilize - // compressed XSAVE areas. Bits 9..8 are currently ignored as we do not use - // the functionality for PKRU state and MSR tracing. - // Ergo we are primarily concerned with bits 7..0, which define - // which ISA extensions and features are enabled for a given machine and are - // defined in XemXcr0Eax and is used to program XSAVE area - // for saving the required registers as defined in XCR0. - int xcr0_edx = VM_Version::get_xsave_header_upper_segment(); - int xcr0_eax = VM_Version::get_xsave_header_lower_segment(); - movl(rdx,xcr0_edx); - movl(rax,xcr0_eax); - xsave(Address(rsp, wordSize*2)); - // now Apply control bits and clear bytes 8..23 in the header - pop(rdx); - pop(rax); - movl(Address(rsp, XSTATE_BV), xcr0_eax); - movl(Address(rsp, XSTATE_BV+4), xcr0_edx); - andq(Address(rsp, XSTATE_BV+8), 0); - andq(Address(rsp, XSTATE_BV+16), 0); - } else { - fxsave(Address(rsp, 0)); - } + fxsave(Address(rsp, 0)); #endif // LP64 } @@ -4007,6 +4014,23 @@ } } +void MacroAssembler::xorpd(XMMRegister dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) { + Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit); + } + else { + Assembler::xorpd(dst, src); + } +} + +void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) { + Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit); + } else { + Assembler::xorps(dst, src); + } +} + void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { // Used in sign-bit flipping with aligned address. assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); @@ -4050,6 +4074,682 @@ } } +void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if ((dst_enc < 16) && (nds_enc < 16)) { + vandps(dst, nds, negate_field, vector_len); + } else if ((src_enc < 16) && (dst_enc < 16)) { + movss(src, nds); + vandps(dst, src, negate_field, vector_len); + } else if (src_enc < 16) { + movss(src, nds); + vandps(src, src, negate_field, vector_len); + movss(dst, src); + } else if (dst_enc < 16) { + movdqu(src, xmm0); + movss(xmm0, nds); + vandps(dst, xmm0, negate_field, vector_len); + movdqu(xmm0, src); + } else if (nds_enc < 16) { + movdqu(src, xmm0); + vandps(xmm0, nds, negate_field, vector_len); + movss(dst, xmm0); + movdqu(xmm0, src); + } else { + movdqu(src, xmm0); + movss(xmm0, nds); + vandps(xmm0, xmm0, negate_field, vector_len); + movss(dst, xmm0); + movdqu(xmm0, src); + } +} + +void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if ((dst_enc < 16) && (nds_enc < 16)) { + vandpd(dst, nds, negate_field, vector_len); + } else if ((src_enc < 16) && (dst_enc < 16)) { + movsd(src, nds); + vandpd(dst, src, negate_field, vector_len); + } else if (src_enc < 16) { + movsd(src, nds); + vandpd(src, src, negate_field, vector_len); + movsd(dst, src); + } else if (dst_enc < 16) { + movdqu(src, xmm0); + movsd(xmm0, nds); + vandpd(dst, xmm0, negate_field, vector_len); + movdqu(xmm0, src); + } else if (nds_enc < 16) { + movdqu(src, xmm0); + vandpd(xmm0, nds, negate_field, vector_len); + movsd(dst, xmm0); + movdqu(xmm0, src); + } else { + movdqu(src, xmm0); + movsd(xmm0, nds); + vandpd(xmm0, xmm0, negate_field, vector_len); + movsd(dst, xmm0); + movdqu(xmm0, src); + } +} + +void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpaddb(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpaddb(dst, dst, src, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for src + evmovdqul(nds, src, Assembler::AVX_512bit); + Assembler::vpaddb(dst, dst, nds, vector_len); + } else if ((src_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpaddb(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds as scatch for xmm0 to hold src + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpaddb(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpaddb(xmm0, xmm0, xmm1, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpaddb(dst, nds, src, vector_len); + } else if (dst_enc < 16) { + Assembler::vpaddb(dst, dst, src, vector_len); + } else if (nds_enc < 16) { + // implies dst_enc in upper bank with src as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpaddb(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs in upper bank + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpaddb(xmm0, xmm0, src, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpaddw(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpaddw(dst, dst, src, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for src + evmovdqul(nds, src, Assembler::AVX_512bit); + Assembler::vpaddw(dst, dst, nds, vector_len); + } else if ((src_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpaddw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds as scatch for xmm0 to hold src + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpaddw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpaddw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpaddw(dst, nds, src, vector_len); + } else if (dst_enc < 16) { + Assembler::vpaddw(dst, dst, src, vector_len); + } else if (nds_enc < 16) { + // implies dst_enc in upper bank with src as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpaddw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs in upper bank + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpaddw(xmm0, xmm0, src, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsubb(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpsubb(dst, dst, src, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for src + evmovdqul(nds, src, Assembler::AVX_512bit); + Assembler::vpsubb(dst, dst, nds, vector_len); + } else if ((src_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsubb(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds as scatch for xmm0 to hold src + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpsubb(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsubb(dst, nds, src, vector_len); + } else if (dst_enc < 16) { + Assembler::vpsubb(dst, dst, src, vector_len); + } else if (nds_enc < 16) { + // implies dst_enc in upper bank with src as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsubb(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs in upper bank + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsubw(xmm0, xmm0, src, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsubw(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpsubw(dst, dst, src, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for src + evmovdqul(nds, src, Assembler::AVX_512bit); + Assembler::vpsubw(dst, dst, nds, vector_len); + } else if ((src_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsubw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds as scatch for xmm0 to hold src + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpsubw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsubw(dst, nds, src, vector_len); + } else if (dst_enc < 16) { + Assembler::vpsubw(dst, dst, src, vector_len); + } else if (nds_enc < 16) { + // implies dst_enc in upper bank with src as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsubw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs in upper bank + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsubw(xmm0, xmm0, src, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + + +void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpmullw(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpmullw(dst, dst, src, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for src + evmovdqul(nds, src, Assembler::AVX_512bit); + Assembler::vpmullw(dst, dst, nds, vector_len); + } else if ((src_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpmullw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds as scatch for xmm0 to hold src + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpmullw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpmullw(dst, nds, src, vector_len); + } else if (dst_enc < 16) { + Assembler::vpmullw(dst, dst, src, vector_len); + } else if (nds_enc < 16) { + // implies dst_enc in upper bank with src as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpmullw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs in upper bank + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpmullw(xmm0, xmm0, src, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int shift_enc = shift->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsraw(dst, nds, shift, vector_len); + } else if ((dst_enc < 16) && (shift_enc < 16)) { + Assembler::vpsraw(dst, dst, shift, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds_enc as scratch with shift + evmovdqul(nds, shift, Assembler::AVX_512bit); + Assembler::vpsraw(dst, dst, nds, vector_len); + } else if ((shift_enc < 16) && (nds_enc < 16)) { + // use nds as scratch with dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsraw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds to save a copy of xmm0 and hold shift + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsraw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else if (nds_enc < 16) { + // use nds as dest as temps + evmovdqul(nds, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsraw(nds, nds, xmm0, vector_len); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, shift, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(xmm1, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsraw(dst, nds, shift, vector_len); + } else if (dst_enc < 16) { + Assembler::vpsraw(dst, dst, shift, vector_len); + } else if (nds_enc < 16) { + // use nds as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsraw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // use nds as scratch for xmm0 + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsraw(xmm0, xmm0, shift, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int shift_enc = shift->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsrlw(dst, nds, shift, vector_len); + } else if ((dst_enc < 16) && (shift_enc < 16)) { + Assembler::vpsrlw(dst, dst, shift, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds_enc as scratch with shift + evmovdqul(nds, shift, Assembler::AVX_512bit); + Assembler::vpsrlw(dst, dst, nds, vector_len); + } else if ((shift_enc < 16) && (nds_enc < 16)) { + // use nds as scratch with dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsrlw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds to save a copy of xmm0 and hold shift + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsrlw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else if (nds_enc < 16) { + // use nds as dest as temps + evmovdqul(nds, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsrlw(nds, nds, xmm0, vector_len); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, shift, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(xmm1, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsrlw(dst, nds, shift, vector_len); + } else if (dst_enc < 16) { + Assembler::vpsrlw(dst, dst, shift, vector_len); + } else if (nds_enc < 16) { + // use nds as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsrlw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // use nds as scratch for xmm0 + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsrlw(xmm0, xmm0, shift, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int shift_enc = shift->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsllw(dst, nds, shift, vector_len); + } else if ((dst_enc < 16) && (shift_enc < 16)) { + Assembler::vpsllw(dst, dst, shift, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds_enc as scratch with shift + evmovdqul(nds, shift, Assembler::AVX_512bit); + Assembler::vpsllw(dst, dst, nds, vector_len); + } else if ((shift_enc < 16) && (nds_enc < 16)) { + // use nds as scratch with dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsllw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds to save a copy of xmm0 and hold shift + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsllw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else if (nds_enc < 16) { + // use nds as dest as temps + evmovdqul(nds, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsllw(nds, nds, xmm0, vector_len); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, shift, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(xmm1, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsllw(dst, nds, shift, vector_len); + } else if (dst_enc < 16) { + Assembler::vpsllw(dst, dst, shift, vector_len); + } else if (nds_enc < 16) { + // use nds as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsllw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // use nds as scratch for xmm0 + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsllw(xmm0, xmm0, shift, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +// This instruction exists within macros, ergo we cannot control its input +// when emitted through those patterns. +void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) { + if (VM_Version::supports_avx512nobw()) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if (dst_enc == src_enc) { + if (dst_enc < 16) { + Assembler::punpcklbw(dst, src); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::punpcklbw(xmm0, xmm0); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } + } else { + if ((src_enc < 16) && (dst_enc < 16)) { + Assembler::punpcklbw(dst, src); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::punpcklbw(xmm0, src); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::punpcklbw(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + Assembler::punpcklbw(xmm0, xmm1); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } + } + } else { + Assembler::punpcklbw(dst, src); + } +} + +// This instruction exists within macros, ergo we cannot control its input +// when emitted through those patterns. +void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { + if (VM_Version::supports_avx512nobw()) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if (dst_enc == src_enc) { + if (dst_enc < 16) { + Assembler::pshuflw(dst, src, mode); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::pshuflw(xmm0, xmm0, mode); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } + } else { + if ((src_enc < 16) && (dst_enc < 16)) { + Assembler::pshuflw(dst, src, mode); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::pshuflw(xmm0, src, mode); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::pshuflw(dst, xmm0, mode); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + Assembler::pshuflw(xmm0, xmm1, mode); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } + } + } else { + Assembler::pshuflw(dst, src, mode); + } +} + void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) { if (reachable(src)) { vandpd(dst, nds, as_Address(src), vector_len); @@ -4133,31 +4833,16 @@ subptr(rsp, 64); evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); movflt(xmm0, nds); - if (reachable(src)) { - vxorps(xmm0, xmm0, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorps(xmm0, xmm0, Address(rscratch1, 0), Assembler::AVX_128bit); - } + vxorps(xmm0, xmm0, src, Assembler::AVX_128bit); movflt(dst, xmm0); evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); addptr(rsp, 64); } else { movflt(dst, nds); - if (reachable(src)) { - vxorps(dst, dst, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorps(dst, dst, Address(rscratch1, 0), Assembler::AVX_128bit); - } + vxorps(dst, dst, src, Assembler::AVX_128bit); } } else { - if (reachable(src)) { - vxorps(dst, nds, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorps(dst, nds, Address(rscratch1, 0), Assembler::AVX_128bit); - } + vxorps(dst, nds, src, Assembler::AVX_128bit); } } @@ -4172,31 +4857,16 @@ subptr(rsp, 64); evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); movdbl(xmm0, nds); - if (reachable(src)) { - vxorps(xmm0, xmm0, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorps(xmm0, xmm0, Address(rscratch1, 0), Assembler::AVX_128bit); - } + vxorpd(xmm0, xmm0, src, Assembler::AVX_128bit); movdbl(dst, xmm0); evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); addptr(rsp, 64); } else { movdbl(dst, nds); - if (reachable(src)) { - vxorps(dst, dst, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorps(dst, dst, Address(rscratch1, 0), Assembler::AVX_128bit); - } + vxorpd(dst, dst, src, Assembler::AVX_128bit); } } else { - if (reachable(src)) { - vxorpd(dst, nds, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorpd(dst, nds, Address(rscratch1, 0), Assembler::AVX_128bit); - } + vxorpd(dst, nds, src, Assembler::AVX_128bit); } } @@ -4688,7 +5358,6 @@ pusha(); // if we are coming from c1, xmm registers may be live - int off = 0; int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8); if (UseAVX > 2) { num_xmm_regs = LP64_ONLY(32) NOT_LP64(8); @@ -4697,7 +5366,7 @@ if (UseSSE == 1) { subptr(rsp, sizeof(jdouble)*8); for (int n = 0; n < 8; n++) { - movflt(Address(rsp, off++*sizeof(jdouble)), as_XMMRegister(n)); + movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n)); } } else if (UseSSE >= 2) { if (UseAVX > 2) { @@ -4709,37 +5378,35 @@ #ifdef COMPILER2 if (MaxVectorSize > 16) { if(UseAVX > 2) { - // Save upper half of ZMM registes + // Save upper half of ZMM registers subptr(rsp, 32*num_xmm_regs); for (int n = 0; n < num_xmm_regs; n++) { - vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n)); + vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1); } - off = 0; } assert(UseAVX > 0, "256 bit vectors are supported only with AVX"); - // Save upper half of YMM registes + // Save upper half of YMM registers subptr(rsp, 16*num_xmm_regs); for (int n = 0; n < num_xmm_regs; n++) { - vextractf128h(Address(rsp, off++*16), as_XMMRegister(n)); + vextractf128h(Address(rsp, n*16), as_XMMRegister(n)); } } #endif // Save whole 128bit (16 bytes) XMM registers subptr(rsp, 16*num_xmm_regs); - off = 0; #ifdef _LP64 - if (VM_Version::supports_avx512novl()) { + if (VM_Version::supports_evex()) { for (int n = 0; n < num_xmm_regs; n++) { - vextractf32x4h(Address(rsp, off++*16), as_XMMRegister(n), 0); + vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0); } } else { for (int n = 0; n < num_xmm_regs; n++) { - movdqu(Address(rsp, off++*16), as_XMMRegister(n)); + movdqu(Address(rsp, n*16), as_XMMRegister(n)); } } #else for (int n = 0; n < num_xmm_regs; n++) { - movdqu(Address(rsp, off++*16), as_XMMRegister(n)); + movdqu(Address(rsp, n*16), as_XMMRegister(n)); } #endif } @@ -4808,44 +5475,40 @@ addptr(rsp, sizeof(jdouble)*nb_args); } - off = 0; if (UseSSE == 1) { for (int n = 0; n < 8; n++) { - movflt(as_XMMRegister(n), Address(rsp, off++*sizeof(jdouble))); + movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble))); } addptr(rsp, sizeof(jdouble)*8); } else if (UseSSE >= 2) { - // Restore whole 128bit (16 bytes) XMM regiters + // Restore whole 128bit (16 bytes) XMM registers #ifdef _LP64 - if (VM_Version::supports_avx512novl()) { - for (int n = 0; n < num_xmm_regs; n++) { - vinsertf32x4h(as_XMMRegister(n), Address(rsp, off++*16), 0); - } - } - else { - for (int n = 0; n < num_xmm_regs; n++) { - movdqu(as_XMMRegister(n), Address(rsp, off++*16)); - } + if (VM_Version::supports_evex()) { + for (int n = 0; n < num_xmm_regs; n++) { + vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0); } -#else + } else { for (int n = 0; n < num_xmm_regs; n++) { - movdqu(as_XMMRegister(n), Address(rsp, off++ * 16)); + movdqu(as_XMMRegister(n), Address(rsp, n*16)); } + } +#else + for (int n = 0; n < num_xmm_regs; n++) { + movdqu(as_XMMRegister(n), Address(rsp, n*16)); + } #endif addptr(rsp, 16*num_xmm_regs); #ifdef COMPILER2 if (MaxVectorSize > 16) { - // Restore upper half of YMM registes. - off = 0; + // Restore upper half of YMM registers. for (int n = 0; n < num_xmm_regs; n++) { - vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16)); + vinsertf128h(as_XMMRegister(n), Address(rsp, n*16)); } addptr(rsp, 16*num_xmm_regs); if(UseAVX > 2) { - off = 0; for (int n = 0; n < num_xmm_regs; n++) { - vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32)); + vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1); } addptr(rsp, 32*num_xmm_regs); } @@ -6831,7 +7494,7 @@ bind(SCAN_TO_16_CHAR_LOOP); vmovdqu(vec3, Address(result, 0)); - vpcmpeqw(vec3, vec3, vec1, true); + vpcmpeqw(vec3, vec3, vec1, 1); vptest(vec2, vec3); jcc(Assembler::carryClear, FOUND_CHAR); addptr(result, 32); @@ -7671,7 +8334,7 @@ BIND(L_check_fill_32_bytes); addl(count, 8 << shift); jccb(Assembler::less, L_check_fill_8_bytes); - evmovdqul(Address(to, 0), xtmp, Assembler::AVX_256bit); + vmovdqu(Address(to, 0), xtmp); addptr(to, 32); subl(count, 8 << shift); --- old/src/cpu/x86/vm/macroAssembler_x86.hpp 2015-11-07 22:07:15.794446700 -0800 +++ new/src/cpu/x86/vm/macroAssembler_x86.hpp 2015-11-07 22:07:15.583446700 -0800 @@ -962,10 +962,15 @@ void divss(XMMRegister dst, AddressLiteral src); // Move Unaligned Double Quadword - void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); } - void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); } - void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); } + void movdqu(Address dst, XMMRegister src); + void movdqu(XMMRegister dst, Address src); + void movdqu(XMMRegister dst, XMMRegister src); void movdqu(XMMRegister dst, AddressLiteral src); + // AVX Unaligned forms + void vmovdqu(Address dst, XMMRegister src); + void vmovdqu(XMMRegister dst, Address src); + void vmovdqu(XMMRegister dst, XMMRegister src); + void vmovdqu(XMMRegister dst, AddressLiteral src); // Move Aligned Double Quadword void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } @@ -1024,12 +1029,12 @@ void ucomisd(XMMRegister dst, AddressLiteral src); // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values - void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); } + void xorpd(XMMRegister dst, XMMRegister src); void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } void xorpd(XMMRegister dst, AddressLiteral src); // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values - void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); } + void xorps(XMMRegister dst, XMMRegister src); void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } void xorps(XMMRegister dst, AddressLiteral src); @@ -1047,6 +1052,39 @@ void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len); + void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len); + + void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + + void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + + void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + + void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + + void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + + void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); + void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); + + void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); + void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); + + void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); + void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); + + void punpcklbw(XMMRegister dst, XMMRegister src); + void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); } + + void pshuflw(XMMRegister dst, XMMRegister src, int mode); + void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); } + void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); --- old/src/cpu/x86/vm/sharedRuntime_x86_32.cpp 2015-11-07 22:07:17.225446700 -0800 +++ new/src/cpu/x86/vm/sharedRuntime_x86_32.cpp 2015-11-07 22:07:17.013446700 -0800 @@ -192,31 +192,22 @@ } } else if(UseSSE >= 2) { // Save whole 128bit (16 bytes) XMM regiters - if (VM_Version::supports_avx512novl()) { - for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf32x4h(Address(rsp, off*wordSize), as_XMMRegister(n), 0); - off += delta; - } - } else { - for (int n = 0; n < num_xmm_regs; n++) { - __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n)); - off += delta; - } + for (int n = 0; n < num_xmm_regs; n++) { + __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n)); + off += delta; } } if (vect_words > 0) { assert(vect_words*wordSize == 128, ""); __ subptr(rsp, 128); // Save upper half of YMM registes - off = 0; for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n)); + __ vextractf128h(Address(rsp, n*16), as_XMMRegister(n)); } if (UseAVX > 2) { __ subptr(rsp, 256); // Save upper half of ZMM registes - off = 0; for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n)); + __ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1); } } } @@ -285,31 +276,23 @@ off += delta; } } else if (UseSSE >= 2) { - if (VM_Version::supports_avx512novl()) { - for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf32x4h(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes), 0); - off += delta; - } - } else { - for (int n = 0; n < num_xmm_regs; n++) { - __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes)); - off += delta; - } + for (int n = 0; n < num_xmm_regs; n++) { + __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes)); + off += delta; } } if (restore_vectors) { + assert(additional_frame_bytes == 128, ""); if (UseAVX > 2) { - off = 0; + // Restore upper half of ZMM registers. for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32)); + __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1); } __ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registes } // Restore upper half of YMM registes. - assert(additional_frame_bytes == 128, ""); - off = 0; for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16)); + __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16)); } __ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registes } --- old/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2015-11-07 22:07:18.688446700 -0800 +++ new/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2015-11-07 22:07:18.474446700 -0800 @@ -72,45 +72,28 @@ class RegisterSaver { // Capture info about frame layout. Layout offsets are in jint // units because compiler frame slots are jints. -#define HALF_ZMM_BANK_WORDS 128 +#define XSAVE_AREA_BEGIN 160 +#define XSAVE_AREA_YMM_BEGIN 576 +#define XSAVE_AREA_ZMM_BEGIN 1152 +#define XSAVE_AREA_UPPERBANK 1664 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off +#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off enum layout { fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area - xmm_off = fpu_state_off + 160/BytesPerInt, // offset in fxsave save area + xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area DEF_XMM_OFFS(0), DEF_XMM_OFFS(1), - DEF_XMM_OFFS(2), - DEF_XMM_OFFS(3), - DEF_XMM_OFFS(4), - DEF_XMM_OFFS(5), - DEF_XMM_OFFS(6), - DEF_XMM_OFFS(7), - DEF_XMM_OFFS(8), - DEF_XMM_OFFS(9), - DEF_XMM_OFFS(10), - DEF_XMM_OFFS(11), - DEF_XMM_OFFS(12), - DEF_XMM_OFFS(13), - DEF_XMM_OFFS(14), - DEF_XMM_OFFS(15), - zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt), + // 2..15 are implied in range usage + ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, + DEF_YMM_OFFS(0), + DEF_YMM_OFFS(1), + // 2..15 are implied in range usage + zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, + zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt, DEF_ZMM_OFFS(16), DEF_ZMM_OFFS(17), - DEF_ZMM_OFFS(18), - DEF_ZMM_OFFS(19), - DEF_ZMM_OFFS(20), - DEF_ZMM_OFFS(21), - DEF_ZMM_OFFS(22), - DEF_ZMM_OFFS(23), - DEF_ZMM_OFFS(24), - DEF_ZMM_OFFS(25), - DEF_ZMM_OFFS(26), - DEF_ZMM_OFFS(27), - DEF_ZMM_OFFS(28), - DEF_ZMM_OFFS(29), - DEF_ZMM_OFFS(30), - DEF_ZMM_OFFS(31), + // 18..31 are implied in range usage fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt), fpu_stateH_end, r15_off, r15H_off, @@ -160,8 +143,6 @@ }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { - int vect_words = 0; - int ymmhi_offset = -1; int off = 0; int num_xmm_regs = XMMRegisterImpl::number_of_registers; if (UseAVX < 3) { @@ -171,24 +152,15 @@ if (save_vectors) { assert(UseAVX > 0, "512bit vectors are supported only with EVEX"); assert(MaxVectorSize == 64, "only 512bit vectors are supported now"); - // Save upper half of YMM registers - vect_words = 16 * num_xmm_regs / wordSize; - if (UseAVX < 3) { - ymmhi_offset = additional_frame_words; - additional_frame_words += vect_words; - } } #else assert(!save_vectors, "vectors are generated only by C2 and JVMCI"); #endif - // Always make the frame size 16-byte aligned - int frame_size_in_bytes = round_to(additional_frame_words*wordSize + - reg_save_size*BytesPerInt, num_xmm_regs); + // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated + int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs); // OopMap frame size is in compiler stack slots (jint's) not bytes or words int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; - // The caller will allocate additional_frame_words - int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; // CodeBlob frame size is in words. int frame_size_in_words = frame_size_in_bytes / wordSize; *total_frame_words = frame_size_in_words; @@ -203,12 +175,34 @@ __ push_CPU_state(); // Push a multiple of 16 bytes // push cpu state handles this on EVEX enabled targets - if ((vect_words > 0) && (UseAVX < 3)) { - assert(vect_words*wordSize >= 256, ""); - // Save upper half of YMM registes(0..num_xmm_regs) - __ subptr(rsp, num_xmm_regs*16); - for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n)); + if (save_vectors) { + // Save upper half of YMM registes(0..15) + int base_addr = XSAVE_AREA_YMM_BEGIN; + for (int n = 0; n < 16; n++) { + __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n)); + } + if (VM_Version::supports_evex()) { + // Save upper half of ZMM registes(0..15) + base_addr = XSAVE_AREA_ZMM_BEGIN; + for (int n = 0; n < 16; n++) { + __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1); + } + // Save full ZMM registes(16..num_xmm_regs) + base_addr = XSAVE_AREA_UPPERBANK; + int off = 0; + int vector_len = Assembler::AVX_512bit; + for (int n = 16; n < num_xmm_regs; n++) { + __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); + } + } + } else { + if (VM_Version::supports_evex()) { + // Save upper bank of ZMM registers(16..31) for double/float usage + int base_addr = XSAVE_AREA_UPPERBANK; + int off = 0; + for (int n = 16; n < num_xmm_regs; n++) { + __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n)); + } } } if (frame::arg_reg_save_area_bytes != 0) { @@ -224,8 +218,7 @@ OopMapSet *oop_maps = new OopMapSet(); OopMap* map = new OopMap(frame_size_in_slots, 0); -#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) -#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset) +#define STACK_OFFSET(x) VMRegImpl::stack2reg((x)) map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg()); map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg()); @@ -257,31 +250,21 @@ off = zmm16_off; delta = zmm17_off - off; for (int n = 16; n < num_xmm_regs; n++) { - XMMRegister xmm_name = as_XMMRegister(n); - map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()); + XMMRegister zmm_name = as_XMMRegister(n); + map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()); off += delta; } } #if defined(COMPILER2) || INCLUDE_JVMCI if (save_vectors) { - assert(ymmhi_offset != -1, "save area must exist"); - map->set_callee_saved(YMMHI_STACK_OFFSET( 0), xmm0->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next(4)); + off = ymm0_off; + int delta = ymm1_off - off; + for (int n = 0; n < 16; n++) { + XMMRegister ymm_name = as_XMMRegister(n); + map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4)); + off += delta; + } } #endif // COMPILER2 || INCLUDE_JVMCI @@ -316,8 +299,8 @@ off = zmm16H_off; delta = zmm17H_off - off; for (int n = 16; n < num_xmm_regs; n++) { - XMMRegister xmm_name = as_XMMRegister(n); - map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next()); + XMMRegister zmm_name = as_XMMRegister(n); + map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next()); off += delta; } } @@ -335,21 +318,48 @@ // Pop arg register save area __ addptr(rsp, frame::arg_reg_save_area_bytes); } + #if defined(COMPILER2) || INCLUDE_JVMCI - // On EVEX enabled targets everything is handled in pop fpu state - if ((restore_vectors) && (UseAVX < 3)) { - assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX"); - assert(MaxVectorSize == 64, "up to 512bit vectors are supported now"); - int off = 0; - // Restore upper half of YMM registes (0..num_xmm_regs) - for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16)); - } - __ addptr(rsp, num_xmm_regs*16); + if (restore_vectors) { + assert(UseAVX > 0, "512bit vectors are supported only with EVEX"); + assert(MaxVectorSize == 64, "only 512bit vectors are supported now"); } #else - assert(!restore_vectors, "vectors are generated only by C2 and JVMCI"); + assert(!save_vectors, "vectors are generated only by C2"); #endif + + // On EVEX enabled targets everything is handled in pop fpu state + if (restore_vectors) { + // Restore upper half of YMM registes (0..15) + int base_addr = XSAVE_AREA_YMM_BEGIN; + for (int n = 0; n < 16; n++) { + __ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16)); + } + if (VM_Version::supports_evex()) { + // Restore upper half of ZMM registes (0..15) + base_addr = XSAVE_AREA_ZMM_BEGIN; + for (int n = 0; n < 16; n++) { + __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1); + } + // Restore full ZMM registes(16..num_xmm_regs) + base_addr = XSAVE_AREA_UPPERBANK; + int vector_len = Assembler::AVX_512bit; + int off = 0; + for (int n = 16; n < num_xmm_regs; n++) { + __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); + } + } + } else { + if (VM_Version::supports_evex()) { + // Restore upper bank of ZMM registes(16..31) for double/float usage + int base_addr = XSAVE_AREA_UPPERBANK; + int off = 0; + for (int n = 16; n < num_xmm_regs; n++) { + __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64))); + } + } + } + // Recover CPU state __ pop_CPU_state(); // Get the rbp described implicitly by the calling convention (no oopMap) --- old/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2015-11-07 22:07:20.187446700 -0800 +++ new/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2015-11-07 22:07:19.966446700 -0800 @@ -273,7 +273,7 @@ if (UseAVX > 2) { last_reg = 31; } - if (VM_Version::supports_avx512novl()) { + if (VM_Version::supports_evex()) { for (int i = xmm_save_first; i <= last_reg; i++) { __ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0); } @@ -391,7 +391,7 @@ // restore regs belonging to calling function #ifdef _WIN64 // emit the restores for xmm regs - if (VM_Version::supports_avx512novl()) { + if (VM_Version::supports_evex()) { for (int i = xmm_save_first; i <= last_reg; i++) { __ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0); } --- old/src/cpu/x86/vm/vm_version_x86.cpp 2015-11-07 22:07:21.688446700 -0800 +++ new/src/cpu/x86/vm/vm_version_x86.cpp 2015-11-07 22:07:21.474446700 -0800 @@ -891,7 +891,7 @@ UseNewLongLShift = true; } if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { - if( supports_sse4a() ) { + if (supports_sse4a()) { UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron } else { UseXmmLoadAndClearUpper = false; --- old/src/cpu/x86/vm/vm_version_x86.hpp 2015-11-07 22:07:23.105446700 -0800 +++ new/src/cpu/x86/vm/vm_version_x86.hpp 2015-11-07 22:07:22.891446700 -0800 @@ -552,6 +552,19 @@ break; } } + // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen + if (retVal == false) { + // Verify that OS save/restore all bits of EVEX registers + // during signal processing. + int nreg = 2 LP64_ONLY(+2); + retVal = true; + for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register + if (_cpuid_info.zmm_save[i] != ymm_test_value()) { + retVal = false; + break; + } + } + } } return retVal; } @@ -706,6 +719,9 @@ static bool supports_avx512vl() { return (_cpuFeatures & CPU_AVX512VL) != 0; } static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); } static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); } + static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); } + static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); } + static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); } // Intel features static bool is_intel_family_core() { return is_intel() && extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } --- old/src/cpu/x86/vm/x86.ad 2015-11-07 22:07:24.512446700 -0800 +++ new/src/cpu/x86/vm/x86.ad 2015-11-07 22:07:24.295446700 -0800 @@ -1716,6 +1716,36 @@ return ret_value; // Per default match rules are supported. } +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + if (ret_value) { + switch (opcode) { + case Op_AddVB: + case Op_SubVB: + if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) + ret_value = false; + break; + case Op_URShiftVS: + case Op_RShiftVS: + case Op_LShiftVS: + case Op_MulVS: + case Op_AddVS: + case Op_SubVS: + if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) + ret_value = false; + break; + case Op_CMoveVD: + if (vlen != 4) + ret_value = false; + break; + } + } + + return ret_value; // Per default match rules are supported. +} + const int Matcher::float_pressure(int default_pressure_threshold) { int float_pressure_threshold = default_pressure_threshold; #ifdef _LP64 @@ -1759,11 +1789,9 @@ break; case T_BYTE: if (size < 4) return 0; - if ((size > 32) && !VM_Version::supports_avx512bw()) return 0; break; case T_SHORT: if (size < 4) return 0; - if ((size > 16) && !VM_Version::supports_avx512bw()) return 0; break; default: ShouldNotReachHere(); @@ -1967,27 +1995,34 @@ bool is_single_byte = false; int vec_len = 0; if ((UseAVX > 2) && (stack_offset != 0)) { + int tuple_type = Assembler::EVEX_FVM; + int input_size = Assembler::EVEX_32bit; switch (ireg) { - case Op_VecS: + case Op_VecS: + tuple_type = Assembler::EVEX_T1S; + break; case Op_VecD: + tuple_type = Assembler::EVEX_T1S; + input_size = Assembler::EVEX_64bit; + break; case Op_VecX: - break; - case Op_VecY: - vec_len = 1; - break; + break; + case Op_VecY: + vec_len = 1; + break; case Op_VecZ: - vec_len = 2; - break; + vec_len = 2; + break; } - is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0); + is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); } int offset_size = 0; int size = 5; if (UseAVX > 2 ) { - if ((VM_Version::supports_avx512vl() == false) && (vec_len == 2)) { + if (VM_Version::supports_avx512novl() && (vec_len == 2)) { offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); size += 2; // Need an additional two bytes for EVEX encoding - } else if ((VM_Version::supports_avx512vl() == false) && (vec_len < 2)) { + } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); } else { offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); @@ -2711,7 +2746,48 @@ %} instruct absF_reg_reg(regF dst, regF src) %{ - predicate(UseAVX > 0); + predicate(VM_Version::supports_avxonly()); + match(Set dst (AbsF src)); + ins_cost(150); + format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vandps($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(float_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} + +#ifdef _LP64 +instruct absF_reg_reg_evex(regF dst, regF src) %{ + predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); + match(Set dst (AbsF src)); + ins_cost(150); + format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vandps($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(float_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} + +instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{ + predicate(VM_Version::supports_avx512novl()); + match(Set dst (AbsF src1)); + effect(TEMP src2); + ins_cost(150); + format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, + ExternalAddress(float_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} +#else // _LP64 +instruct absF_reg_reg_evex(regF dst, regF src) %{ + predicate(UseAVX > 2); match(Set dst (AbsF src)); ins_cost(150); format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} @@ -2722,6 +2798,7 @@ %} ins_pipe(pipe_slow); %} +#endif instruct absD_reg(regD dst) %{ predicate((UseSSE>=2) && (UseAVX == 0)); @@ -2736,7 +2813,22 @@ %} instruct absD_reg_reg(regD dst, regD src) %{ - predicate(UseAVX > 0); + predicate(VM_Version::supports_avxonly()); + match(Set dst (AbsD src)); + ins_cost(150); + format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" + "# abs double by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vandpd($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(double_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} + +#ifdef _LP64 +instruct absD_reg_reg_evex(regD dst, regD src) %{ + predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (AbsD src)); ins_cost(150); format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" @@ -2749,6 +2841,35 @@ ins_pipe(pipe_slow); %} +instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{ + predicate(VM_Version::supports_avx512novl()); + match(Set dst (AbsD src1)); + effect(TEMP src2); + ins_cost(150); + format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, + ExternalAddress(double_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} +#else // _LP64 +instruct absD_reg_reg_evex(regD dst, regD src) %{ + predicate(UseAVX > 2); + match(Set dst (AbsD src)); + ins_cost(150); + format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" + "# abs double by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vandpd($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(double_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} +#endif + instruct negF_reg(regF dst) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (NegF dst)); @@ -4554,7 +4675,7 @@ %} instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ - predicate(UseAVX > 0 && UseAVX < 3); + predicate(VM_Version::supports_avxonly()); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" @@ -4594,37 +4715,37 @@ instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE > 2 && UseAVX == 0); match(Set dst (AddReductionVI src1 src2)); - effect(TEMP tmp2, TEMP tmp); - format %{ "movdqu $tmp2,$src2\n\t" - "phaddd $tmp2,$tmp2\n\t" - "phaddd $tmp2,$tmp2\n\t" - "movd $tmp,$src1\n\t" - "paddd $tmp,$tmp2\n\t" - "movd $dst,$tmp\t! add reduction4I" %} + effect(TEMP tmp, TEMP tmp2); + format %{ "movdqu $tmp,$src2\n\t" + "phaddd $tmp,$tmp\n\t" + "phaddd $tmp,$tmp\n\t" + "movd $tmp2,$src1\n\t" + "paddd $tmp2,$tmp\n\t" + "movd $dst,$tmp2\t! add reduction4I" %} ins_encode %{ - __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); - __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); - __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); - __ movdl($tmp$$XMMRegister, $src1$$Register); - __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdl($dst$$Register, $tmp$$XMMRegister); + __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); + __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); + __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); + __ movdl($tmp2$$XMMRegister, $src1$$Register); + __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ - predicate(UseAVX > 0 && UseAVX < 3); + predicate(VM_Version::supports_avxonly()); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" - "vphaddd $tmp,$tmp,$tmp2\n\t" + "vphaddd $tmp,$tmp,$tmp\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! add reduction4I" %} ins_encode %{ int vector_len = 0; __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); - __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); + __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); @@ -4657,7 +4778,7 @@ %} instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ - predicate(UseAVX > 0 && UseAVX < 3); + predicate(VM_Version::supports_avxonly()); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" @@ -4712,7 +4833,7 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vextracti64x4 $tmp3,$src2\n\t" + format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" "vpaddd $tmp3,$tmp3,$src2\n\t" "vextracti128 $tmp,$tmp3\n\t" "vpaddd $tmp,$tmp,$tmp3\n\t" @@ -4724,7 +4845,7 @@ "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ - __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); + __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); @@ -4763,7 +4884,7 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" + format %{ "vextracti128 $tmp,$src2\n\t" "vpaddq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" @@ -4771,7 +4892,7 @@ "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction4L" %} ins_encode %{ - __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -4786,7 +4907,7 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti64x4 $tmp2,$src2\n\t" + format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" "vpaddq $tmp2,$tmp2,$src2\n\t" "vextracti128 $tmp,$tmp2\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" @@ -4796,7 +4917,7 @@ "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction8L" %} ins_encode %{ - __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -4810,290 +4931,280 @@ %} #endif -instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ +instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "movdqu $tmp,$src1\n\t" - "addss $tmp,$src2\n\t" - "pshufd $tmp2,$src2,0x01\n\t" - "addss $tmp,$tmp2\n\t" - "movdqu $dst,$tmp\t! add reduction2F" %} + match(Set dst (AddReductionVF dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "addss $dst,$src2\n\t" + "pshufd $tmp,$src2,0x01\n\t" + "addss $dst,$tmp\t! add reduction2F" %} ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ addss($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); - __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); + __ addss($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); + __ addss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ +instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseAVX > 0); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp2, TEMP tmp); - format %{ "vaddss $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVF dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %} + "vaddss $dst,$dst,$tmp\t! add reduction2F" %} ins_encode %{ - __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ +instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "movdqu $tmp,$src1\n\t" - "addss $tmp,$src2\n\t" - "pshufd $tmp2,$src2,0x01\n\t" - "addss $tmp,$tmp2\n\t" - "pshufd $tmp2,$src2,0x02\n\t" - "addss $tmp,$tmp2\n\t" - "pshufd $tmp2,$src2,0x03\n\t" - "addss $tmp,$tmp2\n\t" - "movdqu $dst,$tmp\t! add reduction4F" %} - ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ addss($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); - __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); - __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); - __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); + match(Set dst (AddReductionVF dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "addss $dst,$src2\n\t" + "pshufd $tmp,$src2,0x01\n\t" + "addss $dst,$tmp\n\t" + "pshufd $tmp,$src2,0x02\n\t" + "addss $dst,$tmp\n\t" + "pshufd $tmp,$src2,0x03\n\t" + "addss $dst,$tmp\t! add reduction4F" %} + ins_encode %{ + __ addss($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); + __ addss($dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); + __ addss($dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); + __ addss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ +instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseAVX > 0); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "vaddss $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVF dst src2)); + effect(TEMP tmp, TEMP dst); + format %{ "vaddss $dst,dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %} + "vaddss $dst,$dst,$tmp\t! add reduction4F" %} ins_encode %{ - __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ +instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vaddss $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVF dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "vextractf128 $tmp3,$src2\n\t" - "vaddss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vaddss $dst,$tmp2,$tmp\t! add reduction8F" %} + "vaddss $dst,$dst,$tmp\n\t" + "vextractf128 $tmp2,$src2\n\t" + "vaddss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vaddss $dst,$dst,$tmp\t! add reduction8F" %} ins_encode %{ - __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ +instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vaddss $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVF dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x1\n\t" - "vaddss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x2\n\t" - "vaddss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x3\n\t" - "vaddss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vaddss $dst,$tmp2,$tmp\t! add reduction16F" %} + "vaddss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vaddss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vaddss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vaddss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vaddss $dst,$dst,$tmp\t! add reduction16F" %} ins_encode %{ - __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ +instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (AddReductionVD src1 src2)); + match(Set dst (AddReductionVD dst src2)); effect(TEMP tmp, TEMP dst); - format %{ "movdqu $tmp,$src1\n\t" - "addsd $tmp,$src2\n\t" - "pshufd $dst,$src2,0xE\n\t" + format %{ "addsd $dst,$src2\n\t" + "pshufd $tmp,$src2,0xE\n\t" "addsd $dst,$tmp\t! add reduction2D" %} ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ addsd($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); + __ addsd($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ +instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseAVX > 0); - match(Set dst (AddReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "vaddsd $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVD dst src2)); + effect(TEMP tmp, TEMP dst); + format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %} + "vaddsd $dst,$dst,$tmp\t! add reduction2D" %} ins_encode %{ - __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ +instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 0); - match(Set dst (AddReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vaddsd $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVD dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vaddsd $tmp2,$tmp2,$tmp\n\t" - "vextractf128 $tmp3,$src2\n\t" - "vaddsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} + "vaddsd $dst,$dst,$tmp\n\t" + "vextractf32x4h $tmp2,$src2, 0x1\n\t" + "vaddsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} ins_encode %{ - __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ +instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 2); - match(Set dst (AddReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vaddsd $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVD dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vaddsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x1\n\t" - "vaddsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vaddsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x2\n\t" - "vaddsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vaddsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x3\n\t" - "vaddsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vaddsd $dst,$tmp2,$tmp\t! add reduction8D" %} + "vaddsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vaddsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vaddsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vaddsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vaddsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vaddsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} ins_encode %{ - __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -5216,7 +5327,7 @@ predicate(UseAVX > 2); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vextracti64x4 $tmp3,$src2\n\t" + format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" "vpmulld $tmp3,$tmp3,$src2\n\t" "vextracti128 $tmp,$tmp3\n\t" "vpmulld $tmp,$tmp,$src2\n\t" @@ -5228,7 +5339,7 @@ "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ - __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); + __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); @@ -5267,7 +5378,7 @@ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" + format %{ "vextracti128 $tmp,$src2\n\t" "vpmullq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" @@ -5275,7 +5386,7 @@ "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction4L" %} ins_encode %{ - __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5290,7 +5401,7 @@ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti64x4 $tmp2,$src2\n\t" + format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" "vpmullq $tmp2,$tmp2,$src2\n\t" "vextracti128 $tmp,$tmp2\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" @@ -5300,7 +5411,7 @@ "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction8L" %} ins_encode %{ - __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5314,290 +5425,280 @@ %} #endif -instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ +instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "movdqu $tmp,$src1\n\t" - "mulss $tmp,$src2\n\t" - "pshufd $tmp2,$src2,0x01\n\t" - "mulss $tmp,$tmp2\n\t" - "movdqu $dst,$tmp\t! mul reduction2F" %} + match(Set dst (MulReductionVF dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "mulss $dst,$src2\n\t" + "pshufd $tmp,$src2,0x01\n\t" + "mulss $dst,$tmp\t! mul reduction2F" %} ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); - __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); + __ mulss($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); + __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ +instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseAVX > 0); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "vmulss $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVF dst src2)); + effect(TEMP tmp, TEMP dst); + format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %} + "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} ins_encode %{ - __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ +instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "movdqu $tmp,$src1\n\t" - "mulss $tmp,$src2\n\t" - "pshufd $tmp2,$src2,0x01\n\t" - "mulss $tmp,$tmp2\n\t" - "pshufd $tmp2,$src2,0x02\n\t" - "mulss $tmp,$tmp2\n\t" - "pshufd $tmp2,$src2,0x03\n\t" - "mulss $tmp,$tmp2\n\t" - "movdqu $dst,$tmp\t! mul reduction4F" %} - ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); - __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); - __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); - __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); + match(Set dst (MulReductionVF dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "mulss $dst,$src2\n\t" + "pshufd $tmp,$src2,0x01\n\t" + "mulss $dst,$tmp\n\t" + "pshufd $tmp,$src2,0x02\n\t" + "mulss $dst,$tmp\n\t" + "pshufd $tmp,$src2,0x03\n\t" + "mulss $dst,$tmp\t! mul reduction4F" %} + ins_encode %{ + __ mulss($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); + __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); + __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); + __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ +instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseAVX > 0); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "vmulss $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVF dst src2)); + effect(TEMP tmp, TEMP dst); + format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %} + "vmulss $dst,$dst,$tmp\t! mul reduction4F" %} ins_encode %{ - __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ +instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vmulss $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVF dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "vextractf128 $tmp3,$src2\n\t" - "vmulss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vmulss $dst,$tmp2,$tmp\t! mul reduction8F" %} + "vmulss $dst,$dst,$tmp\n\t" + "vextractf128 $tmp2,$src2\n\t" + "vmulss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} ins_encode %{ - __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ +instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vmulss $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVF dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "vextractf32x4 $tmp3,$src2, 0x1\n\t" - "vmulss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "vextractf32x4 $tmp3,$src2, 0x2\n\t" - "vmulss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "vextractf32x4 $tmp3,$src2, 0x3\n\t" - "vmulss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vmulss $dst,$tmp2,$tmp\t! mul reduction16F" %} + "vmulss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vmulss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vmulss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vmulss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} ins_encode %{ - __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ +instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (MulReductionVD src1 src2)); - effect(TEMP tmp, TEMP dst); - format %{ "movdqu $tmp,$src1\n\t" - "mulsd $tmp,$src2\n\t" - "pshufd $dst,$src2,0xE\n\t" + match(Set dst (MulReductionVD dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "mulsd $dst,$src2\n\t" + "pshufd $tmp,$src2,0xE\n\t" "mulsd $dst,$tmp\t! mul reduction2D" %} ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); + __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ +instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseAVX > 0); - match(Set dst (MulReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "vmulsd $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVD dst src2)); + effect(TEMP tmp, TEMP dst); + format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %} + "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %} ins_encode %{ - __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ +instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 0); - match(Set dst (MulReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vmulsd $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVD dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vmulsd $tmp2,$tmp2,$tmp\n\t" - "vextractf128 $tmp3,$src2\n\t" - "vmulsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %} + "vmulsd $dst,$dst,$tmp\n\t" + "vextractf128 $tmp2,$src2\n\t" + "vmulsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} ins_encode %{ - __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ +instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 2); - match(Set dst (MulReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vmulsd $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVD dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vmulsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x1\n\t" - "vmulsd $tmp2,$tmp2,$tmp3\n\t" + "vmulsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vmulsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x2\n\t" - "vmulsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vmulsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x3\n\t" - "vmulsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vmulsd $dst,$tmp2,$tmp\t! mul reduction8D" %} + "vmulsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vmulsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vmulsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vmulsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} ins_encode %{ - __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -5608,7 +5709,7 @@ // Bytes vector add instruct vadd4B(vecS dst, vecS src) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (AddVB dst src)); format %{ "paddb $dst,$src\t! add packed4B" %} ins_encode %{ @@ -5617,8 +5718,8 @@ ins_pipe( pipe_slow ); %} -instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} ins_encode %{ @@ -5628,42 +5729,44 @@ ins_pipe( pipe_slow ); %} -instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (AddVB src (LoadVector mem))); - format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} +instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} ins_encode %{ int vector_len = 0; - __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8B(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (AddVB dst src)); - format %{ "paddb $dst,$src\t! add packed8B" %} +instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (AddVB dst src2)); + effect(TEMP src1); + format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %} ins_encode %{ - __ paddb($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (AddVB src1 src2)); - format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} +instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} ins_encode %{ int vector_len = 0; - __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (AddVB src (LoadVector mem))); - format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} + format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); @@ -5671,20 +5774,32 @@ ins_pipe( pipe_slow ); %} -instruct vadd16B(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 16); +instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (AddVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8B(vecD dst, vecD src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (AddVB dst src)); - format %{ "paddb $dst,$src\t! add packed16B" %} + format %{ "paddb $dst,$src\t! add packed8B" %} ins_encode %{ __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 16); +instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); match(Set dst (AddVB src1 src2)); - format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} + format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); @@ -5692,732 +5807,1426 @@ ins_pipe( pipe_slow ); %} -instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 16); - match(Set dst (AddVB src (LoadVector mem))); - format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} +instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} ins_encode %{ int vector_len = 0; - __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 32); - match(Set dst (AddVB src1 src2)); - format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} +instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (AddVB dst src2)); + effect(TEMP src1); + format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %} ins_encode %{ - int vector_len = 1; + int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 32); +instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); match(Set dst (AddVB src (LoadVector mem))); - format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} + format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} ins_encode %{ - int vector_len = 1; + int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 64); - match(Set dst (AddVB src1 src2)); - format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} +instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} ins_encode %{ - int vector_len = 2; - __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 64); - match(Set dst (AddVB src (LoadVector mem))); - format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} +instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (AddVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} ins_encode %{ - int vector_len = 2; + int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -// Shorts/Chars vector add -instruct vadd2S(vecS dst, vecS src) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVS dst src)); - format %{ "paddw $dst,$src\t! add packed2S" %} +instruct vadd16B(vecX dst, vecX src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 16); + match(Set dst (AddVB dst src)); + format %{ "paddb $dst,$src\t! add packed16B" %} ins_encode %{ - __ paddw($dst$$XMMRegister, $src$$XMMRegister); + __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (AddVS src1 src2)); - format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} - ins_encode %{ +instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} + ins_encode %{ int vector_len = 0; - __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (AddVS src (LoadVector mem))); - format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} +instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} ins_encode %{ int vector_len = 0; - __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4S(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AddVS dst src)); - format %{ "paddw $dst,$src\t! add packed4S" %} +instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (AddVB dst src2)); + effect(TEMP src1); + format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %} ins_encode %{ - __ paddw($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (AddVS src1 src2)); - format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} +instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ int vector_len = 0; - __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (AddVS src (LoadVector mem))); - format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} +instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ int vector_len = 0; - __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8S(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (AddVS dst src)); - format %{ "paddw $dst,$src\t! add packed8S" %} +instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (AddVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ - __ paddw($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (AddVS src1 src2)); - format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} +instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} ins_encode %{ - int vector_len = 0; - __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 1; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (AddVS src (LoadVector mem))); - format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} +instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} ins_encode %{ - int vector_len = 0; - __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 1; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (AddVS src1 src2)); - format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} +instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); + match(Set dst (AddVB dst src2)); + effect(TEMP src1); + format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %} ins_encode %{ int vector_len = 1; - __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (AddVS src (LoadVector mem))); - format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} +instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ins_encode %{ int vector_len = 1; - __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); - match(Set dst (AddVS src1 src2)); - format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} +instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ins_encode %{ - int vector_len = 2; - __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 1; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); - match(Set dst (AddVS src (LoadVector mem))); - format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} +instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (AddVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ins_encode %{ - int vector_len = 2; - __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 1; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -// Integers vector add -instruct vadd2I(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVI dst src)); - format %{ "paddd $dst,$src\t! add packed2I" %} +instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} ins_encode %{ - __ paddd($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 2; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (AddVI src1 src2)); - format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} +instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} ins_encode %{ - int vector_len = 0; - __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 2; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (AddVI src (LoadVector mem))); - format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} +// Shorts/Chars vector add +instruct vadd2S(vecS dst, vecS src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVS dst src)); + format %{ "paddw $dst,$src\t! add packed2S" %} ins_encode %{ - int vector_len = 0; - __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vadd4I(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AddVI dst src)); - format %{ "paddd $dst,$src\t! add packed4I" %} +instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} ins_encode %{ - __ paddd($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (AddVI src1 src2)); - format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} +instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} ins_encode %{ int vector_len = 0; - __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (AddVI src (LoadVector mem))); - format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} +instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (AddVS dst src2)); + effect(TEMP src1); + format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %} ins_encode %{ int vector_len = 0; - __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 8); - match(Set dst (AddVI src1 src2)); - format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} +instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} ins_encode %{ - int vector_len = 1; - __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 8); - match(Set dst (AddVI src (LoadVector mem))); - format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} +instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} ins_encode %{ - int vector_len = 1; - __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 16); - match(Set dst (AddVI src1 src2)); - format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} +instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (AddVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} ins_encode %{ - int vector_len = 2; - __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 16); - match(Set dst (AddVI src (LoadVector mem))); - format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} +instruct vadd4S(vecD dst, vecD src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVS dst src)); + format %{ "paddw $dst,$src\t! add packed4S" %} ins_encode %{ - int vector_len = 2; - __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -// Longs vector add -instruct vadd2L(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVL dst src)); - format %{ "paddq $dst,$src\t! add packed2L" %} +instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} ins_encode %{ - __ paddq($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (AddVL src1 src2)); - format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} +instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} ins_encode %{ int vector_len = 0; - __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (AddVL src (LoadVector mem))); - format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} +instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (AddVS dst src2)); + effect(TEMP src1); + format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %} ins_encode %{ int vector_len = 0; - __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 4); - match(Set dst (AddVL src1 src2)); - format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} +instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} ins_encode %{ - int vector_len = 1; - __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 4); - match(Set dst (AddVL src (LoadVector mem))); - format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} +instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} ins_encode %{ - int vector_len = 1; - __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (AddVL src1 src2)); - format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} +instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (AddVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} ins_encode %{ - int vector_len = 2; - __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (AddVL src (LoadVector mem))); - format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} +instruct vadd8S(vecX dst, vecX src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); + match(Set dst (AddVS dst src)); + format %{ "paddw $dst,$src\t! add packed8S" %} ins_encode %{ - int vector_len = 2; - __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -// Floats vector add -instruct vadd2F(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVF dst src)); - format %{ "addps $dst,$src\t! add packed2F" %} +instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} ins_encode %{ - __ addps($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (AddVF src1 src2)); - format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} +instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} ins_encode %{ int vector_len = 0; - __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (AddVF src (LoadVector mem))); - format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} +instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (AddVS dst src2)); + effect(TEMP src1); + format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %} ins_encode %{ int vector_len = 0; - __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4F(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AddVF dst src)); - format %{ "addps $dst,$src\t! add packed4F" %} +instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ - __ addps($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (AddVF src1 src2)); - format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} +instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ int vector_len = 0; - __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (AddVF src (LoadVector mem))); - format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} +instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (AddVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ int vector_len = 0; - __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (AddVF src1 src2)); - format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} +instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} ins_encode %{ int vector_len = 1; - __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (AddVF src (LoadVector mem))); - format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} +instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} ins_encode %{ int vector_len = 1; - __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 16); - match(Set dst (AddVF src1 src2)); - format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} +instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (AddVS dst src2)); + effect(TEMP src1); + format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (AddVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} ins_encode %{ int vector_len = 2; - __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 16); - match(Set dst (AddVF src (LoadVector mem))); - format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} +instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} ins_encode %{ int vector_len = 2; - __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -// Doubles vector add -instruct vadd2D(vecX dst, vecX src) %{ +// Integers vector add +instruct vadd2I(vecD dst, vecD src) %{ predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVD dst src)); - format %{ "addpd $dst,$src\t! add packed2D" %} + match(Set dst (AddVI dst src)); + format %{ "paddd $dst,$src\t! add packed2I" %} ins_encode %{ - __ addpd($dst$$XMMRegister, $src$$XMMRegister); + __ paddd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ +instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (AddVD src1 src2)); - format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} + match(Set dst (AddVI src1 src2)); + format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} ins_encode %{ int vector_len = 0; - __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ +instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (AddVD src (LoadVector mem))); - format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} + match(Set dst (AddVI src (LoadVector mem))); + format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} ins_encode %{ int vector_len = 0; - __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (AddVD src1 src2)); - format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} +instruct vadd4I(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI dst src)); + format %{ "paddd $dst,$src\t! add packed4I" %} ins_encode %{ - int vector_len = 1; - __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ paddd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ +instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (AddVD src (LoadVector mem))); - format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} + match(Set dst (AddVI src1 src2)); + format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} ins_encode %{ - int vector_len = 1; - __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 0; + __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (AddVD src1 src2)); - format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} +instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVI src (LoadVector mem))); + format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} ins_encode %{ - int vector_len = 2; - __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 0; + __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (AddVD src (LoadVector mem))); - format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} +instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (AddVI src1 src2)); + format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} ins_encode %{ - int vector_len = 2; - __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 1; + __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -// --------------------------------- SUB -------------------------------------- - -// Bytes vector sub -instruct vsub4B(vecS dst, vecS src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (SubVB dst src)); - format %{ "psubb $dst,$src\t! sub packed4B" %} +instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (AddVI src (LoadVector mem))); + format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} ins_encode %{ - __ psubb($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 1; + __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (SubVB src1 src2)); - format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} +instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16); + match(Set dst (AddVI src1 src2)); + format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} ins_encode %{ - int vector_len = 0; - __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 2; + __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (SubVB src (LoadVector mem))); - format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} +instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16); + match(Set dst (AddVI src (LoadVector mem))); + format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} ins_encode %{ - int vector_len = 0; - __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 2; + __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub8B(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (SubVB dst src)); - format %{ "psubb $dst,$src\t! sub packed8B" %} +// Longs vector add +instruct vadd2L(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL dst src)); + format %{ "paddq $dst,$src\t! add packed2L" %} ins_encode %{ - __ psubb($dst$$XMMRegister, $src$$XMMRegister); + __ paddq($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (SubVB src1 src2)); - format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} +instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVL src1 src2)); + format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} ins_encode %{ int vector_len = 0; - __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (SubVB src (LoadVector mem))); - format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} +instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVL src (LoadVector mem))); + format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} ins_encode %{ int vector_len = 0; - __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub16B(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (SubVB dst src)); - format %{ "psubb $dst,$src\t! sub packed16B" %} +instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (AddVL src1 src2)); + format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} + ins_encode %{ + int vector_len = 1; + __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (AddVL src (LoadVector mem))); + format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} + ins_encode %{ + int vector_len = 1; + __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (AddVL src1 src2)); + format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} + ins_encode %{ + int vector_len = 2; + __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (AddVL src (LoadVector mem))); + format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} + ins_encode %{ + int vector_len = 2; + __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// Floats vector add +instruct vadd2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF dst src)); + format %{ "addps $dst,$src\t! add packed2F" %} + ins_encode %{ + __ addps($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} + ins_encode %{ + int vector_len = 0; + __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVF src (LoadVector mem))); + format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} + ins_encode %{ + int vector_len = 0; + __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4F(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVF dst src)); + format %{ "addps $dst,$src\t! add packed4F" %} + ins_encode %{ + __ addps($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVF src1 src2)); + format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} + ins_encode %{ + int vector_len = 0; + __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVF src (LoadVector mem))); + format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} + ins_encode %{ + int vector_len = 0; + __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AddVF src1 src2)); + format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} + ins_encode %{ + int vector_len = 1; + __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AddVF src (LoadVector mem))); + format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} + ins_encode %{ + int vector_len = 1; + __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16); + match(Set dst (AddVF src1 src2)); + format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} + ins_encode %{ + int vector_len = 2; + __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16); + match(Set dst (AddVF src (LoadVector mem))); + format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} + ins_encode %{ + int vector_len = 2; + __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// Doubles vector add +instruct vadd2D(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVD dst src)); + format %{ "addpd $dst,$src\t! add packed2D" %} + ins_encode %{ + __ addpd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVD src1 src2)); + format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} + ins_encode %{ + int vector_len = 0; + __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVD src (LoadVector mem))); + format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} + ins_encode %{ + int vector_len = 0; + __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVD src1 src2)); + format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} + ins_encode %{ + int vector_len = 1; + __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVD src (LoadVector mem))); + format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} + ins_encode %{ + int vector_len = 1; + __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (AddVD src1 src2)); + format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} + ins_encode %{ + int vector_len = 2; + __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (AddVD src (LoadVector mem))); + format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} + ins_encode %{ + int vector_len = 2; + __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- SUB -------------------------------------- + +// Bytes vector sub +instruct vsub4B(vecS dst, vecS src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVB dst src)); + format %{ "psubb $dst,$src\t! sub packed4B" %} + ins_encode %{ + __ psubb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (SubVB dst src2)); + effect(TEMP src1); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (SubVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B(vecD dst, vecD src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); + match(Set dst (SubVB dst src)); + format %{ "psubb $dst,$src\t! sub packed8B" %} + ins_encode %{ + __ psubb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (SubVB dst src2)); + effect(TEMP src1); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (SubVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B(vecX dst, vecX src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 16); + match(Set dst (SubVB dst src)); + format %{ "psubb $dst,$src\t! sub packed16B" %} + ins_encode %{ + __ psubb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (SubVB dst src2)); + effect(TEMP src1); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (SubVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); + match(Set dst (SubVB dst src2)); + effect(TEMP src1); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); + match(Set dst (SubVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} + ins_encode %{ + int vector_len = 2; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} + ins_encode %{ + int vector_len = 2; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// Shorts/Chars vector sub +instruct vsub2S(vecS dst, vecS src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVS dst src)); + format %{ "psubw $dst,$src\t! sub packed2S" %} + ins_encode %{ + __ psubw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (SubVS dst src2)); + effect(TEMP src1); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (SubVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4S(vecD dst, vecD src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVS dst src)); + format %{ "psubw $dst,$src\t! sub packed4S" %} ins_encode %{ - __ psubb($dst$$XMMRegister, $src$$XMMRegister); + __ psubw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 16); - match(Set dst (SubVB src1 src2)); - format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} +instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} ins_encode %{ int vector_len = 0; - __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 16); - match(Set dst (SubVB src (LoadVector mem))); - format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} +instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} ins_encode %{ int vector_len = 0; - __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 32); - match(Set dst (SubVB src1 src2)); - format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} +instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (SubVS dst src2)); + effect(TEMP src1); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} ins_encode %{ - int vector_len = 1; - __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 32); - match(Set dst (SubVB src (LoadVector mem))); - format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} +instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} ins_encode %{ - int vector_len = 1; - __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 64); - match(Set dst (SubVB src1 src2)); - format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} +instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} ins_encode %{ - int vector_len = 2; - __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 64); - match(Set dst (SubVB src (LoadVector mem))); - format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} +instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (SubVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} ins_encode %{ - int vector_len = 2; - __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -// Shorts/Chars vector sub -instruct vsub2S(vecS dst, vecS src) %{ - predicate(n->as_Vector()->length() == 2); +instruct vsub8S(vecX dst, vecX src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS dst src)); - format %{ "psubw $dst,$src\t! sub packed2S" %} + format %{ "psubw $dst,$src\t! sub packed8S" %} ins_encode %{ __ psubw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); match(Set dst (SubVS src1 src2)); - format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} + format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); @@ -6425,42 +7234,44 @@ ins_pipe( pipe_slow ); %} -instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (SubVS src (LoadVector mem))); - format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} +instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ int vector_len = 0; - __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub4S(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (SubVS dst src)); - format %{ "psubw $dst,$src\t! sub packed4S" %} +instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (SubVS dst src2)); + effect(TEMP src1); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ - __ psubw($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (SubVS src1 src2)); - format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} +instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ int vector_len = 0; - __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (SubVS src (LoadVector mem))); - format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} + format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); @@ -6468,41 +7279,44 @@ ins_pipe( pipe_slow ); %} -instruct vsub8S(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (SubVS dst src)); - format %{ "psubw $dst,$src\t! sub packed8S" %} +instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (SubVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ - __ psubw($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (SubVS src1 src2)); - format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} + format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} ins_encode %{ - int vector_len = 0; + int vector_len = 1; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (SubVS src (LoadVector mem))); - format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} +instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} ins_encode %{ - int vector_len = 0; - __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 1; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (SubVS src1 src2)); +instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (SubVS dst src2)); + effect(TEMP src1); format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} ins_encode %{ int vector_len = 1; @@ -6511,8 +7325,19 @@ ins_pipe( pipe_slow ); %} -instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} ins_encode %{ @@ -6522,8 +7347,20 @@ ins_pipe( pipe_slow ); %} +instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (SubVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} ins_encode %{ @@ -6534,7 +7371,7 @@ %} instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} ins_encode %{ @@ -6883,56 +7720,212 @@ ins_pipe( pipe_slow ); %} -instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (SubVD src (LoadVector mem))); - format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} +instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVD src (LoadVector mem))); + format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} + ins_encode %{ + int vector_len = 1; + __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (SubVD src1 src2)); + format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} + ins_encode %{ + int vector_len = 2; + __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (SubVD src (LoadVector mem))); + format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} + ins_encode %{ + int vector_len = 2; + __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- MUL -------------------------------------- + +// Shorts/Chars vector mul +instruct vmul2S(vecS dst, vecS src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); + match(Set dst (MulVS dst src)); + format %{ "pmullw $dst,$src\t! mul packed2S" %} + ins_encode %{ + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (MulVS dst src2)); + effect(TEMP src1); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (MulVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4S(vecD dst, vecD src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVS dst src)); + format %{ "pmullw $dst,$src\t! mul packed4S" %} + ins_encode %{ + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (MulVS dst src2)); + effect(TEMP src1); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} ins_encode %{ - int vector_len = 1; - __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (SubVD src1 src2)); - format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} +instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} ins_encode %{ - int vector_len = 2; - __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (SubVD src (LoadVector mem))); - format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} +instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (MulVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} ins_encode %{ - int vector_len = 2; - __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -// --------------------------------- MUL -------------------------------------- - -// Shorts/Chars vector mul -instruct vmul2S(vecS dst, vecS src) %{ - predicate(n->as_Vector()->length() == 2); +instruct vmul8S(vecX dst, vecX src) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS dst src)); - format %{ "pmullw $dst,$src\t! mul packed2S" %} + format %{ "pmullw $dst,$src\t! mul packed8S" %} ins_encode %{ __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); match(Set dst (MulVS src1 src2)); - format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} + format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); @@ -6940,42 +7933,44 @@ ins_pipe( pipe_slow ); %} -instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (MulVS src (LoadVector mem))); - format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} +instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} ins_encode %{ int vector_len = 0; - __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vmul4S(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (MulVS dst src)); - format %{ "pmullw $dst,$src\t! mul packed4S" %} +instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (MulVS dst src2)); + effect(TEMP src1); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} ins_encode %{ - __ pmullw($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (MulVS src1 src2)); - format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} +instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} ins_encode %{ int vector_len = 0; - __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (MulVS src (LoadVector mem))); - format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} + format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); @@ -6983,41 +7978,44 @@ ins_pipe( pipe_slow ); %} -instruct vmul8S(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (MulVS dst src)); - format %{ "pmullw $dst,$src\t! mul packed8S" %} +instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (MulVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} ins_encode %{ - __ pmullw($dst$$XMMRegister, $src$$XMMRegister); + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (MulVS src1 src2)); - format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} + format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ - int vector_len = 0; + int vector_len = 1; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (MulVS src (LoadVector mem))); - format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} +instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ - int vector_len = 0; - __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 1; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (MulVS src1 src2)); +instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (MulVS dst src2)); + effect(TEMP src1); format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ int vector_len = 1; @@ -7026,8 +8024,19 @@ ins_pipe( pipe_slow ); %} -instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} ins_encode %{ @@ -7037,8 +8046,20 @@ ins_pipe( pipe_slow ); %} +instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (MulVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} ins_encode %{ @@ -7049,7 +8070,7 @@ %} instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} ins_encode %{ @@ -7679,61 +8700,237 @@ match(Set dst (SqrtVD (LoadVector mem))); format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} ins_encode %{ - int vector_len = 1; - __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 1; + __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (SqrtVD src)); + format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} + ins_encode %{ + int vector_len = 2; + __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsqrt8D_mem(vecZ dst, memory mem) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (SqrtVD (LoadVector mem))); + format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} + ins_encode %{ + int vector_len = 2; + __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------ LeftShift ----------------------------------- + +// Shorts/Chars vector left shift +instruct vsll2S(vecS dst, vecS shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS dst shift)); + format %{ "psllw $dst,$shift\t! left shift packed2S" %} + ins_encode %{ + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_imm(vecS dst, immI8 shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS dst shift)); + format %{ "psllw $dst,$shift\t! left shift packed2S" %} + ins_encode %{ + __ psllw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S(vecD dst, vecS shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS dst shift)); + format %{ "psllw $dst,$shift\t! left shift packed4S" %} + ins_encode %{ + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_imm(vecD dst, immI8 shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS dst shift)); + format %{ "psllw $dst,$shift\t! left shift packed4S" %} + ins_encode %{ + __ psllw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (SqrtVD src)); - format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} +instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ - int vector_len = 2; - __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsqrt8D_mem(vecZ dst, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (SqrtVD (LoadVector mem))); - format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} +instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ - int vector_len = 2; - __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -// ------------------------------ LeftShift ----------------------------------- - -// Shorts/Chars vector left shift -instruct vsll2S(vecS dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 2); +instruct vsll8S(vecX dst, vecS shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed2S" %} + format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vsll2S_imm(vecS dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 2); +instruct vsll8S_imm(vecX dst, immI8 shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed2S" %} + format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ __ psllw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} -instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); @@ -7741,52 +8938,56 @@ ins_pipe( pipe_slow ); %} -instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll4S(vecD dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 4); +instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed4S" %} + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ - __ psllw($dst$$XMMRegister, $shift$$XMMRegister); + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll4S_imm(vecD dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed4S" %} +instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ - __ psllw($dst$$XMMRegister, (int)$shift$$constant); + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} +instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); @@ -7794,62 +8995,66 @@ ins_pipe( pipe_slow ); %} -instruct vsll8S(vecX dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed8S" %} +instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ - __ psllw($dst$$XMMRegister, $shift$$XMMRegister); + int vector_len = 1; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll8S_imm(vecX dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed8S" %} +instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ - __ psllw($dst$$XMMRegister, (int)$shift$$constant); + int vector_len = 1; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} +instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ - int vector_len = 0; + int vector_len = 1; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} + format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ - int vector_len = 0; + int vector_len = 1; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ int vector_len = 1; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (LShiftVS src shift)); +instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ int vector_len = 1; @@ -7859,7 +9064,7 @@ %} instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ @@ -7870,7 +9075,7 @@ %} instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ @@ -8079,54 +9284,230 @@ match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} ins_encode %{ - int vector_len = 2; - __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + int vector_len = 2; + __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (LShiftVL src shift)); + format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} + ins_encode %{ + int vector_len = 2; + __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// ----------------------- LogicalRightShift ----------------------------------- + +// Shorts vector logical right shift produces incorrect Java result +// for negative data because java code convert short value into int with +// sign extension before a shift. But char vectors are fine since chars are +// unsigned values. + +instruct vsrl2S(vecS dst, vecS shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS dst shift)); + format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} + ins_encode %{ + __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_imm(vecS dst, immI8 shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS dst shift)); + format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} + ins_encode %{ + __ psrlw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S(vecD dst, vecS shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS dst shift)); + format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} + ins_encode %{ + __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_imm(vecD dst, immI8 shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS dst shift)); + format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} + ins_encode %{ + __ psrlw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (LShiftVL src shift)); - format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} +instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ - int vector_len = 2; - __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -// ----------------------- LogicalRightShift ----------------------------------- - -// Shorts vector logical right shift produces incorrect Java result -// for negative data because java code convert short value into int with -// sign extension before a shift. But char vectors are fine since chars are -// unsigned values. - -instruct vsrl2S(vecS dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 2); +instruct vsrl8S(vecX dst, vecS shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} + format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vsrl2S_imm(vecS dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 2); +instruct vsrl8S_imm(vecX dst, immI8 shift) %{ + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} + format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} -instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); @@ -8134,52 +9515,56 @@ ins_pipe( pipe_slow ); %} -instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ int vector_len = 0; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl4S(vecD dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 4); +instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ - __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl4S_imm(vecD dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} +instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ - __ psrlw($dst$$XMMRegister, (int)$shift$$constant); + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ int vector_len = 0; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} +instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); @@ -8187,62 +9572,66 @@ ins_pipe( pipe_slow ); %} -instruct vsrl8S(vecX dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} +instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ - __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); + int vector_len = 1; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl8S_imm(vecX dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} +instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ - __ psrlw($dst$$XMMRegister, (int)$shift$$constant); + int vector_len = 1; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} +instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ - int vector_len = 0; + int vector_len = 1; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ - int vector_len = 0; + int vector_len = 1; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ int vector_len = 1; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (URShiftVS src shift)); +instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ int vector_len = 1; @@ -8252,7 +9641,7 @@ %} instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ins_encode %{ @@ -8263,7 +9652,7 @@ %} instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ins_encode %{ @@ -8493,7 +9882,7 @@ // Shorts/Chars vector arithmetic right shift instruct vsra2S(vecS dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ @@ -8512,8 +9901,8 @@ ins_pipe( pipe_slow ); %} -instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ @@ -8523,8 +9912,42 @@ ins_pipe( pipe_slow ); %} -instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ @@ -8534,8 +9957,20 @@ ins_pipe( pipe_slow ); %} +instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsra4S(vecD dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ @@ -8545,7 +9980,7 @@ %} instruct vsra4S_imm(vecD dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ @@ -8554,8 +9989,8 @@ ins_pipe( pipe_slow ); %} -instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ @@ -8565,8 +10000,42 @@ ins_pipe( pipe_slow ); %} -instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ @@ -8576,8 +10045,20 @@ ins_pipe( pipe_slow ); %} +instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsra8S(vecX dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ @@ -8587,7 +10068,7 @@ %} instruct vsra8S_imm(vecX dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ @@ -8596,8 +10077,8 @@ ins_pipe( pipe_slow ); %} -instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ @@ -8607,8 +10088,42 @@ ins_pipe( pipe_slow ); %} -instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ @@ -8618,8 +10133,31 @@ ins_pipe( pipe_slow ); %} -instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ @@ -8629,8 +10167,31 @@ ins_pipe( pipe_slow ); %} -instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ @@ -8640,8 +10201,20 @@ ins_pipe( pipe_slow ); %} +instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ins_encode %{ @@ -8652,7 +10225,7 @@ %} instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ins_encode %{ --- old/src/cpu/x86/vm/x86_32.ad 2015-11-07 22:07:26.723446700 -0800 +++ new/src/cpu/x86/vm/x86_32.ad 2015-11-07 22:07:26.494446700 -0800 @@ -291,9 +291,7 @@ size += 6; // fldcw } if (C->max_vector_size() > 16) { - if(UseAVX <= 2) { - size += 3; // vzeroupper - } + size += 3; // vzeroupper } return size; } @@ -1915,7 +1913,7 @@ if (stub == NULL) { ciEnv::current()->record_failure("CodeCache is full"); return; - } + } } %} --- old/src/cpu/x86/vm/x86_64.ad 2015-11-07 22:07:28.484446700 -0800 +++ new/src/cpu/x86/vm/x86_64.ad 2015-11-07 22:07:28.264446700 -0800 @@ -536,11 +536,7 @@ #define __ _masm. static int clear_avx_size() { - if(UseAVX > 2) { - return 0; // vzeroupper is ignored - } else { - return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // vzeroupper - } + return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // vzeroupper } // !!!!! Special hack to get all types of calls to specify the byte offset @@ -871,7 +867,7 @@ if (framesize > 0) { st->print("\n\t"); st->print("addq rbp, #%d", framesize); - } + } } } --- old/src/share/vm/opto/c2_globals.hpp 2015-11-07 22:07:30.235446700 -0800 +++ new/src/share/vm/opto/c2_globals.hpp 2015-11-07 22:07:30.022446700 -0800 @@ -186,9 +186,9 @@ "Maximum number of unrolls for main loop") \ range(0, max_jint) \ \ - product(bool, SuperWordLoopUnrollAnalysis, false, \ - "Map number of unrolls for main loop via " \ - "Superword Level Parallelism analysis") \ + product_pd(bool, SuperWordLoopUnrollAnalysis, \ + "Map number of unrolls for main loop via " \ + "Superword Level Parallelism analysis") \ \ notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false, \ "Trace what Superword Level Parallelism analysis applies") \ --- old/src/share/vm/opto/matcher.hpp 2015-11-07 22:07:31.615446700 -0800 +++ new/src/share/vm/opto/matcher.hpp 2015-11-07 22:07:31.404446700 -0800 @@ -269,6 +269,10 @@ // should generate this one. static const bool match_rule_supported(int opcode); + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + static const bool match_rule_supported_vector(int opcode, int vlen); + // Some uarchs have different sized float register resources static const int float_pressure(int default_pressure_threshold); --- old/src/share/vm/opto/superword.cpp 2015-11-07 22:07:33.011446700 -0800 +++ new/src/share/vm/opto/superword.cpp 2015-11-07 22:07:32.795446700 -0800 @@ -2247,7 +2247,10 @@ NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte)); // For atomic unrolled loops which are vector mapped, instigate more unrolling. cl->set_notpassed_slp(); - C->set_major_progress(); + // if vector resources are limited, do not allow additional unrolling + if (FLOATPRESSURE > 8) { + C->set_major_progress(); + } cl->mark_do_unroll_only(); } } --- old/src/share/vm/opto/vectornode.cpp 2015-11-07 22:07:34.515446700 -0800 +++ new/src/share/vm/opto/vectornode.cpp 2015-11-07 22:07:34.304446700 -0800 @@ -188,7 +188,7 @@ (vlen > 1) && is_power_of_2(vlen) && Matcher::vector_size_supported(bt, vlen)) { int vopc = VectorNode::opcode(opc, bt); - return vopc > 0 && Matcher::match_rule_supported(vopc) && (vopc != Op_CMoveD || vlen == 4); + return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen); } return false; }