< prev index next >

src/hotspot/cpu/x86/x86.ad

Print this page

        

*** 1370,1387 **** --- 1370,1613 ---- static address float_signmask() { return (address)float_signmask_pool; } static address float_signflip() { return (address)float_signflip_pool; } static address double_signmask() { return (address)double_signmask_pool; } static address double_signflip() { return (address)double_signflip_pool; } #endif + static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } + static address vector_float_signmask() { return StubRoutines::x86::vector_float_sign_mask(); } + static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); } + static address vector_double_signmask() { return StubRoutines::x86::vector_double_sign_mask(); } + static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); } + static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } + static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } + + //============================================================================= + + + typedef void (MacroAssembler::*XX_Inst)(XMMRegister, XMMRegister); + typedef void (MacroAssembler::*XAR_Inst)(XMMRegister, AddressLiteral, Register); + typedef void (MacroAssembler::*XXI_Inst)(XMMRegister, XMMRegister, int); + typedef void (MacroAssembler::*XXAIR_Inst)(XMMRegister, XMMRegister, AddressLiteral, int, Register); + typedef void (MacroAssembler::*XXXI_Inst)(XMMRegister, XMMRegister, XMMRegister, int); + + XX_Inst get_xx_inst(int opcode) { + XX_Inst inst; + switch(opcode) { + case Op_RShiftVB: + case Op_RShiftVS: + return &MacroAssembler::psraw; + case Op_LShiftVB: + case Op_LShiftVS: + return &MacroAssembler::psllw; + case Op_URShiftVB: + case Op_URShiftVS: + return &MacroAssembler::psrlw; + case Op_RShiftVI: + return &MacroAssembler::psrad; + case Op_LShiftVI: + return &MacroAssembler::pslld; + case Op_URShiftVI: + return &MacroAssembler::psrld; + case 
Op_LShiftVL: + return &MacroAssembler::psllq; + case Op_RShiftVL: + case Op_URShiftVL: + return &MacroAssembler::psrlq; + default: + return NULL; + } + } + + XAR_Inst get_xar_inst(int opcode) { + XAR_Inst inst; + switch(opcode) { + case Op_AbsVF: + return &MacroAssembler::andps; + case Op_AbsVD: + return &MacroAssembler::andpd; + case Op_NegVF: + return &MacroAssembler::xorps; + case Op_NegVD: + return &MacroAssembler::xorpd; + default: + return NULL; + } + } + + XXAIR_Inst get_xxair_inst(int opcode) { + XXAIR_Inst inst; + switch(opcode) { + case Op_AbsVF: + return &MacroAssembler::vandps; + case Op_AbsVD: + return &MacroAssembler::vandpd; + case Op_NegVF: + return &MacroAssembler::vxorps; + case Op_NegVD: + return &MacroAssembler::vxorpd; + default: + return NULL; + } + } + + XXXI_Inst get_xxxi_inst(int opcode) { + XXXI_Inst inst; + switch(opcode) { + case Op_RShiftVB: + case Op_RShiftVS: + return &MacroAssembler::vpsraw; + case Op_LShiftVB: + case Op_LShiftVS: + return &MacroAssembler::vpsllw; + case Op_URShiftVB: + case Op_URShiftVS: + return &MacroAssembler::vpsrlw; + case Op_RShiftVI: + return &MacroAssembler::vpsrad; + case Op_LShiftVI: + return &MacroAssembler::vpslld; + case Op_URShiftVI: + return &MacroAssembler::vpsrld; + case Op_RShiftVL: + return &MacroAssembler::evpsraq; + case Op_LShiftVL: + return &MacroAssembler::vpsllq; + case Op_URShiftVL: + return &MacroAssembler::vpsrlq; + default: + return NULL; + } + } + + XX_Inst get_extend_inst(bool sign) { + XX_Inst inst; + if (sign) + inst = &MacroAssembler::pmovsxbw; + else + inst = &MacroAssembler::pmovzxbw; + return inst; + } + + XXI_Inst get_avx_extend_inst(bool sign) { + XXI_Inst inst; + if (sign) + inst = &MacroAssembler::vpmovsxbw; + else + inst = &MacroAssembler::vpmovzxbw; + return inst; + } + + AddressLiteral get_mask(int opcode) { + switch(opcode) { + case Op_AbsVF: + return ExternalAddress(vector_float_signmask()); + case Op_AbsVD: + return ExternalAddress(vector_double_signmask()); + case 
Op_NegVF: + return ExternalAddress(vector_float_signflip()); + case Op_NegVD: + return ExternalAddress(vector_double_signflip()); + default: + return ExternalAddress(vector_double_signflip()); + } + } + // need a scratch register to load mask TBD + void emit_vshift4Bor8B_code(MacroAssembler& _masm, int opcode, XMMRegister dst, + XMMRegister src, XMMRegister shift, + XMMRegister tmp, Register scratch) { + XX_Inst extendinst = get_extend_inst(opcode == Op_URShiftVB ? false : true); + XX_Inst shiftinst = get_xx_inst(opcode); + + (_masm.*extendinst)(tmp, src); + (_masm.*shiftinst)(tmp, shift); + __ movdqu(dst, ExternalAddress(vector_short_to_byte_mask()), scratch); + __ pand(dst, tmp); + __ packuswb(dst, dst); + } + + // need a scratch register to load mask TBD + void emit_vshift16B_code(MacroAssembler& _masm, int opcode, XMMRegister dst, + XMMRegister src, XMMRegister shift, + XMMRegister tmp1, XMMRegister tmp2, Register scratch) { + XX_Inst extendinst = get_extend_inst(opcode == Op_URShiftVB ? false : true); + XX_Inst shiftinst = get_xx_inst(opcode); + + (_masm.*extendinst)(tmp1, src); + (_masm.*shiftinst)(tmp1, shift); + __ pshufd(tmp2, src, 0xE); + (_masm.*extendinst)(tmp2, tmp2); + (_masm.*shiftinst)(tmp2, shift); + __ movdqu(dst, ExternalAddress(vector_short_to_byte_mask()), scratch); + __ pand(tmp2, dst); + __ pand(dst, tmp1); + __ packuswb(dst, tmp2); + } + void emit_vshift16B_avx_code(MacroAssembler& _masm, int opcode, XMMRegister dst, + XMMRegister src, XMMRegister shift, + XMMRegister tmp, Register scratch) { + XXI_Inst extendinst = get_avx_extend_inst(opcode == Op_URShiftVB ? 
false : true); + XXXI_Inst shiftinst = get_xxxi_inst(opcode); + + int vector_len = 1; + (_masm.*extendinst)(tmp, src, vector_len); + (_masm.*shiftinst)(tmp, tmp, shift, vector_len); + __ vpand(tmp, tmp, ExternalAddress(vector_short_to_byte_mask()), vector_len, scratch); + __ vextracti128_high(dst, tmp); + __ vpackuswb(dst, tmp, dst, 0); + } + + void emit_vshift32B_avx_code(MacroAssembler& _masm, int opcode, XMMRegister dst, + XMMRegister src, XMMRegister shift, + XMMRegister tmp, Register scratch) { + XXI_Inst extendinst = get_avx_extend_inst(opcode == Op_URShiftVB ? false : true); + XXXI_Inst shiftinst = get_xxxi_inst(opcode); + + int vector_len = 1; + __ vextracti128_high(tmp, src); + (_masm.*extendinst)(tmp, tmp, vector_len); + (_masm.*extendinst)(dst, src, vector_len); + (_masm.*shiftinst)(tmp, tmp, shift, vector_len); + (_masm.*shiftinst)(dst, dst, shift, vector_len); + __ vpand(tmp, tmp, ExternalAddress(vector_short_to_byte_mask()), vector_len, scratch); + __ vpand(dst, dst, ExternalAddress(vector_short_to_byte_mask()), vector_len, scratch); + __ vpackuswb(dst, dst, tmp, vector_len); + __ vpermq(dst, dst, 0xD8, vector_len); + } + + void emit_vshift64B_avx_code(MacroAssembler& _masm, int opcode, XMMRegister dst, + XMMRegister src, XMMRegister shift, + XMMRegister tmp1, XMMRegister tmp2, Register scratch) { + XXI_Inst extendinst = get_avx_extend_inst(opcode == Op_URShiftVB ? 
false : true); + XXXI_Inst shiftinst = get_xxxi_inst(opcode); + + int vector_len = 2; + __ vextracti64x4(tmp1, src, 1); + (_masm.*extendinst)(tmp1, tmp1, vector_len); + (_masm.*extendinst)(tmp2, src, vector_len); + (_masm.*shiftinst)(tmp1, tmp1, shift, vector_len); + (_masm.*shiftinst)(tmp2, tmp2, shift, vector_len); + __ vmovdqu(dst, ExternalAddress(vector_short_to_byte_mask()), scratch); + __ vpbroadcastd(dst, dst, vector_len); + __ vpand(tmp1, tmp1, dst, vector_len); + __ vpand(tmp2, tmp2, dst, vector_len); + __ vpackuswb(dst, tmp1, tmp2, vector_len); + __ evmovdquq(tmp2, ExternalAddress(vector_byte_perm_mask()), vector_len, scratch); + __ vpermq(dst, tmp2, dst, vector_len); + } + + //============================================================================= const bool Matcher::match_rule_supported(int opcode) { if (!has_match_rule(opcode)) return false; bool ret_value = true; switch (opcode) { + case Op_AbsVL: + if (UseAVX < 3) + ret_value = false; case Op_PopCountI: case Op_PopCountL: if (!UsePopCountInstruction) ret_value = false; break;
*** 1400,1409 **** --- 1626,1638 ---- break; case Op_AddReductionVL: if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here ret_value = false; break; + case Op_AbsVB: + case Op_AbsVS: + case Op_AbsVI: case Op_AddReductionVI: if (UseSSE < 3) // requires at least SSE3 ret_value = false; break; case Op_MulReductionVI:
*** 1445,1457 **** --- 1674,1696 ---- case Op_OnSpinWait: if (VM_Version::supports_on_spin_wait() == false) ret_value = false; break; case Op_MulAddVS2VI: + case Op_RShiftVL: + case Op_AbsVD: + case Op_NegVD: if (UseSSE < 2) ret_value = false; break; + case Op_MulVB: + case Op_LShiftVB: + case Op_RShiftVB: + case Op_URShiftVB: + if (UseSSE < 4) + ret_value = false; + break; #ifdef _LP64 case Op_MaxD: case Op_MaxF: case Op_MinD: case Op_MinF:
*** 1468,1495 **** // identify extra cases that we might want to provide match rules for // e.g. Op_ vector nodes and other intrinsics while guarding with vlen bool ret_value = match_rule_supported(opcode); if (ret_value) { switch (opcode) { case Op_AddVB: case Op_SubVB: if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) ret_value = false; break; ! case Op_URShiftVS: ! case Op_RShiftVS: ! case Op_LShiftVS: ! case Op_MulVS: case Op_AddVS: case Op_SubVS: if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) ret_value = false; break; case Op_CMoveVF: if (vlen != 8) ret_value = false; break; case Op_CMoveVD: if (vlen != 4) ret_value = false; break; } --- 1707,1752 ---- // identify extra cases that we might want to provide match rules for // e.g. Op_ vector nodes and other intrinsics while guarding with vlen bool ret_value = match_rule_supported(opcode); if (ret_value) { switch (opcode) { + case Op_AbsVB: case Op_AddVB: case Op_SubVB: if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) ret_value = false; break; ! case Op_AbsVS: case Op_AddVS: case Op_SubVS: + case Op_MulVS: + case Op_LShiftVS: + case Op_RShiftVS: + case Op_URShiftVS: if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) ret_value = false; break; + case Op_MulVB: + case Op_LShiftVB: + case Op_RShiftVB: + case Op_URShiftVB: + if ((vlen == 32 && UseAVX < 2) || + ((vlen == 64) && (VM_Version::supports_avx512bw() == false))) + ret_value = false; + break; + case Op_NegVF: + if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) + ret_value = false; + break; case Op_CMoveVF: if (vlen != 8) ret_value = false; break; + case Op_NegVD: + if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) + ret_value = false; + break; case Op_CMoveVD: if (vlen != 4) ret_value = false; break; }
*** 7300,7309 **** --- 7557,7746 ---- ins_pipe( pipe_slow ); %} // --------------------------------- MUL -------------------------------------- + // Byte vector mul + instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 4); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{"pmovsxbw $tmp,$src1\n\t" + "pmovsxbw $dst,$src2\n\t" + "pmullw $tmp,$dst\n\t" + "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "pand $dst,$tmp\n\t" + "packuswb $dst,$dst\t! mul packed4B" %} + ins_encode %{ + __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); + __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); + __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ pand($dst$$XMMRegister, $tmp$$XMMRegister); + __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 8); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{"pmovsxbw $tmp,$src1\n\t" + "pmovsxbw $dst,$src2\n\t" + "pmullw $tmp,$dst\n\t" + "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "pand $dst,$tmp\n\t" + "packuswb $dst,$dst\t! 
mul packed8B" %} + ins_encode %{ + __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); + __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); + __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ pand($dst$$XMMRegister, $tmp$$XMMRegister); + __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 16); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + format %{"pmovsxbw $tmp1,$src1\n\t" + "pmovsxbw $tmp2,$src2\n\t" + "pmullw $tmp1,$tmp2\n\t" + "pshufd $tmp2,$src1,0xEE\n\t" + "pshufd $dst,$src2,0xEE\n\t" + "pmovsxbw $tmp2,$tmp2\n\t" + "pmovsxbw $dst,$dst\n\t" + "pmullw $tmp2,$dst\n\t" + "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "pand $tmp2,$dst\n\t" + "pand $dst,$tmp1\n\t" + "packuswb $dst,$tmp2\t! 
mul packed16B" %} + ins_encode %{ + __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); + __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); + __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); + __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); + __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); + __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); + __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); + __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp, rRegI scratch) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{"vpmovsxbw $tmp,$src1\n\t" + "vpmovsxbw $dst,$src2\n\t" + "vpmullw $tmp,$tmp,$dst\n\t" + "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "vpand $dst,$dst,$tmp\n\t" + "vextracti128_high $tmp,$dst\n\t" + "vpackuswb $dst,$dst,$dst\n\t! 
mul packed16B" %} + ins_encode %{ + int vector_len = 1; + __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); + __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); + __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); + __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); + %} + ins_pipe( pipe_slow ); + %} + + instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, rRegI scratch) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + format %{"vextracti128_high $tmp1,$src1\n\t" + "vextracti128_high $dst,$src2\n\t" + "vpmovsxbw $tmp1,$tmp1\n\t" + "vpmovsxbw $dst,$dst\n\t" + "vpmullw $tmp1,$tmp1,$dst\n\t" + "vpmovsxbw $tmp2,$src1\n\t" + "vpmovsxbw $dst,$src2\n\t" + "vpmullw $tmp2,$tmp2,$dst\n\t" + "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t" + "vpbroadcastd $dst, $dst\n\t" + "vpand $tmp1,$tmp1,$dst\n\t" + "vpand $dst,$dst,$tmp2\n\t" + "vpackuswb $dst,$dst,$tmp1\n\t" + "vpermq $dst, $dst, 0xD8\t! 
mul packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); + __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); + __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); + __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); + __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); + __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len); + __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); + %} + ins_pipe( pipe_slow ); + %} + + instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, rRegI scratch) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 64); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + format %{"vextracti64x4_high $tmp1,$src1\n\t" + "vextracti64x4_high $dst,$src2\n\t" + "vpmovsxbw $tmp1,$tmp1\n\t" + "vpmovsxbw $dst,$dst\n\t" + "vpmullw $tmp1,$tmp1,$dst\n\t" + "vpmovsxbw $tmp2,$src1\n\t" + "vpmovsxbw $dst,$src2\n\t" + "vpmullw $tmp2,$tmp2,$dst\n\t" + "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t" + "vpbroadcastd $dst, $dst\n\t" + "vpand $tmp1,$tmp1,$dst\n\t" + "vpand $tmp2,$tmp2,$dst\n\t" + "vpackuswb $dst,$tmp1,$tmp2\n\t" + "evmovdquq $tmp2,[0x0604020007050301]\n\t" + "vpermq $dst,$tmp2,$dst,0x01\t! 
mul packed64B" %} + + ins_encode %{ + int vector_len = 2; + __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); + __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); + __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); + __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); + __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); + __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); + __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); + + %} + ins_pipe( pipe_slow ); + %} + // Shorts/Chars vector mul instruct vmul2S(vecS dst, vecS src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (MulVS dst src)); format %{ "pmullw $dst,$src\t! mul packed2S" %}
*** 8022,8045 **** __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} - // ------------------------------ Shift --------------------------------------- - - // Left and right shift count vectors are the same on x86 - // (only lowest bits of xmm reg are used for count). - instruct vshiftcnt(vecS dst, rRegI cnt) %{ - match(Set dst (LShiftCntV cnt)); - match(Set dst (RShiftCntV cnt)); - format %{ "movd $dst,$cnt\t! load shift count" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $cnt$$Register); - %} - ins_pipe( pipe_slow ); - %} - // --------------------------------- Sqrt -------------------------------------- // Floating point vector sqrt instruct vsqrt2D_reg(vecX dst, vecX src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); --- 8459,8468 ----
*** 8193,9308 **** __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // ------------------------------ LeftShift ----------------------------------- ! ! // Shorts/Chars vector left shift ! instruct vsll2S(vecS dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed2S" %} ! ins_encode %{ ! __ psllw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2S_imm(vecS dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed2S" %} ! ins_encode %{ ! __ psllw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll4S(vecD dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed4S" %} ins_encode %{ ! __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsll4S_imm(vecD dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! 
match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed4S" %} ins_encode %{ ! __ psllw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S(vecX dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed8S" %} ! ins_encode %{ ! __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_imm(vecX dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVS dst shift)); ! format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ ! __ psllw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! 
predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ int vector_len = 1; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ int vector_len = 2; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ ! int vector_len = 2; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! // Integers vector left shift ! instruct vsll2I(vecD dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! 
match(Set dst (LShiftVI dst shift)); ! format %{ "pslld $dst,$shift\t! left shift packed2I" %} ins_encode %{ ! __ pslld($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsll2I_imm(vecD dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVI dst shift)); ! format %{ "pslld $dst,$shift\t! left shift packed2I" %} ins_encode %{ ! __ pslld($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVI src shift)); ! format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVI src shift)); ! format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} ins_encode %{ int vector_len = 0; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll4I(vecX dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVI dst shift)); ! format %{ "pslld $dst,$shift\t! left shift packed4I" %} ins_encode %{ ! __ pslld($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct vsll4I_imm(vecX dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVI dst shift)); ! format %{ "pslld $dst,$shift\t! left shift packed4I" %} ins_encode %{ ! __ pslld($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} ! instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVI src shift)); ! 
format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVI src shift)); ! format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} ins_encode %{ int vector_len = 0; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVI src shift)); ! format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} ins_encode %{ int vector_len = 1; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVI src shift)); ! format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} ins_encode %{ int vector_len = 1; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVI src shift)); ! format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} ins_encode %{ ! int vector_len = 2; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVI src shift)); ! format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} ins_encode %{ ! int vector_len = 2; ! 
__ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // Longs vector left shift ! instruct vsll2L(vecX dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVL dst shift)); ! format %{ "psllq $dst,$shift\t! left shift packed2L" %} ! ins_encode %{ ! __ psllq($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2L_imm(vecX dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVL dst shift)); ! format %{ "psllq $dst,$shift\t! left shift packed2L" %} ! ins_encode %{ ! __ psllq($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVL src shift)); ! format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (LShiftVL src shift)); ! format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVL src shift)); ! format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ ! 
predicate(UseAVX > 1 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVL src shift)); ! format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVL src shift)); ! format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVL src shift)); ! format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // ----------------------- LogicalRightShift ----------------------------------- ! ! // Shorts vector logical right shift produces incorrect Java result ! // for negative data because java code convert short value into int with ! // sign extension before a shift. But char vectors are fine since chars are ! // unsigned values. ! ! instruct vsrl2S(vecS dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2S_imm(vecS dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! 
__ psrlw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S(vecD dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S_imm(vecD dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! __ psrlw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ ! 
predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl8S(vecX dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ! ins_encode %{ ! __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl8S_imm(vecX dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVS dst shift)); ! format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ! ins_encode %{ ! __ psrlw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ! ins_encode %{ ! 
int vector_len = 1; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // Integers vector logical right shift ! instruct vsrl2I(vecD dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVI dst shift)); ! format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} ! ins_encode %{ ! __ psrld($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2I_imm(vecD dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVI dst shift)); ! format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} ! ins_encode %{ ! 
__ psrld($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVI src shift)); ! format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVI src shift)); ! format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4I(vecX dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVI dst shift)); ! format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} ! ins_encode %{ ! __ psrld($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4I_imm(vecX dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVI dst shift)); ! format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} ! ins_encode %{ ! __ psrld($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVI src shift)); ! format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! 
predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVI src shift)); ! format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVI src shift)); ! format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVI src shift)); ! format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (URShiftVI src shift)); ! format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (URShiftVI src shift)); ! format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // Longs vector logical right shift ! instruct vsrl2L(vecX dst, vecS shift) %{ ! 
predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVL dst shift)); ! format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} ! ins_encode %{ ! __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2L_imm(vecX dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVL dst shift)); ! format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} ! ins_encode %{ ! __ psrlq($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVL src shift)); ! format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (URShiftVL src shift)); ! format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVL src shift)); ! format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 4); ! match(Set dst (URShiftVL src shift)); ! format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} ! 
ins_encode %{ ! int vector_len = 1; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVL src shift)); ! format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVL src shift)); ! format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // ------------------- ArithmeticRightShift ----------------------------------- ! ! // Shorts/Chars vector arithmetic right shift ! instruct vsra2S(vecS dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVS dst shift)); ! format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} ! ins_encode %{ ! __ psraw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra2S_imm(vecS dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVS dst shift)); ! format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} ! ins_encode %{ ! __ psraw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! 
__ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4S(vecD dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVS dst shift)); ! format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ! ins_encode %{ ! __ psraw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4S_imm(vecD dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVS dst shift)); ! format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ! ins_encode %{ ! __ psraw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra8S(vecX dst, vecS shift) %{ ! 
predicate(UseAVX == 0 && n->as_Vector()->length() == 8); ! match(Set dst (RShiftVS dst shift)); ! format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ! ins_encode %{ ! __ psraw($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra8S_imm(vecX dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 8); ! match(Set dst (RShiftVS dst shift)); ! format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ! ins_encode %{ ! __ psraw($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 8); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // Integers vector arithmetic right shift ! instruct vsra2I(vecD dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVI dst shift)); ! format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} ! ins_encode %{ ! __ psrad($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra2I_imm(vecD dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVI dst shift)); ! format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} ! ins_encode %{ ! __ psrad($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVI src shift)); ! format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} ! ins_encode %{ ! 
int vector_len = 0; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 2); ! match(Set dst (RShiftVI src shift)); ! format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4I(vecX dst, vecS shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVI dst shift)); ! format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} ! ins_encode %{ ! __ psrad($dst$$XMMRegister, $shift$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4I_imm(vecX dst, immI8 shift) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVI dst shift)); ! format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} ! ins_encode %{ ! __ psrad($dst$$XMMRegister, (int)$shift$$constant); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVI src shift)); ! format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 4); ! match(Set dst (RShiftVI src shift)); ! format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! 
instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (RShiftVI src shift)); ! format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (RShiftVI src shift)); ! format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (RShiftVI src shift)); ! format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (RShiftVI src shift)); ! format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // There are no longs vector arithmetic right shift instructions. ! ! ! // --------------------------------- AND -------------------------------------- ! ! instruct vand4B(vecS dst, vecS src) %{ ! predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4); ! match(Set dst (AndV dst src)); ! format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} ! ins_encode %{ ! __ pand($dst$$XMMRegister, $src$$XMMRegister); ! 
%} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); ! match(Set dst (AndV src1 src2)); ! format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ --- 8616,9069 ---- __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} ! // ------------------------------ Shift --------------------------------------- ! // Left and right shift count vectors are the same on x86 ! // (only lowest bits of xmm reg are used for count). ! instruct vshiftcnt(vecS dst, rRegI cnt) %{ ! match(Set dst (LShiftCntV cnt)); ! match(Set dst (RShiftCntV cnt)); ! format %{ "movdl $dst,$cnt\t! load shift count" %} ins_encode %{ ! __ movdl($dst$$XMMRegister, $cnt$$Register); %} ins_pipe( pipe_slow ); %} ! instruct vshiftcntimm(vecS dst, immI8 cnt, rRegI tmp) %{ ! match(Set dst cnt); ! effect(TEMP tmp); ! format %{ "movl $tmp,$cnt\t" ! "movdl $dst,$tmp\t! load shift count" %} ins_encode %{ ! __ movl($tmp$$Register, $cnt$$constant); ! __ movdl($dst$$XMMRegister, $tmp$$Register); %} ins_pipe( pipe_slow ); %} ! // Byte vector shift ! instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{ ! predicate(UseSSE > 3 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVB src shift)); ! match(Set dst (RShiftVB src shift)); ! match(Set dst (URShiftVB src shift)); ! effect(TEMP dst, TEMP tmp, TEMP scratch); ! format %{"pmovxbw $tmp,$src\n\t" ! "shiftop $tmp,$shift\n\t" ! "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" ! "pand $dst,$tmp\n\t" ! "packuswb $dst,$dst\n\t ! packed4B shift" %} ins_encode %{ ! 
emit_vshift4Bor8B_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp$$XMMRegister, $scratch$$Register); %} ins_pipe( pipe_slow ); %} ! instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{ ! predicate(UseSSE > 3 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVB src shift)); ! match(Set dst (RShiftVB src shift)); ! match(Set dst (URShiftVB src shift)); ! effect(TEMP dst, TEMP tmp, TEMP scratch); ! format %{"pmovxbw $tmp,$src\n\t" ! "shiftop $tmp,$shift\n\t" ! "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" ! "pand $dst,$tmp\n\t" ! "packuswb $dst,$dst\n\t ! packed8B shift" %} ins_encode %{ ! emit_vshift4Bor8B_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp$$XMMRegister, $scratch$$Register); %} ins_pipe( pipe_slow ); %} ! instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{ ! predicate(UseSSE > 3 && UseAVX <= 1 && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVB src shift)); ! match(Set dst (RShiftVB src shift)); ! match(Set dst (URShiftVB src shift)); ! effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); ! format %{"pmovxbw $tmp1,$src\n\t" ! "shiftop $tmp1,$shift\n\t" ! "pshufd $tmp2,$src\n\t" ! "pmovxbw $tmp2,$tmp2\n\t" ! "shiftop $tmp2,$shift\n\t" ! "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" ! "pand $tmp2,$dst\n\t" ! "pand $dst,$tmp1\n\t" ! "packuswb $dst,$tmp2\n\t! packed16B shift" %} ins_encode %{ ! emit_vshift16B_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $scratch$$Register); %} ins_pipe( pipe_slow ); %} ! instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVB src shift)); ! match(Set dst (RShiftVB src shift)); ! match(Set dst (URShiftVB src shift)); ! 
effect(TEMP dst, TEMP tmp, TEMP scratch); ! format %{"vpmovxbw $tmp,$src\n\t" ! "shiftop $tmp,$tmp,$shift\n\t" ! "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" ! "vextracti128_high $dst,$tmp\n\t" ! "vpackuswb $dst,$tmp,$dst\n\t! packed16B shift" %} ins_encode %{ ! emit_vshift16B_avx_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp$$XMMRegister, $scratch$$Register); %} ins_pipe( pipe_slow ); %} ! instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 32); ! match(Set dst (LShiftVB src shift)); ! match(Set dst (RShiftVB src shift)); ! match(Set dst (URShiftVB src shift)); ! effect(TEMP dst, TEMP tmp, TEMP scratch); ! format %{"vextracti128_high $tmp,$src\n\t" ! "vpmovxbw $tmp,$tmp\n\t" ! "vpmovxbw $dst,$src\n\t" ! "shiftop $tmp,$tmp,$shift\n\t" ! "shiftop $dst,$dst,$shift\n\t" ! "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" ! "vpand $dst,$dst,[0x00ff00ff0x00ff00ff]\n\t" ! "vpackuswb $dst,$dst,$tmp\n\t" ! "vpermq $dst,$dst,0xD8\n\t! packed32B shift" %} ! ins_encode %{ ! emit_vshift32B_avx_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp$$XMMRegister, $scratch$$Register); %} ins_pipe( pipe_slow ); %} ! instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 64); ! match(Set dst (LShiftVB src shift)); ! match(Set dst (RShiftVB src shift)); ! match(Set dst (URShiftVB src shift)); ! effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); ! format %{"vextracti64x4 $tmp1,$src\n\t" ! "vpmovxbw $tmp1,$tmp1\n\t" ! "vpmovxbw $tmp2,$src\n\t" ! "shiftop $tmp1,$tmp1,$shift\n\t" ! "shiftop $tmp2,$tmp2,$shift\n\t" ! "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" ! "vpbroadcastd $dst,$dst\n\t" ! "vpand $tmp1,$tmp1,$dst\n\t" ! "vpand $tmp2,$tmp2,$dst\n\t" ! "vpackuswb $dst,$tmp1,$tmp2\n\t" ! 
"evmovdquq $tmp2, [0x0604020007050301]\n\t" ! "vpermq $dst,$tmp2,$dst\n\t! packed64B shift" %} ins_encode %{ ! emit_vshift64B_avx_code(_masm, this->as_Mach()->ideal_Opcode() , $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $scratch$$Register); %} ins_pipe( pipe_slow ); %} ! // Shorts vector logical right shift produces incorrect Java result ! // for negative data because java code convert short value into int with ! // sign extension before a shift. But char vectors are fine since chars are ! // unsigned values. ! // Shorts/Chars vector left shift ! instruct vshist2S(vecS dst, vecS src, vecS shift) %{ ! predicate(n->as_Vector()->length() == 2); match(Set dst (LShiftVS src shift)); ! match(Set dst (RShiftVS src shift)); ! match(Set dst (URShiftVS src shift)); ! format %{ "shiftop $dst,$src,$shift\t! shift packed2S" %} ins_encode %{ + if (UseAVX == 0) { + XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode()); + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movflt($dst$$XMMRegister, $src$$XMMRegister); + (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister); + } else { int vector_len = 0; ! XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! } %} ins_pipe( pipe_slow ); %} ! instruct vshift4S(vecD dst, vecD src, vecS shift) %{ ! predicate(n->as_Vector()->length() == 4); match(Set dst (LShiftVS src shift)); ! match(Set dst (RShiftVS src shift)); ! match(Set dst (URShiftVS src shift)); ! format %{ "shiftop $dst,$src,$shift\t! shift packed4S" %} ins_encode %{ + if (UseAVX == 0) { + XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode()); + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movdbl($dst$$XMMRegister, $src$$XMMRegister); + (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister); + + } else { int vector_len = 0; ! 
XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! } %} ins_pipe( pipe_slow ); %} ! instruct vshift8S(vecX dst, vecX src, vecS shift) %{ ! predicate(n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); ! match(Set dst (RShiftVS src shift)); ! match(Set dst (URShiftVS src shift)); ! format %{ "shiftop $dst,$src,$shift\t! shift packed8S" %} ins_encode %{ ! if (UseAVX == 0) { ! XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode()); ! if ($dst$$XMMRegister != $src$$XMMRegister) ! __ movdqu($dst$$XMMRegister, $src$$XMMRegister); ! (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister); ! } else { ! int vector_len = 0; ! XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! } %} ins_pipe( pipe_slow ); %} ! instruct vshift16S(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); ! match(Set dst (RShiftVS src shift)); ! match(Set dst (URShiftVS src shift)); ! format %{ "shiftop $dst,$src,$shift\t! shift packed16S" %} ins_encode %{ int vector_len = 1; ! XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); ! match(Set dst (RShiftVS src shift)); ! match(Set dst (URShiftVS src shift)); ! format %{ "shiftop $dst,$src,$shift\t! shift packed32S" %} ins_encode %{ int vector_len = 2; ! XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! 
(_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! // Integers vector left shift ! instruct vshift2I(vecD dst, vecD src, vecS shift) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (LShiftVI src shift)); ! match(Set dst (RShiftVI src shift)); ! match(Set dst (URShiftVI src shift)); ! format %{ "shiftop $dst,$src,$shift\t! shift packed2I" %} ins_encode %{ ! if (UseAVX == 0) { ! XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode()); ! if ($dst$$XMMRegister != $src$$XMMRegister) ! __ movdbl($dst$$XMMRegister, $src$$XMMRegister); ! (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister); ! } else { ! int vector_len = 0; ! XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! } %} ins_pipe( pipe_slow ); %} ! instruct vshift4I(vecX dst, vecX src, vecS shift) %{ ! predicate(n->as_Vector()->length() == 4); ! match(Set dst (LShiftVI src shift)); ! match(Set dst (RShiftVI src shift)); ! match(Set dst (URShiftVI src shift)); ! format %{ "shiftop $dst,$src,$shift\t! shift packed4I" %} ins_encode %{ ! if (UseAVX == 0) { ! XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode()); ! if ($dst$$XMMRegister != $src$$XMMRegister) ! __ movdqu($dst$$XMMRegister, $src$$XMMRegister); ! (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister); ! } else { ! int vector_len = 0; ! XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! } %} ins_pipe( pipe_slow ); %} ! instruct vshift8I(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVI src shift)); ! match(Set dst (RShiftVI src shift)); ! match(Set dst (URShiftVI src shift)); ! format %{ "shiftop $dst,$src,$shift\t! shift packed8I" %} ins_encode %{ ! 
int vector_len = 1; ! XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (LShiftVI src shift)); ! match(Set dst (RShiftVI src shift)); ! match(Set dst (URShiftVI src shift)); ! format %{ "shiftop $dst,$src,$shift\t! shift packed16I" %} ins_encode %{ ! int vector_len = 2; ! XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! // Longs vector shift ! instruct vshift2L(vecX dst, vecX src, vecS shift) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (LShiftVL src shift)); ! match(Set dst (URShiftVL src shift)); ! format %{ "shiftop $dst,$src,$shift\t! shift packed2L" %} ins_encode %{ + if (UseAVX == 0) { + XX_Inst shiftinst = get_xx_inst(this->as_Mach()->ideal_Opcode()); + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movdqu($dst$$XMMRegister, $src$$XMMRegister); + (_masm.*shiftinst)($dst$$XMMRegister, $shift$$XMMRegister); + } else { int vector_len = 0; ! XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! } %} ins_pipe( pipe_slow ); %} ! instruct vshift4L(vecY dst, vecY src, vecS shift) %{ ! predicate(UseAVX > 1 && n->as_Vector()->length() == 4); ! match(Set dst (LShiftVL src shift)); ! match(Set dst (URShiftVL src shift)); ! format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} ins_encode %{ ! int vector_len = 1; ! XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode()); ! (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} ! 
instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "shiftop $dst,$src,$shift\t! shift packed8L" %}
  ins_encode %{
    // All three long-shift forms dispatch through the ideal opcode; with
    // AVX-512 a 64-bit arithmetic right shift is directly available.
    int vector_len = 2;
    XXXI_Inst shiftinst = get_xxxi_inst(this->as_Mach()->ideal_Opcode());
    (_masm.*shiftinst)($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// -------------------ArithmeticRightShift -----------------------------------

// Long vector arithmetic right shift
// SSE2 lacks a 64-bit arithmetic shift, so it is emulated:
//   d = src >>> s;  m = 0x8000000000000000 >>> s;  d = (d ^ m) - m
// which sign-extends the bits shifted in at the top.
instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "movdqu $dst,$src\n\t"
            "psrlq $dst,$shift\n\t"
            "movdqu $tmp,[0x8000000000000000]\n\t"
            "psrlq $tmp,$shift\n\t"
            "pxor $dst,$tmp\n\t"
            "psubq $dst,$tmp\t! arithmetic right shift packed2L" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
    // scratch holds the address of the 64-bit sign-mask constant.
    __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
    __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
    __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
    __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %}
  ins_encode %{
    // AVX-512 provides the shift natively via the EVEX-encoded vpsraq.
    int vector_len = 0;
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same xor/sub emulation as vsra2L_reg, using the 256-bit AVX2 forms.
instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vpsrlq $dst,$src,$shift\n\t"
            "vmovdqu $tmp,[0x8000000000000000]\n\t"
            "vpsrlq $tmp,$tmp,$shift\n\t"
            "vpxor $dst,$dst,$tmp\n\t"
            "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVL src shift));
  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
*** 9706,9715 **** --- 9467,9775 ---- __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} + // --------------------------------- ABS -------------------------------------- + // a = |a| + instruct vabs4B_reg(vecS dst, vecS src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 4); + match(Set dst (AbsVB src)); + format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed4B" %} + ins_encode %{ + __ pabsb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs8B_reg(vecD dst, vecD src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 8); + match(Set dst (AbsVB src)); + format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %} + ins_encode %{ + __ pabsb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs16B_reg(vecX dst, vecX src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 16); + match(Set dst (AbsVB src)); + format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %} + ins_encode %{ + __ pabsb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs32B_reg(vecY dst, vecY src) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); + match(Set dst (AbsVB src)); + format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs64B_reg(vecZ dst, vecZ src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 64); + match(Set dst (AbsVB src)); + format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed64B" %} + ins_encode %{ + int vector_len = 2; + __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs2S_reg(vecD dst, vecD src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 2); + match(Set dst (AbsVS src)); + format %{ "pabsw $dst,$src\t# $dst = |$src| abs 
packed2S" %} + ins_encode %{ + __ pabsw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs4S_reg(vecD dst, vecD src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 4); + match(Set dst (AbsVS src)); + format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %} + ins_encode %{ + __ pabsw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs8S_reg(vecX dst, vecX src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 8); + match(Set dst (AbsVS src)); + format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %} + ins_encode %{ + __ pabsw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs16S_reg(vecY dst, vecY src) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (AbsVS src)); + format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs32S_reg(vecZ dst, vecZ src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + match(Set dst (AbsVS src)); + format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed32S" %} + ins_encode %{ + int vector_len = 2; + __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs2I_reg(vecD dst, vecD src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 2); + match(Set dst (AbsVI src)); + format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %} + ins_encode %{ + __ pabsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs4I_reg(vecX dst, vecX src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 4); + match(Set dst (AbsVI src)); + format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %} + ins_encode %{ + __ pabsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct 
vabs8I_reg(vecY dst, vecY src) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AbsVI src)); + format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %} + ins_encode %{ + int vector_len = 1; + __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs16I_reg(vecZ dst, vecZ src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16); + match(Set dst (AbsVI src)); + format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed16I" %} + ins_encode %{ + int vector_len = 2; + __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs2L_reg(vecX dst, vecX src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 2); + match(Set dst (AbsVL src)); + format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %} + ins_encode %{ + int vector_len = 0; + __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs4L_reg(vecY dst, vecY src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 4); + match(Set dst (AbsVL src)); + format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %} + ins_encode %{ + int vector_len = 1; + __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabs8L_reg(vecZ dst, vecZ src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (AbsVL src)); + format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %} + ins_encode %{ + int vector_len = 2; + __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); + %} + + // --------------------------------- ABSNEG -------------------------------------- + + instruct vabsneg2D(vecX dst, vecX src, rRegI scratch) %{ + predicate(UseSSE >= 2 && n->as_Vector()->length() == 2); + match(Set dst (AbsVD src)); + match(Set dst (NegVD src)); + effect(TEMP scratch); + format %{ "and(xor)pd 
$dst,$src,[mask]\t# absneg packed2D" %} + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + XAR_Inst opinst = get_xar_inst(opcode); + AddressLiteral adr = get_mask(opcode); + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movdqu($dst$$XMMRegister, $src$$XMMRegister); + (_masm.*opinst)($dst$$XMMRegister, adr, $scratch$$Register); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabsneg4D(vecY dst, vecY src, rRegI scratch) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AbsVD src)); + match(Set dst (NegVD src)); + effect(TEMP scratch); + format %{ "vand(xor)pd $dst,$src,[mask]\t# absneg packed4D" %} + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + XXAIR_Inst opinst = get_xxair_inst(opcode); + AddressLiteral adr = get_mask(opcode); + int vector_len = 1; + (_masm.*opinst)($dst$$XMMRegister, $src$$XMMRegister, adr, vector_len, $scratch$$Register); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabsneg8D(vecZ dst, vecZ src, rRegI scratch) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (AbsVD src)); + match(Set dst (NegVD src)); + effect(TEMP scratch); + format %{ "vand(xor)pd $dst,$src,[mask]\t# absneg packed8D" %} + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + XXAIR_Inst opinst = get_xxair_inst(opcode); + AddressLiteral adr = get_mask(opcode); + int vector_len = 2; + (_masm.*opinst)($dst$$XMMRegister, $src$$XMMRegister, adr, vector_len, $scratch$$Register); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabsneg2F(vecD dst, vecD src, rRegI scratch) %{ + predicate(UseSSE > 0 && n->as_Vector()->length() == 2); + match(Set dst (AbsVF src)); + match(Set dst (NegVF src)); + effect(TEMP scratch); + format %{ "and(xor)ps $dst,$src,[mask]\t# absneg packed2F" %} + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + XAR_Inst opinst = get_xar_inst(opcode); + AddressLiteral adr = get_mask(opcode); + if ($dst$$XMMRegister != $src$$XMMRegister) + __ 
movdqu($dst$$XMMRegister, $src$$XMMRegister); + (_masm.*opinst)($dst$$XMMRegister, adr, $scratch$$Register); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabsneg4F(vecX dst, rRegI scratch) %{ + predicate(UseSSE > 0 && n->as_Vector()->length() == 4); + match(Set dst (AbsVF dst)); + match(Set dst (NegVF dst)); + effect(TEMP scratch); + format %{ "vand(xor)ps $dst,[mask]\t# absneg packed4F" %} + ins_cost(150); + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + XAR_Inst opinst = get_xar_inst(opcode); + AddressLiteral adr = get_mask(opcode); + (_masm.*opinst)($dst$$XMMRegister, adr, $scratch$$Register); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabsneg8F(vecY dst, vecY src, rRegI scratch) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AbsVF src)); + match(Set dst (NegVF src)); + effect(TEMP scratch); + format %{ "vand(xor)ps $dst,$src,[mask]\t# absneg packed8F" %} + ins_cost(150); + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + XXAIR_Inst opinst = get_xxair_inst(opcode); + AddressLiteral adr = get_mask(opcode); + int vector_len = 1; + (_masm.*opinst)($dst$$XMMRegister, $src$$XMMRegister, adr, vector_len, $scratch$$Register); + %} + ins_pipe( pipe_slow ); + %} + + instruct vabsneg16F(vecZ dst, vecZ src, rRegI scratch) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16); + match(Set dst (AbsVF src)); + match(Set dst (NegVF src)); + effect(TEMP scratch); + format %{ "vand(xor)ps $dst,$src,[mask]\t# absneg packed16F" %} + ins_cost(150); + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + XXAIR_Inst opinst = get_xxair_inst(opcode); + AddressLiteral adr = get_mask(opcode); + int vector_len = 2; + (_masm.*opinst)($dst$$XMMRegister, $src$$XMMRegister, adr, vector_len, $scratch$$Register); + %} + ins_pipe( pipe_slow ); + %} + // --------------------------------- FMA -------------------------------------- // a * b + c instruct vfma2D_reg(vecX a, vecX b, vecX c) %{ predicate(UseFMA && 
n->as_Vector()->length() == 2);
< prev index next >