--- old/src/hotspot/cpu/x86/macroAssembler_x86.cpp	2019-05-03 16:36:32.331366968 -0700
+++ new/src/hotspot/cpu/x86/macroAssembler_x86.cpp	2019-05-03 16:36:32.183366972 -0700
@@ -1003,25 +1003,25 @@
   }
 }
 
-void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
   // Used in sign-masking with aligned address.
   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
   if (reachable(src)) {
     Assembler::andpd(dst, as_Address(src));
   } else {
-    lea(rscratch1, src);
-    Assembler::andpd(dst, Address(rscratch1, 0));
+    lea(scratch_reg, src);
+    Assembler::andpd(dst, Address(scratch_reg, 0));
   }
 }
 
-void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::andps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
   // Used in sign-masking with aligned address.
   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
   if (reachable(src)) {
     Assembler::andps(dst, as_Address(src));
   } else {
-    lea(rscratch1, src);
-    Assembler::andps(dst, Address(rscratch1, 0));
+    lea(scratch_reg, src);
+    Assembler::andps(dst, Address(scratch_reg, 0));
   }
 }
 
@@ -3340,13 +3340,13 @@
   Assembler::vmovdqu(dst, src);
 }
 
-void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
   if (reachable(src)) {
     vmovdqu(dst, as_Address(src));
   } else {
-    lea(rscratch1, src);
-    vmovdqu(dst, Address(rscratch1, 0));
+    lea(scratch_reg, src);
+    vmovdqu(dst, Address(scratch_reg, 0));
   }
 }
 
@@ -3698,14 +3698,14 @@
   }
 }
 
-void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
   // Used in sign-bit flipping with aligned address.
   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
   if (reachable(src)) {
     Assembler::xorpd(dst, as_Address(src));
   } else {
-    lea(rscratch1, src);
-    Assembler::xorpd(dst, Address(rscratch1, 0));
+    lea(scratch_reg, src);
+    Assembler::xorpd(dst, Address(scratch_reg, 0));
   }
 }
 
@@ -3726,14 +3726,14 @@
-void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
   // Used in sign-bit flipping with aligned address.
   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
   if (reachable(src)) {
     Assembler::xorps(dst, as_Address(src));
   } else {
-    lea(rscratch1, src);
-    Assembler::xorps(dst, Address(rscratch1, 0));
+    lea(scratch_reg, src);
+    Assembler::xorps(dst, Address(scratch_reg, 0));
   }
 }
 
@@ -3799,12 +3799,12 @@
   Assembler::vpaddw(dst, nds, src, vector_len);
 }
 
-void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
+void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
   if (reachable(src)) {
     Assembler::vpand(dst, nds, as_Address(src), vector_len);
   } else {
-    lea(rscratch1, src);
-    Assembler::vpand(dst, nds, Address(rscratch1, 0), vector_len);
+    lea(scratch_reg, src);
+    Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
   }
 }
 
@@ -3873,6 +3873,22 @@
   Assembler::vpsraw(dst, nds, shift, vector_len);
 }
 
+void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 2,"");
+  if (!VM_Version::supports_avx512vl() && vector_len < 2) {
+    vector_len = 2;
+  }
+  Assembler::evpsraq(dst, nds, shift, vector_len);
+}
+
+void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
+  assert(UseAVX > 2,"");
+  if (!VM_Version::supports_avx512vl() && vector_len < 2) {
+    vector_len = 2;
+  }
+  Assembler::evpsraq(dst, nds, shift, vector_len);
+}
+
 void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
   assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
   Assembler::vpsrlw(dst, nds, shift, vector_len);
@@ -3913,21 +3929,21 @@
   Assembler::pshuflw(dst, src, mode);
 }
 
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
   if (reachable(src)) {
     vandpd(dst, nds, as_Address(src), vector_len);
   } else {
-    lea(rscratch1, src);
-    vandpd(dst, nds, Address(rscratch1, 0), vector_len);
+    lea(scratch_reg, src);
+    vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
   }
 }
 
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
   if (reachable(src)) {
     vandps(dst, nds, as_Address(src), vector_len);
   } else {
-    lea(rscratch1, src);
-    vandps(dst, nds, Address(rscratch1, 0), vector_len);
+    lea(scratch_reg, src);
+    vandps(dst, nds, Address(scratch_reg, 0), vector_len);
  }
 }
 
@@ -3995,24 +4011,162 @@
   vxorpd(dst, nds, src, Assembler::AVX_128bit);
 }
 
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
   if (reachable(src)) {
     vxorpd(dst, nds, as_Address(src), vector_len);
   } else {
-    lea(rscratch1, src);
-    vxorpd(dst, nds, Address(rscratch1, 0), vector_len);
+    lea(scratch_reg, src);
+    vxorpd(dst, nds, Address(scratch_reg, 0), vector_len);
   }
 }
 
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
   if (reachable(src)) {
     vxorps(dst, nds, as_Address(src), vector_len);
   } else {
-    lea(rscratch1, src);
-    vxorps(dst, nds, Address(rscratch1, 0), vector_len);
+    lea(scratch_reg, src);
+    vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
+  }
+}
+
+void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
+  if (UseAVX > 1 || (vector_len < 1)) {
+    if (reachable(src)) {
+      Assembler::vpxor(dst, nds, as_Address(src), vector_len);
+    } else {
+      lea(scratch_reg, src);
+      Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
+    }
+  }
+  else {
+    MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
+  }
+}
+
+//-------------------------------------------------------------------------------------------
+#ifdef COMPILER2
+// Generic instructions support for use in .ad files C2 code generation
+
+void MacroAssembler::vabsnegd(int opcode, XMMRegister dst, Register scr) {
+  if (opcode == Op_AbsVD) {
+    andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
+  } else {
+    assert((opcode == Op_NegVD),"opcode should be Op_NegVD");
+    xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
+  }
+}
+
+void MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
+  if (opcode == Op_AbsVD) {
+    vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr);
+  } else {
+    assert((opcode == Op_NegVD),"opcode should be Op_NegVD");
+    vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, scr);
+  }
+}
+
+void MacroAssembler::vabsnegf(int opcode, XMMRegister dst, Register scr) {
+  if (opcode == Op_AbsVF) {
+    andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), scr);
+  } else {
+    assert((opcode == Op_NegVF),"opcode should be Op_NegVF");
+    xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scr);
   }
 }
 
+void MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
+  if (opcode == Op_AbsVF) {
+    vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, scr);
+  } else {
+    assert((opcode == Op_NegVF),"opcode should be Op_NegVF");
+    vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, scr);
+  }
+}
+
+void MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
+  if (sign) {
+    pmovsxbw(dst, src);
+  } else {
+    pmovzxbw(dst, src);
+  }
+}
+
+void MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
+  if (sign) {
+    vpmovsxbw(dst, src, vector_len);
+  } else {
+    vpmovzxbw(dst, src, vector_len);
+  }
+}
+
+void MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) {
+  if (opcode == Op_RShiftVI) {
+    psrad(dst, src);
+  } else if (opcode == Op_LShiftVI) {
+    pslld(dst, src);
+  } else {
+    assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
+    psrld(dst, src);
+  }
+}
+
+void MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  if (opcode == Op_RShiftVI) {
+    vpsrad(dst, nds, src, vector_len);
+  } else if (opcode == Op_LShiftVI) {
+    vpslld(dst, nds, src, vector_len);
+  } else {
+    assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
+    vpsrld(dst, nds, src, vector_len);
+  }
+}
+
+void MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src) {
+  if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) {
+    psraw(dst, src);
+  } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) {
+    psllw(dst, src);
+  } else {
+    assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB");
+    psrlw(dst, src);
+  }
+}
+
+void MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) {
+    vpsraw(dst, nds, src, vector_len);
+  } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) {
+    vpsllw(dst, nds, src, vector_len);
+  } else {
+    assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB");
+    vpsrlw(dst, nds, src, vector_len);
+  }
+}
+
+void MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src) {
+  if (opcode == Op_RShiftVL) {
+    psrlq(dst, src); // using srl to implement sra on pre-avx512 systems
+  } else if (opcode == Op_LShiftVL) {
+    psllq(dst, src);
+  } else {
+    assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
+    psrlq(dst, src);
+  }
+}
+
+void MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  if (opcode == Op_RShiftVL) {
+    evpsraq(dst, nds, src, vector_len);
+  } else if (opcode == Op_LShiftVL) {
+    vpsllq(dst, nds, src, vector_len);
+  } else {
+    assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
+    vpsrlq(dst, nds, src, vector_len);
+  }
+}
+#endif
+//-------------------------------------------------------------------------------------------
+
 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
   const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
   STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code