--- old/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2020-04-02 18:03:45.492854099 -0700 +++ new/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2020-04-02 18:03:45.316854099 -0700 @@ -112,6 +112,7 @@ cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); } + void MacroAssembler::cmpklass(Register src1, Metadata* obj) { cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); } @@ -2500,6 +2501,7 @@ void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) { assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); + if (dst->encoding() == src->encoding()) return; Assembler::movdqu(dst, src); } @@ -2524,6 +2526,7 @@ void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) { assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); + if (dst->encoding() == src->encoding()) return; Assembler::vmovdqu(dst, src); } @@ -2537,6 +2540,64 @@ } } + +void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) { + if (reachable(src)) { + kmovwl(dst, as_Address(src)); + } else { + lea(scratch_reg, src); + kmovwl(dst, Address(scratch_reg, 0)); + } +} + +void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, + int vector_len, Register scratch_reg) { + if (reachable(src)) { + if (mask == k0) { + Assembler::evmovdqub(dst, as_Address(src), merge, vector_len); + } else { + Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len); + } + } else { + lea(scratch_reg, src); + if (mask == k0) { + Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len); + } else { + Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len); + } + } +} + +void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, + int vector_len, Register scratch_reg) { + if (reachable(src)) { + 
Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len); + } +} + +void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, + int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len); + } +} + +void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, + int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len); + } +} + void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) { if (reachable(src)) { Assembler::evmovdquq(dst, as_Address(src), vector_len); @@ -3023,6 +3084,98 @@ Assembler::vpcmpeqw(dst, nds, src, vector_len); } +void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, + AddressLiteral src, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len); + } +} + +void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len); + } +} + +void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, 
AddressLiteral src, + int comparison, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len); + } +} + +void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len); + } +} + +void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len); + } +} + +void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) { + if (width == Assembler::Q) { + Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len); + } else { + Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len); + } +} + +void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) { + int eq_cond_enc = 0x29; + int gt_cond_enc = 0x37; + if (width != Assembler::Q) { + eq_cond_enc = 0x74 + width; + gt_cond_enc = 0x64 + width; + } + switch (cond) { + case eq: + vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len); + break; + case neq: + vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len); + vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); + 
break; + case le: + vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len); + vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); + break; + case nlt: + vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len); + vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); + break; + case lt: + vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len); + break; + case nle: + vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len); + break; + default: + assert(false, "Should not reach here"); + } +} + void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) { assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15"); Assembler::vpmovzxbw(dst, src, vector_len); @@ -3147,6 +3300,16 @@ } } +void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, + bool merge, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len); + } +} + void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { if (reachable(src)) { vdivsd(dst, nds, as_Address(src)); @@ -3243,7 +3406,14 @@ } } -//------------------------------------------------------------------------------------------- +void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::vpermd(dst, nds, as_Address(src), vector_len); + } else { + lea(scratch_reg, src); + Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len); + } +} void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask); @@ -5773,7 +5943,7 @@ bind(VECTOR64_LOOP); // AVX512 code to
compare 64 byte vectors. - evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit); + evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit); evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit); kortestql(k7, k7); jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch @@ -5792,7 +5962,7 @@ notq(tmp2); kmovql(k3, tmp2); - evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit); + evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit); evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit); ktestql(k7, k3); @@ -7231,7 +7401,7 @@ notl(result); kmovdl(k3, result); - evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit); + evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit); evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit); ktestd(k2, k3); jcc(Assembler::carryClear, return_zero); @@ -7256,7 +7426,7 @@ negptr(len); bind(copy_32_loop); - evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit); + evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit); evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit); kortestdl(k2, k2); jcc(Assembler::carryClear, return_zero); @@ -7281,7 +7451,7 @@ kmovdl(k3, result); - evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit); + evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit); evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit); ktestd(k2, k3); jcc(Assembler::carryClear, return_zero); @@ -7426,7 +7596,7 @@ // inflate 32 chars per iter bind(copy_32_loop); vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit); - evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit); + evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit); addptr(len, 32); jcc(Assembler::notZero, 
copy_32_loop); @@ -7441,7 +7611,7 @@ notl(tmp3_aliased); kmovdl(k2, tmp3_aliased); evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit); - evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit); + evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit); jmp(done); bind(avx3_threshold);