
src/hotspot/cpu/x86/macroAssembler_x86.cpp

rev 61868 : manual merge with default

*** 110,119 ****
--- 110,120 ----
  void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
    cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
  }
  
+ void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
+   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
+ }
+ 
  void MacroAssembler::cmpoop_raw(Address src1, jobject obj) {
*** 2500,2509 ****
--- 2501,2511 ----
    Assembler::movdqu(dst, src);
  }
  
  void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
    assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+   if (dst->encoding() == src->encoding()) return;
    Assembler::movdqu(dst, src);
  }
  
  void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
    if (reachable(src)) {
*** 2524,2533 ****
--- 2526,2536 ----
    Assembler::vmovdqu(dst, src);
  }
  
  void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
    assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+   if (dst->encoding() == src->encoding()) return;
    Assembler::vmovdqu(dst, src);
  }
  
  void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
    if (reachable(src)) {
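Note on the two hunks above: the new "if (dst->encoding() == src->encoding()) return;" guard turns a register-to-register move with identical operands into a no-op, so no encoding bytes are emitted for a redundant copy. A minimal caller sketch (my illustration, not part of the patch):

  // Hypothetical helper: if the register allocator hands copy_vec the same
  // XMM register for both operands, the guarded movdqu now emits nothing.
  static void copy_vec(MacroAssembler* masm, XMMRegister dst, XMMRegister src) {
    masm->movdqu(dst, src);   // no-op when dst == src
  }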
*** 2537,2546 ****
--- 2540,2607 ----
      lea(scratch_reg, src);
      vmovdqu(dst, Address(scratch_reg, 0));
    }
  }
  
+ void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
+   if (reachable(src)) {
+     kmovwl(dst, as_Address(src));
+   } else {
+     lea(scratch_reg, src);
+     kmovwl(dst, Address(scratch_reg, 0));
+   }
+ }
+ 
+ void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     if (mask == k0) {
+       Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
+     } else {
+       Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
+     }
+   } else {
+     lea(scratch_reg, src);
+     if (mask == k0) {
+       Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
+     } else {
+       Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+     }
+   }
+ }
+ 
+ void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+   }
+ }
+ 
  void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
    if (reachable(src)) {
      Assembler::evmovdquq(dst, as_Address(src), vector_len);
    } else {
      lea(rscratch, src);
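All of the new AddressLiteral wrappers above share one shape: if the literal is rip-reachable it is used directly, otherwise its address is materialized into scratch_reg with lea and dereferenced at offset 0 (on x86_64 a rip-relative displacement only spans +/- 2GB). The mask == k0 special case in evmovdqub falls back to the unmasked encoding, since k0 cannot act as an actual write-mask. A hedged usage sketch in stub style; the mask_bits value is invented for illustration:

  // Sketch only: masked 64-byte load from a constant table that may sit
  // outside rip-relative range. vector_all_bits_set() is an existing stub
  // constant; 'mask_bits' is a hypothetical 64-bit lane selector.
  __ mov64(rscratch1, mask_bits);
  __ kmovql(k1, rscratch1);
  __ evmovdqub(xmm0, k1, ExternalAddress(StubRoutines::x86::vector_all_bits_set()),
               /*merge*/ false, Assembler::AVX_512bit, rscratch1);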
*** 3023,3032 ****
--- 3084,3185 ----
  void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
    Assembler::vpcmpeqw(dst, nds, src, vector_len);
  }
  
+ void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
+                                AddressLiteral src, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
+   if (width == Assembler::Q) {
+     Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
+   } else {
+     Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
+   int eq_cond_enc = 0x29;
+   int gt_cond_enc = 0x37;
+   if (width != Assembler::Q) {
+     eq_cond_enc = 0x74 + width;
+     gt_cond_enc = 0x64 + width;
+   }
+   switch (cond) {
+   case eq:
+     vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
+     break;
+   case neq:
+     vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
+     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+     break;
+   case le:
+     vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
+     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+     break;
+   case nlt:
+     vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
+     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+     break;
+   case lt:
+     vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
+     break;
+   case nle:
+     vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
+     break;
+   default:
+     assert(false, "Should not reach here");
+   }
+ }
+ 
  void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
    assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
    Assembler::vpmovzxbw(dst, src, vector_len);
  }
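vpcmpCCW exists because pre-EVEX SSE/AVX integer compares only come in equal and greater-than flavors: pcmpeq{b,w,d} are opcodes 0x74-0x76 and pcmpgt{b,w,d} are 0x64-0x66 (hence the "+ width" arithmetic), with the quadword forms at 0x29/0x37 in the 0F38 map. Every other predicate is synthesized by swapping operands and/or inverting the result with vpxor against an all-ones constant. A scalar sanity check of those identities (my addition, not from the patch):

  #include <cassert>
  // eq/gt stand in for the only primitive vector compares available.
  static bool eq(int a, int b) { return a == b; }
  static bool gt(int a, int b) { return a >  b; }
  int main() {
    for (int a = -2; a <= 2; a++) {
      for (int b = -2; b <= 2; b++) {
        assert((a != b) == !eq(a, b));   // neq = NOT eq
        assert((a <= b) == !gt(a, b));   // le  = NOT gt
        assert((a >= b) == !gt(b, a));   // nlt = NOT swapped gt
        assert((a <  b) ==  gt(b, a));   // lt  =     swapped gt
      }
    }
    return 0;
  }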
*** 3147,3156 ****
--- 3300,3319 ----
      lea(scratch_reg, src);
      vandps(dst, nds, Address(scratch_reg, 0), vector_len);
    }
  }
  
+ void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                             bool merge, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
+   }
+ }
+ 
  void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
    if (reachable(src)) {
      vdivsd(dst, nds, as_Address(src));
    } else {
      lea(rscratch1, src);
*** 3243,3253 ****
    else {
      MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
    }
  }
  
! //-------------------------------------------------------------------------------------------
  
  void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
    const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
    STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
    // The inverted mask is sign-extended
--- 3406,3423 ----
    else {
      MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
    }
  }
  
! void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
!   if (reachable(src)) {
!     Assembler::vpermd(dst, nds, as_Address(src), vector_len);
!   } else {
!     lea(scratch_reg, src);
!     Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
!   }
! }
  
  void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
    const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
    STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
    // The inverted mask is sign-extended
*** 5769,5779 ****
    andq(tmp1, 0x3F);      // tail count
    andq(length, ~(0x3F)); //vector count
  
    bind(VECTOR64_LOOP);
    // AVX512 code to compare 64 byte vectors.
!   evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
    evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
    kortestql(k7, k7);
    jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL);     // mismatch
    addq(result, 64);
    subq(length, 64);
--- 5939,5949 ----
    andq(tmp1, 0x3F);      // tail count
    andq(length, ~(0x3F)); //vector count
  
    bind(VECTOR64_LOOP);
    // AVX512 code to compare 64 byte vectors.
!   evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
    evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
    kortestql(k7, k7);
    jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL);     // mismatch
    addq(result, 64);
    subq(length, 64);
*** 5788,5798 ****
    mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
    shlxq(tmp2, tmp2, tmp1);
    notq(tmp2);
    kmovql(k3, tmp2);
  
!   evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
    evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
  
    ktestql(k7, k3);
    jcc(Assembler::below, SAME_TILL_END);     // not mismatch
--- 5958,5968 ----
    mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
    shlxq(tmp2, tmp2, tmp1);
    notq(tmp2);
    kmovql(k3, tmp2);
  
!   evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
    evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
  
    ktestql(k7, k3);
    jcc(Assembler::below, SAME_TILL_END);     // not mismatch
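The three instructions ahead of the masked load above build a tail mask: shifting all-ones left by the tail count and inverting leaves exactly tail-count low bits set, so the k3-masked evmovdqub/evpcmpeqb pair only loads and compares the remaining bytes. A scalar model of that computation (a sketch, not from the patch):

  #include <cassert>
  #include <cstdint>
  // Mirrors the mov64(all-ones) / shlxq / notq sequence for a tail in 0..63.
  static uint64_t tail_mask(unsigned tail) {
    return ~(~0ULL << tail);        // low 'tail' bits set
  }
  int main() {
    assert(tail_mask(0)  == 0);     // nothing left to compare
    assert(tail_mask(3)  == 0x7);   // bytes 0..2 selected
    assert(tail_mask(63) == 0x7FFFFFFFFFFFFFFFULL);
    return 0;
  }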
*** 7583,7593 ****
      movl(result, 0xFFFFFFFF);
      shlxl(result, result, tmp5);
      notl(result);
      kmovdl(k3, result);
  
!     evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
      evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
      ktestd(k2, k3);
      jcc(Assembler::carryClear, return_zero);
  
      evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
--- 7753,7763 ----
      movl(result, 0xFFFFFFFF);
      shlxl(result, result, tmp5);
      notl(result);
      kmovdl(k3, result);
  
!     evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
      evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
      ktestd(k2, k3);
      jcc(Assembler::carryClear, return_zero);
  
      evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
*** 7608,7618 ****
      lea(src, Address(src, len, Address::times_2));
      lea(dst, Address(dst, len, Address::times_1));
      negptr(len);
  
      bind(copy_32_loop);
!     evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
      evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
      kortestdl(k2, k2);
      jcc(Assembler::carryClear, return_zero);
  
      // All elements in current processed chunk are valid candidates for
--- 7778,7788 ----
      lea(src, Address(src, len, Address::times_2));
      lea(dst, Address(dst, len, Address::times_1));
      negptr(len);
  
      bind(copy_32_loop);
!     evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
      evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
      kortestdl(k2, k2);
      jcc(Assembler::carryClear, return_zero);
  
      // All elements in current processed chunk are valid candidates for
*** 7633,7643 ****
      shlxl(result, result, len);
      notl(result);
      kmovdl(k3, result);
  
!     evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
      evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
      ktestd(k2, k3);
      jcc(Assembler::carryClear, return_zero);
  
      evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
--- 7803,7813 ----
      shlxl(result, result, len);
      notl(result);
      kmovdl(k3, result);
  
!     evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
      evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
      ktestd(k2, k3);
      jcc(Assembler::carryClear, return_zero);
  
      evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
*** 7778,7788 ****
      // inflate 32 chars per iter
      bind(copy_32_loop);
      vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
!     evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
      addptr(len, 32);
      jcc(Assembler::notZero, copy_32_loop);
  
      bind(copy_tail);
      // bail out when there is nothing to be done
--- 7948,7958 ----
      // inflate 32 chars per iter
      bind(copy_32_loop);
      vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
!     evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
      addptr(len, 32);
      jcc(Assembler::notZero, copy_32_loop);
  
      bind(copy_tail);
      // bail out when there is nothing to be done
*** 7793,7803 ****
      movl(tmp3_aliased, -1);
      shlxl(tmp3_aliased, tmp3_aliased, tmp2);
      notl(tmp3_aliased);
      kmovdl(k2, tmp3_aliased);
      evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
!     evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
      jmp(done);
  
      bind(avx3_threshold);
    }
    if (UseSSE42Intrinsics) {
--- 7963,7973 ----
      movl(tmp3_aliased, -1);
      shlxl(tmp3_aliased, tmp3_aliased, tmp2);
      notl(tmp3_aliased);
      kmovdl(k2, tmp3_aliased);
      evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
!     evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
      jmp(done);
  
      bind(avx3_threshold);
    }
    if (UseSSE42Intrinsics) {
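The new boolean threaded through these call sites is the EVEX merge bit. For the masked loads in the compress loops, merge == false requests zeroing-masking, so deselected lanes come back as zero; that is safe here because the following evpcmpuw is itself masked. The masked store in the inflate tail passes merge == true instead: to my understanding zeroing-masking is only legal with a register destination, and a masked store leaves deselected memory untouched in any case. An illustrative contrast, a sketch under that assumption:

  // Masked load: deselected word lanes of xmm1 are zeroed (EVEX.z = 1).
  __ evmovdquw(xmm1, k2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
  // Masked store: only k2-selected words are written back (EVEX.z must be 0).
  __ evmovdquw(Address(dst, 0), k2, xmm1, /*merge*/ true, Assembler::AVX_512bit);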