
src/hotspot/cpu/x86/macroAssembler_x86.cpp

rev 60516 : manual merge with default

*** 110,119 ****
--- 110,120 ----
  void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
    cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
  }
  
+ void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
+   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
+ }
+ 
  void MacroAssembler::cmpoop_raw(Address src1, jobject obj) {
*** 2498,2507 ****
--- 2499,2509 ----
    Assembler::movdqu(dst, src);
  }
  
  void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
    assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+   if (dst->encoding() == src->encoding()) return;
    Assembler::movdqu(dst, src);
  }
  
  void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
    if (reachable(src)) {
*** 2522,2531 ****
--- 2524,2534 ----
    Assembler::vmovdqu(dst, src);
  }
  
  void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
    assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+   if (dst->encoding() == src->encoding()) return;
    Assembler::vmovdqu(dst, src);
  }
  
  void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
    if (reachable(src)) {
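The two guards above make XMM-to-XMM self-moves free. A minimal sketch of the effect at a hypothetical call site (the register choice is illustrative, not part of this change):

    __ movdqu(xmm3, xmm3);   // emits no instruction once the guard is in place
    __ movdqu(xmm3, xmm4);   // still emits movdqu xmm3, xmm4

Generic copy paths can therefore pass identical registers without special-casing dst == src themselves.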
*** 2535,2544 ****
--- 2538,2605 ----
      lea(scratch_reg, src);
      vmovdqu(dst, Address(scratch_reg, 0));
    }
  }
  
+ void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
+   if (reachable(src)) {
+     kmovwl(dst, as_Address(src));
+   } else {
+     lea(scratch_reg, src);
+     kmovwl(dst, Address(scratch_reg, 0));
+   }
+ }
+ 
+ void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     if (mask == k0) {
+       Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
+     } else {
+       Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
+     }
+   } else {
+     lea(scratch_reg, src);
+     if (mask == k0) {
+       Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
+     } else {
+       Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+     }
+   }
+ }
+ 
+ void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+   }
+ }
+ 
  void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
    if (reachable(src)) {
      Assembler::evmovdquq(dst, as_Address(src), vector_len);
    } else {
      lea(rscratch, src);
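All of the AddressLiteral wrappers added above share one dispatch shape: when the literal is reachable with a RIP-relative displacement it is used directly via as_Address(), otherwise its full 64-bit address is first materialized into the caller-supplied scratch register with lea(). The evmovdqub overload additionally routes mask == k0 to the unmasked encoding, since k0 denotes "no masking" in EVEX. A hedged usage sketch; the register choices (xmm0, k1, rscratch1) are illustrative only:

    // Masked 64-byte load of a stub constant; the wrapper picks
    // RIP-relative vs. lea+indirect addressing automatically.
    __ evmovdqub(xmm0, k1,
                 ExternalAddress(StubRoutines::x86::vector_all_bits_set()),
                 /*merge*/ false, Assembler::AVX_512bit, rscratch1);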
*** 3021,3030 ****
--- 3082,3183 ----
  void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
    assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
    Assembler::vpcmpeqw(dst, nds, src, vector_len);
  }
  
+ void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
+                                AddressLiteral src, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
+   if (width == Assembler::Q) {
+     Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
+   } else {
+     Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
+   }
+ }
+ 
+ void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
+   int eq_cond_enc = 0x29;
+   int gt_cond_enc = 0x37;
+   if (width != Assembler::Q) {
+     eq_cond_enc = 0x74 + width;
+     gt_cond_enc = 0x64 + width;
+   }
+   switch (cond) {
+   case eq:
+     vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
+     break;
+   case neq:
+     vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
+     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+     break;
+   case le:
+     vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
+     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+     break;
+   case nlt:
+     vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
+     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+     break;
+   case lt:
+     vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
+     break;
+   case nle:
+     vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
+     break;
+   default:
+     assert(false, "Should not reach here");
+   }
+ }
+ 
  void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
    assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
    Assembler::vpmovzxbw(dst, src, vector_len);
  }
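vpcmpCCW above reduces every ComparisonPredicate to the two predicates AVX provides natively, equal and signed greater-than. The encodings are the raw opcodes: 0x74 + width yields vpcmpeqb/vpcmpeqw/vpcmpeqd and 0x64 + width yields vpcmpgtb/vpcmpgtw/vpcmpgtd (the arithmetic relies on the Width values B, W, D being 0, 1, 2), while the quadword forms use the 0F38-map opcodes 0x29 (vpcmpeqq) and 0x37 (vpcmpgtq). Predicates with no native instruction are derived: lt is gt with swapped operands, nle is gt itself, and neq/le/nlt compute the complementary predicate and then flip every bit by XORing with the all-ones stub constant. For example, a signed byte le expands to:

    // a <= b synthesized as NOT(a > b), exactly as the 'le' case does:
    vpcmpCC(dst, nds, src, 0x64 /* vpcmpgtb */, Assembler::B, vector_len);
    vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()),
          vector_len, scratch_reg);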
*** 3145,3154 ****
--- 3298,3317 ----
      lea(scratch_reg, src);
      vandps(dst, nds, Address(scratch_reg, 0), vector_len);
    }
  }
  
+ void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                             bool merge, int vector_len, Register scratch_reg) {
+   if (reachable(src)) {
+     Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
+   } else {
+     lea(scratch_reg, src);
+     Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
+   }
+ }
+ 
  void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
    if (reachable(src)) {
      vdivsd(dst, nds, as_Address(src));
    } else {
      lea(rscratch1, src);
*** 3241,3251 ****
    else {
      MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
    }
  }
  
! //-------------------------------------------------------------------------------------------
  
  void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
    const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
    STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
    // The inverted mask is sign-extended
--- 3404,3421 ----
    else {
      MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
    }
  }
  
! void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
!   if (reachable(src)) {
!     Assembler::vpermd(dst, nds, as_Address(src), vector_len);
!   } else {
!     lea(scratch_reg, src);
!     Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
!   }
! }
  
  void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
    const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
    STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
    // The inverted mask is sign-extended
*** 5771,5781 ****
    andq(tmp1, 0x3F); // tail count
    andq(length, ~(0x3F)); //vector count
  
    bind(VECTOR64_LOOP);
    // AVX512 code to compare 64 byte vectors.
!   evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
    evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
    kortestql(k7, k7);
    jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL);     // mismatch
    addq(result, 64);
    subq(length, 64);
--- 5941,5951 ----
    andq(tmp1, 0x3F); // tail count
    andq(length, ~(0x3F)); //vector count
  
    bind(VECTOR64_LOOP);
    // AVX512 code to compare 64 byte vectors.
!   evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
    evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
    kortestql(k7, k7);
    jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL);     // mismatch
    addq(result, 64);
    subq(length, 64);
*** 5790,5800 ****
    mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
    shlxq(tmp2, tmp2, tmp1);
    notq(tmp2);
    kmovql(k3, tmp2);
!   evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
    evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
    ktestql(k7, k3);
    jcc(Assembler::below, SAME_TILL_END);     // not mismatch
--- 5960,5970 ----
    mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
    shlxq(tmp2, tmp2, tmp1);
    notq(tmp2);
    kmovql(k3, tmp2);
!   evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
    evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
    ktestql(k7, k3);
    jcc(Assembler::below, SAME_TILL_END);     // not mismatch
*** 7229,7239 ****
    movl(result, 0xFFFFFFFF);
    shlxl(result, result, tmp5);
    notl(result);
    kmovdl(k3, result);
!   evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
    evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
    ktestd(k2, k3);
    jcc(Assembler::carryClear, return_zero);
  
    evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
--- 7399,7409 ----
    movl(result, 0xFFFFFFFF);
    shlxl(result, result, tmp5);
    notl(result);
    kmovdl(k3, result);
!   evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
    evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
    ktestd(k2, k3);
    jcc(Assembler::carryClear, return_zero);
  
    evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
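The four scalar instructions ahead of the changed load build the k-register tail mask: shifting all-ones left by the residual element count and inverting leaves exactly the low bits set, i.e. k3 = (1 << count) - 1. Worked through assuming tmp5 holds 5:

    // 0xFFFFFFFF << 5 = 0xFFFFFFE0
    // ~0xFFFFFFE0     = 0x0000001F   -> k3 = 0b11111, the low 5 word lanes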
*** 7254,7264 ****
    lea(src, Address(src, len, Address::times_2));
    lea(dst, Address(dst, len, Address::times_1));
    negptr(len);
  
    bind(copy_32_loop);
!   evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
    evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
    kortestdl(k2, k2);
    jcc(Assembler::carryClear, return_zero);
  
    // All elements in current processed chunk are valid candidates for
--- 7424,7434 ----
    lea(src, Address(src, len, Address::times_2));
    lea(dst, Address(dst, len, Address::times_1));
    negptr(len);
  
    bind(copy_32_loop);
!   evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
    evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
    kortestdl(k2, k2);
    jcc(Assembler::carryClear, return_zero);
  
    // All elements in current processed chunk are valid candidates for
*** 7279,7289 ****
    shlxl(result, result, len);
    notl(result);
    kmovdl(k3, result);
!   evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
    evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
    ktestd(k2, k3);
    jcc(Assembler::carryClear, return_zero);
  
    evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
--- 7449,7459 ----
    shlxl(result, result, len);
    notl(result);
    kmovdl(k3, result);
!   evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
    evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
    ktestd(k2, k3);
    jcc(Assembler::carryClear, return_zero);
  
    evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
*** 7424,7434 ****
    // inflate 32 chars per iter
    bind(copy_32_loop);
    vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
!   evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
    addptr(len, 32);
    jcc(Assembler::notZero, copy_32_loop);
  
    bind(copy_tail);
    // bail out when there is nothing to be done
--- 7594,7604 ----
    // inflate 32 chars per iter
    bind(copy_32_loop);
    vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
!   evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
    addptr(len, 32);
    jcc(Assembler::notZero, copy_32_loop);
  
    bind(copy_tail);
    // bail out when there is nothing to be done
*** 7439,7449 ****
    movl(tmp3_aliased, -1);
    shlxl(tmp3_aliased, tmp3_aliased, tmp2);
    notl(tmp3_aliased);
    kmovdl(k2, tmp3_aliased);
    evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
!   evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
    jmp(done);
  
    bind(avx3_threshold);
  }
  if (UseSSE42Intrinsics) {
--- 7609,7619 ----
    movl(tmp3_aliased, -1);
    shlxl(tmp3_aliased, tmp3_aliased, tmp2);
    notl(tmp3_aliased);
    kmovdl(k2, tmp3_aliased);
    evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
!   evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
    jmp(done);
  
    bind(avx3_threshold);
  }
  if (UseSSE42Intrinsics) {
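Note the asymmetry in the merge arguments introduced above: the masked loads into registers pass /*merge*/ false (unselected destination lanes are zeroed), but this final evmovdquw is a masked store to memory and passes true. That matches the EVEX rule that zeroing-masking is not encodable for memory destinations, so merge masking is the only valid choice here:

    // Masked tail store: lanes not selected by k2 never reach memory,
    // leaving the bytes past the tail untouched.
    evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);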