< prev index next >
src/hotspot/cpu/x86/macroAssembler_x86.cpp
Print this page
rev 61868 : manual merge with default
*** 110,119 ****
--- 110,120 ----
// Forwards to cmp_literal32 with the memory operand src1, the Metadata pointer
// obj narrowed to a 32-bit immediate, and the metadata relocation spec for
// immediates.
void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
  const int32_t klass_imm = (int32_t)obj;
  cmp_literal32(src1, klass_imm, metadata_Relocation::spec_for_immediate());
}
+
// Register flavour of cmpklass: forwards to cmp_literal32 with src1, the
// Metadata pointer obj narrowed to a 32-bit immediate, and the metadata
// relocation spec for immediates.
void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
  const int32_t klass_imm = (int32_t)obj;
  cmp_literal32(src1, klass_imm, metadata_Relocation::spec_for_immediate());
}
void MacroAssembler::cmpoop_raw(Address src1, jobject obj) {
*** 2500,2509 ****
--- 2501,2511 ----
Assembler::movdqu(dst, src);
}
// Register-to-register movdqu wrapper.
// Asserts that both register encodings are below 16 unless
// VM_Version::supports_avx512vl() holds, then forwards to Assembler::movdqu.
// When dst and src have the same encoding nothing is emitted.
void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+ if (dst->encoding() == src->encoding()) return; // same register on both sides: skip the move
Assembler::movdqu(dst, src);
}
void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
if (reachable(src)) {
*** 2524,2533 ****
--- 2526,2536 ----
Assembler::vmovdqu(dst, src);
}
// Register-to-register vmovdqu wrapper.
// Asserts that both register encodings are below 16 unless
// VM_Version::supports_avx512vl() holds, then forwards to Assembler::vmovdqu.
// When dst and src have the same encoding nothing is emitted.
void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+ if (dst->encoding() == src->encoding()) return; // same register on both sides: skip the move
Assembler::vmovdqu(dst, src);
}
void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
if (reachable(src)) {
*** 2537,2546 ****
--- 2540,2607 ----
lea(scratch_reg, src);
vmovdqu(dst, Address(scratch_reg, 0));
}
}
+
+ // kmovwl from a literal address into the mask register dst.
+ // If reachable(src) holds the literal is used directly through as_Address;
+ // otherwise its address is first formed in scratch_reg (which is overwritten)
+ // and the load goes through [scratch_reg + 0].
+ void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     kmovwl(dst, Address(scratch_reg, 0));
+     return;
+   }
+   kmovwl(dst, as_Address(src));
+ }
+
+ // evmovdqub from a literal address into dst.
+ // When mask is k0 the Assembler overload without a mask operand is used;
+ // any other mask selects the masked overload. merge and vector_len are
+ // forwarded unchanged. If the literal is not reachable, its address is
+ // formed in scratch_reg (overwriting it) and [scratch_reg + 0] is used.
+ void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     if (mask == k0) {
+       Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
+     } else {
+       Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+     }
+     return;
+   }
+   // Literal is directly addressable.
+   if (mask == k0) {
+     Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
+   } else {
+     Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
+   }
+ }
+
+ // Masked evmovdquw from a literal address into dst; mask, merge and
+ // vector_len are forwarded unchanged to Assembler::evmovdquw.
+ // An unreachable literal is addressed through scratch_reg, which is
+ // overwritten with the literal's address.
+ void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+     return;
+   }
+   Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
+ }
+
+ // Masked evmovdqul from a literal address into dst; mask, merge and
+ // vector_len are forwarded unchanged to Assembler::evmovdqul.
+ // An unreachable literal is addressed through scratch_reg, which is
+ // overwritten with the literal's address.
+ void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+     return;
+   }
+   Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
+ }
+
+ // Masked evmovdquq from a literal address into dst; mask, merge and
+ // vector_len are forwarded unchanged to Assembler::evmovdquq.
+ // An unreachable literal is addressed through scratch_reg, which is
+ // overwritten with the literal's address.
+ void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+                                int vector_len, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+     return;
+   }
+   Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
+ }
+
void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
if (reachable(src)) {
Assembler::evmovdquq(dst, as_Address(src), vector_len);
} else {
lea(rscratch, src);
*** 3023,3032 ****
--- 3084,3185 ----
// Three-register vpcmpeqw wrapper.
// Asserts that dst, src and nds all have encodings below 16 unless
// VM_Version::supports_avx512vlbw() holds, then forwards the operands and
// vector_len unchanged to Assembler::vpcmpeqw.
void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpcmpeqw(dst, nds, src, vector_len);
}
+ // evpcmpeqd of nds against a literal memory operand, writing mask register
+ // kdst under mask; all operands are forwarded to Assembler::evpcmpeqd.
+ // An unreachable literal is addressed through scratch_reg, which is
+ // overwritten with the literal's address.
+ void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
+                                AddressLiteral src, int vector_len, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
+     return;
+   }
+   Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
+ }
+
+ // evpcmpd of nds against a literal memory operand with the given comparison
+ // code, writing kdst under mask; operands are forwarded to Assembler::evpcmpd.
+ // An unreachable literal is addressed through scratch_reg, which is
+ // overwritten with the literal's address.
+ void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+     return;
+   }
+   Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ }
+
+ // evpcmpq of nds against a literal memory operand with the given comparison
+ // code, writing kdst under mask; operands are forwarded to Assembler::evpcmpq.
+ // An unreachable literal is addressed through scratch_reg, which is
+ // overwritten with the literal's address.
+ void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+     return;
+   }
+   Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ }
+
+ // evpcmpb of nds against a literal memory operand with the given comparison
+ // code, writing kdst under mask; operands are forwarded to Assembler::evpcmpb.
+ // An unreachable literal is addressed through scratch_reg, which is
+ // overwritten with the literal's address.
+ void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+     return;
+   }
+   Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ }
+
+ // evpcmpw of nds against a literal memory operand with the given comparison
+ // code, writing kdst under mask; operands are forwarded to Assembler::evpcmpw.
+ // An unreachable literal is addressed through scratch_reg, which is
+ // overwritten with the literal's address.
+ void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                              int comparison, int vector_len, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+     return;
+   }
+   Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ }
+
+ // Dispatches a vector compare on element width: Assembler::Q goes to
+ // Assembler::vpcmpCCq, every other width goes to Assembler::vpcmpCCbwd.
+ // cond_encoding and vector_len are forwarded unchanged.
+ void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
+   if (width != Assembler::Q) {
+     Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
+     return;
+   }
+   Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
+ }
+
+ // Emits a vector compare of nds against src for predicate cond, built from
+ // only an "equal" and a "greater-than" compare:
+ //   eq  -> EQ(nds, src)          neq -> EQ(nds, src), then invert
+ //   nle -> GT(nds, src)          le  -> GT(nds, src), then invert
+ //   lt  -> GT(src, nds)          nlt -> GT(src, nds), then invert
+ // Inversion is a vpxor of dst with StubRoutines::x86::vector_all_bits_set(),
+ // which is the only place scratch_reg is passed on.
+ // Any other predicate hits the assert and emits nothing.
+ void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
+   // Encodings handed to vpcmpCC: fixed values for Q, base + width otherwise
+   // (presumably the PCMPEQ*/PCMPGT* opcode bytes - confirm against the ISA manual).
+   const bool is_quad = (width == Assembler::Q);
+   const int eq_enc = is_quad ? 0x29 : (0x74 + width);
+   const int gt_enc = is_quad ? 0x37 : (0x64 + width);
+
+   int  enc           = gt_enc;  // which compare to emit
+   bool swap_operands = false;   // compare (src, nds) instead of (nds, src)
+   bool invert        = false;   // flip every result bit afterwards
+   switch (cond) {
+     case eq:
+       enc = eq_enc;
+       break;
+     case neq:
+       enc = eq_enc;
+       invert = true;
+       break;
+     case le:
+       invert = true;
+       break;
+     case nlt:
+       swap_operands = true;
+       invert = true;
+       break;
+     case lt:
+       swap_operands = true;
+       break;
+     case nle:
+       break;
+     default:
+       assert(false, "Should not reach here");
+       return;
+   }
+
+   if (swap_operands) {
+     vpcmpCC(dst, src, nds, enc, width, vector_len);
+   } else {
+     vpcmpCC(dst, nds, src, enc, width, vector_len);
+   }
+   if (invert) {
+     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+   }
+ }
+
// vpmovzxbw wrapper for a memory source operand.
// Asserts that dst has an encoding below 16 unless
// VM_Version::supports_avx512vlbw() holds, then forwards dst, src and
// vector_len unchanged to Assembler::vpmovzxbw.
void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpmovzxbw(dst, src, vector_len);
}
*** 3147,3156 ****
--- 3300,3319 ----
lea(scratch_reg, src);
vandps(dst, nds, Address(scratch_reg, 0), vector_len);
}
}
+ // Masked evpord of nds with a literal memory operand into dst; mask, merge
+ // and vector_len are forwarded unchanged to Assembler::evpord.
+ // An unreachable literal is addressed through scratch_reg, which is
+ // overwritten with the literal's address.
+ void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
+                             bool merge, int vector_len, Register scratch_reg) {
+   if (!reachable(src)) {
+     lea(scratch_reg, src);
+     Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
+     return;
+   }
+   Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
+ }
+
void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
if (reachable(src)) {
vdivsd(dst, nds, as_Address(src));
} else {
lea(rscratch1, src);
*** 3243,3253 ****
else {
MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
}
}
! //-------------------------------------------------------------------------------------------
void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
// The inverted mask is sign-extended
--- 3406,3423 ----
else {
MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
}
}
! // vpermd of nds with a literal memory operand into dst; operands and
! // vector_len are forwarded unchanged to Assembler::vpermd.
! // An unreachable literal is addressed through scratch_reg, which is
! // overwritten with the literal's address.
! void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
!   if (!reachable(src)) {
!     lea(scratch_reg, src);
!     Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
!     return;
!   }
!   Assembler::vpermd(dst, nds, as_Address(src), vector_len);
! }
void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
// The inverted mask is sign-extended
*** 5769,5779 ****
andq(tmp1, 0x3F); // tail count
andq(length, ~(0x3F)); //vector count
bind(VECTOR64_LOOP);
// AVX512 code to compare 64 byte vectors.
! evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
kortestql(k7, k7);
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
addq(result, 64);
subq(length, 64);
--- 5939,5949 ----
andq(tmp1, 0x3F); // tail count
andq(length, ~(0x3F)); //vector count
bind(VECTOR64_LOOP);
// AVX512 code to compare 64 byte vectors.
! evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
kortestql(k7, k7);
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
addq(result, 64);
subq(length, 64);
*** 5788,5798 ****
mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
shlxq(tmp2, tmp2, tmp1);
notq(tmp2);
kmovql(k3, tmp2);
! evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
ktestql(k7, k3);
jcc(Assembler::below, SAME_TILL_END); // not mismatch
--- 5958,5968 ----
mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
shlxq(tmp2, tmp2, tmp1);
notq(tmp2);
kmovql(k3, tmp2);
! evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
ktestql(k7, k3);
jcc(Assembler::below, SAME_TILL_END); // not mismatch
*** 7583,7593 ****
movl(result, 0xFFFFFFFF);
shlxl(result, result, tmp5);
notl(result);
kmovdl(k3, result);
! evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
--- 7753,7763 ----
movl(result, 0xFFFFFFFF);
shlxl(result, result, tmp5);
notl(result);
kmovdl(k3, result);
! evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
*** 7608,7618 ****
lea(src, Address(src, len, Address::times_2));
lea(dst, Address(dst, len, Address::times_1));
negptr(len);
bind(copy_32_loop);
! evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(k2, k2);
jcc(Assembler::carryClear, return_zero);
// All elements in current processed chunk are valid candidates for
--- 7778,7788 ----
lea(src, Address(src, len, Address::times_2));
lea(dst, Address(dst, len, Address::times_1));
negptr(len);
bind(copy_32_loop);
! evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(k2, k2);
jcc(Assembler::carryClear, return_zero);
// All elements in current processed chunk are valid candidates for
*** 7633,7643 ****
shlxl(result, result, len);
notl(result);
kmovdl(k3, result);
! evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
--- 7803,7813 ----
shlxl(result, result, len);
notl(result);
kmovdl(k3, result);
! evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
*** 7778,7788 ****
// inflate 32 chars per iter
bind(copy_32_loop);
vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
! evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
addptr(len, 32);
jcc(Assembler::notZero, copy_32_loop);
bind(copy_tail);
// bail out when there is nothing to be done
--- 7948,7958 ----
// inflate 32 chars per iter
bind(copy_32_loop);
vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
! evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
addptr(len, 32);
jcc(Assembler::notZero, copy_32_loop);
bind(copy_tail);
// bail out when there is nothing to be done
*** 7793,7803 ****
movl(tmp3_aliased, -1);
shlxl(tmp3_aliased, tmp3_aliased, tmp2);
notl(tmp3_aliased);
kmovdl(k2, tmp3_aliased);
evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
! evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
jmp(done);
bind(avx3_threshold);
}
if (UseSSE42Intrinsics) {
--- 7963,7973 ----
movl(tmp3_aliased, -1);
shlxl(tmp3_aliased, tmp3_aliased, tmp2);
notl(tmp3_aliased);
kmovdl(k2, tmp3_aliased);
evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
! evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
jmp(done);
bind(avx3_threshold);
}
if (UseSSE42Intrinsics) {
< prev index next >