< prev index next >
src/cpu/x86/vm/macroAssembler_x86.cpp
Print this page
*** 3947,3956 ****
--- 3947,4186 ----
// 32-bit TEST of dst against the memory word at an AddressLiteral.
// The target must be directly reachable (asserted); no scratch-register
// fallback is provided here, unlike the sqrtsd-style wrappers below.
void MacroAssembler::testl(Register dst, AddressLiteral src) {
assert(reachable(src), "Address should be reachable");
testl(dst, as_Address(src));
}
+ // SSE pcmpeqb (packed byte compare-for-equality) with support for
+ // AVX-512 upper-bank registers (encodings 16-31).  Legacy/VEX 128-bit
+ // encodings cannot name xmm16-31, so when either operand lives in the
+ // upper bank -- and the direct encoding is not available (see the
+ // VM_Version gate; presumably those CPUs can encode any register
+ // directly, confirm against VM_Version) -- operands are staged through
+ // xmm0/xmm1, which are preserved in 64-byte stack spill slots.
+ void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::pcmpeqb(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) {
+ // Both registers in the low bank: emit directly.
+ Assembler::pcmpeqb(dst, src);
+ } else if (src_enc < 16) {
+ // dst is in the upper bank: compute in xmm0, copy the result back.
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pcmpeqb(xmm0, src);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ // src is in the upper bank: stage it in xmm0.
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::pcmpeqb(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ // Both operands in the upper bank: stage through xmm0/xmm1.
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pcmpeqb(xmm1, xmm0);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+
+ // SSE pcmpeqw (packed 16-bit compare-for-equality).  Same upper-bank
+ // (xmm16-31) staging scheme as pcmpeqb above: operands with encodings
+ // >= 16 are shuttled through xmm0/xmm1, preserved in 64-byte stack
+ // spill slots around the operation.
+ void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::pcmpeqw(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::pcmpeqw(dst, src);
+ } else if (src_enc < 16) {
+ // dst is in the upper bank: compute in xmm0, copy the result back.
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pcmpeqw(xmm0, src);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ // src is in the upper bank: stage it in xmm0.
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::pcmpeqw(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ // Both operands in the upper bank: stage through xmm0/xmm1.
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pcmpeqw(xmm1, xmm0);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+
+ // pcmpestri (explicit-length string compare; result goes to rcx and the
+ // flags) with a memory second operand.  dst is staged through xmm0 when
+ // it lives in the upper bank, presumably because pcmpestri cannot encode
+ // xmm16-31 -- confirm.  NOTE(review): the movdqu copy-back of xmm0 into
+ // dst looks redundant, since pcmpestri does not write its xmm operand;
+ // kept as-is.
+ void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
+ int dst_enc = dst->encoding();
+ if (dst_enc < 16) {
+ Assembler::pcmpestri(dst, src, imm8);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pcmpestri(xmm0, src, imm8);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+
+ // pcmpestri with a register second operand; result in rcx/flags.  Same
+ // upper-bank staging scheme as pcmpeqb above.  NOTE(review): the
+ // movdqu copy-backs into dst look redundant since pcmpestri does not
+ // write its xmm operands; kept as-is.
+ void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::pcmpestri(dst, src, imm8);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pcmpestri(xmm0, src, imm8);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::pcmpestri(dst, xmm0, imm8);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pcmpestri(xmm1, xmm0, imm8);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+
+ // pmovzxbw: zero-extend packed bytes of src into words in dst.  Same
+ // upper-bank (xmm16-31) staging scheme as pcmpeqb above.
+ void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::pmovzxbw(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::pmovzxbw(dst, src);
+ } else if (src_enc < 16) {
+ // dst is in the upper bank: compute in xmm0, copy the result back.
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pmovzxbw(xmm0, src);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ // src is in the upper bank: stage it in xmm0.
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::pmovzxbw(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ // Both operands in the upper bank: stage through xmm0/xmm1.
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pmovzxbw(xmm1, xmm0);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+
+ // pmovzxbw from memory.  dst in the upper bank is staged through xmm0.
+ // NOTE(review): the preload of dst into xmm0 looks unnecessary --
+ // pmovzxbw fully overwrites its destination -- kept as-is.
+ void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) {
+ int dst_enc = dst->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::pmovzxbw(dst, src);
+ } else if (dst_enc < 16) {
+ Assembler::pmovzxbw(dst, src);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pmovzxbw(xmm0, src);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+
+ // pmovmskb: extract the byte sign-mask of src into the GPR dst.  Only
+ // src may need staging; the result lands in a general register, so no
+ // xmm copy-back is required.
+ void MacroAssembler::pmovmskb(Register dst, XMMRegister src) {
+ int src_enc = src->encoding();
+ if (src_enc < 16) {
+ Assembler::pmovmskb(dst, src);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::pmovmskb(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+
+ // ptest: sets ZF/CF from dst AND src.  Flags-only, so unlike the
+ // value-producing wrappers above no result is copied back after the
+ // upper-bank staging through xmm0/xmm1.
+ void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::ptest(dst, src);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::ptest(xmm0, src);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::ptest(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::ptest(xmm1, xmm0);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+
void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::sqrtsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
*** 4254,4437 ****
Assembler::vpaddw(xmm0, xmm0, src, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
// Vector byte subtract.  NOTE(review): the non-AVX512BW fallback paths
// compute dst = dst - src and reuse nds as a scratch register, so they
// assume nds == dst (as the vpcmpeq* wrappers assert) -- confirm at
// call sites.
! void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
int src_enc = src->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpsubb(dst, nds, src, vector_len);
} else if ((dst_enc < 16) && (src_enc < 16)) {
! Assembler::vpsubb(dst, dst, src, vector_len);
} else if ((dst_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for src
evmovdqul(nds, src, Assembler::AVX_512bit);
! Assembler::vpsubb(dst, dst, nds, vector_len);
} else if ((src_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for dst
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpsubb(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
// use nds as scratch for xmm0 to hold src
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, src, Assembler::AVX_512bit);
! Assembler::vpsubb(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs are in the upper bank
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, src, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
}
}
// Vector byte subtract from memory: dst = dst - [src] on the fallback
// paths (nds serves only as a scratch register there).
// FIX: the all-upper-bank path emitted Assembler::vpsubw (16-bit word
// subtract) instead of vpsubb (byte subtract) -- a copy-paste defect
// that produced wrong results whenever dst and nds both live in
// xmm16-31 without AVX512BW.
! void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpsubb(dst, nds, src, vector_len);
} else if (dst_enc < 16) {
! Assembler::vpsubb(dst, dst, src, vector_len);
} else if (nds_enc < 16) {
// implies dst_enc in upper bank with nds as scratch
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpsubb(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs in upper bank
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpsubb(xmm0, xmm0, src, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
// Vector word (16-bit) subtract; same upper-bank staging scheme and
// nds == dst assumption as vpsubb above.
! void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
int src_enc = src->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpsubw(dst, nds, src, vector_len);
} else if ((dst_enc < 16) && (src_enc < 16)) {
! Assembler::vpsubw(dst, dst, src, vector_len);
} else if ((dst_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for src
evmovdqul(nds, src, Assembler::AVX_512bit);
! Assembler::vpsubw(dst, dst, nds, vector_len);
} else if ((src_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for dst
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpsubw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
// use nds as scratch for xmm0 to hold src
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, src, Assembler::AVX_512bit);
! Assembler::vpsubw(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs are in the upper bank
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, src, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
}
}
// Vector word subtract from memory; nds is a scratch register on the
// fallback paths (dst = dst - [src]).
! void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpsubw(dst, nds, src, vector_len);
} else if (dst_enc < 16) {
! Assembler::vpsubw(dst, dst, src, vector_len);
} else if (nds_enc < 16) {
// implies dst_enc in upper bank with nds as scratch
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpsubw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs in upper bank
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
Assembler::vpsubw(xmm0, xmm0, src, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
!
! // Vector word multiply (low 16 bits); same upper-bank staging scheme
! // and nds == dst assumption as vpsubb above.
! void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
int src_enc = src->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpmullw(dst, nds, src, vector_len);
} else if ((dst_enc < 16) && (src_enc < 16)) {
! Assembler::vpmullw(dst, dst, src, vector_len);
} else if ((dst_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for src
evmovdqul(nds, src, Assembler::AVX_512bit);
! Assembler::vpmullw(dst, dst, nds, vector_len);
} else if ((src_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for dst
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpmullw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
// use nds as scratch for xmm0 to hold src
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, src, Assembler::AVX_512bit);
! Assembler::vpmullw(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs are in the upper bank
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, src, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
}
}
// Vector word multiply from memory; nds is a scratch register on the
// fallback paths (dst = dst * [src]).
! void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpmullw(dst, nds, src, vector_len);
} else if (dst_enc < 16) {
! Assembler::vpmullw(dst, dst, src, vector_len);
} else if (nds_enc < 16) {
// implies dst_enc in upper bank with nds as scratch
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpmullw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs in upper bank
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpmullw(xmm0, xmm0, src, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
--- 4484,4815 ----
Assembler::vpaddw(xmm0, xmm0, src, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
! // vpbroadcastw: replicate the low word of src across all lanes of dst.
! // Same upper-bank (xmm16-31) staging scheme as pcmpeqb above: operands
! // with encodings >= 16 go through xmm0/xmm1, preserved in 64-byte
! // stack spill slots.
! void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
! int dst_enc = dst->encoding();
! int src_enc = src->encoding();
! if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpbroadcastw(dst, src);
! } else if ((dst_enc < 16) && (src_enc < 16)) {
! Assembler::vpbroadcastw(dst, src);
! } else if (src_enc < 16) {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpbroadcastw(xmm0, src);
! movdqu(dst, xmm0);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! } else if (dst_enc < 16) {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! evmovdqul(xmm0, src, Assembler::AVX_512bit);
! Assembler::vpbroadcastw(dst, xmm0);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! } else {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
! movdqu(xmm0, src);
! movdqu(xmm1, dst);
! Assembler::vpbroadcastw(xmm1, xmm0);
! movdqu(dst, xmm1);
! evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! }
! }
!
! // Vector byte compare-for-equality.  The fallback paths are
! // destructive (dst = dst == src), so callers must pass nds == dst --
! // enforced by the assert below.  Upper-bank operands are staged
! // through xmm0/xmm1 with 64-byte stack spill slots.
! void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
int src_enc = src->encoding();
+ assert(dst_enc == nds_enc, "");
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpcmpeqb(dst, nds, src, vector_len);
} else if ((dst_enc < 16) && (src_enc < 16)) {
! Assembler::vpcmpeqb(dst, nds, src, vector_len);
! } else if (src_enc < 16) {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpcmpeqb(xmm0, xmm0, src, vector_len);
! movdqu(dst, xmm0);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! } else if (dst_enc < 16) {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! evmovdqul(xmm0, src, Assembler::AVX_512bit);
! Assembler::vpcmpeqb(dst, dst, xmm0, vector_len);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! } else {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
! movdqu(xmm0, src);
! movdqu(xmm1, dst);
! Assembler::vpcmpeqb(xmm1, xmm1, xmm0, vector_len);
! movdqu(dst, xmm1);
! evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! }
! }
!
! // Vector word compare-for-equality; same destructive fallback and
! // nds == dst requirement (asserted) as vpcmpeqb above.
! void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
! int dst_enc = dst->encoding();
! int nds_enc = nds->encoding();
! int src_enc = src->encoding();
! assert(dst_enc == nds_enc, "");
! if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpcmpeqw(dst, nds, src, vector_len);
! } else if ((dst_enc < 16) && (src_enc < 16)) {
! Assembler::vpcmpeqw(dst, nds, src, vector_len);
! } else if (src_enc < 16) {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpcmpeqw(xmm0, xmm0, src, vector_len);
! movdqu(dst, xmm0);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! } else if (dst_enc < 16) {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! evmovdqul(xmm0, src, Assembler::AVX_512bit);
! Assembler::vpcmpeqw(dst, dst, xmm0, vector_len);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! } else {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
! movdqu(xmm0, src);
! movdqu(xmm1, dst);
! Assembler::vpcmpeqw(xmm1, xmm1, xmm0, vector_len);
! movdqu(dst, xmm1);
! evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! }
! }
!
! // vpmovzxbw from memory with an explicit vector length.  dst in the
! // upper bank is staged through xmm0.  NOTE(review): the preload of dst
! // into xmm0 looks unnecessary -- vpmovzxbw fully overwrites its
! // destination -- kept as-is.
! void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
! int dst_enc = dst->encoding();
! if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpmovzxbw(dst, src, vector_len);
! } else if (dst_enc < 16) {
! Assembler::vpmovzxbw(dst, src, vector_len);
! } else {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpmovzxbw(xmm0, src, vector_len);
! movdqu(dst, xmm0);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! }
! }
!
! // vpmovmskb: byte sign-mask of src into the GPR dst; only src may need
! // staging, and the result is a general register so no xmm copy-back.
! void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
! int src_enc = src->encoding();
! if (src_enc < 16) {
! Assembler::vpmovmskb(dst, src);
! } else {
! subptr(rsp, 64);
! evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
! evmovdqul(xmm0, src, Assembler::AVX_512bit);
! Assembler::vpmovmskb(dst, xmm0);
! evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
! addptr(rsp, 64);
! }
! }
!
! // Vector word multiply (low 16 bits).  NOTE(review): the fallback
! // paths compute dst = dst * src and reuse nds as scratch, so they
! // assume nds == dst (as the vpcmpeq* wrappers assert) -- confirm.
! void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
! int dst_enc = dst->encoding();
! int nds_enc = nds->encoding();
! int src_enc = src->encoding();
! if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpmullw(dst, nds, src, vector_len);
! } else if ((dst_enc < 16) && (src_enc < 16)) {
! Assembler::vpmullw(dst, dst, src, vector_len);
} else if ((dst_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for src
evmovdqul(nds, src, Assembler::AVX_512bit);
! Assembler::vpmullw(dst, dst, nds, vector_len);
} else if ((src_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for dst
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpmullw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
// use nds as scratch for xmm0 to hold src
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, src, Assembler::AVX_512bit);
! Assembler::vpmullw(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs are in the upper bank
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, src, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
}
}
! // Vector word multiply from memory; nds is a scratch register on the
! // fallback paths (dst = dst * [src]).
! void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpmullw(dst, nds, src, vector_len);
} else if (dst_enc < 16) {
! Assembler::vpmullw(dst, dst, src, vector_len);
} else if (nds_enc < 16) {
// implies dst_enc in upper bank with nds as scratch
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpmullw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs in upper bank
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpmullw(xmm0, xmm0, src, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
! // Vector byte subtract.  NOTE(review): the fallback paths compute
! // dst = dst - src and reuse nds as scratch, so they assume nds == dst
! // (as the vpcmpeq* wrappers assert) -- confirm at call sites.
! void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
int src_enc = src->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpsubb(dst, nds, src, vector_len);
} else if ((dst_enc < 16) && (src_enc < 16)) {
! Assembler::vpsubb(dst, dst, src, vector_len);
} else if ((dst_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for src
evmovdqul(nds, src, Assembler::AVX_512bit);
! Assembler::vpsubb(dst, dst, nds, vector_len);
} else if ((src_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for dst
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpsubb(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
// use nds as scratch for xmm0 to hold src
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, src, Assembler::AVX_512bit);
! Assembler::vpsubb(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs are in the upper bank
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, src, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
}
}
! // Vector byte subtract from memory: dst = dst - [src] on the fallback
! // paths (nds serves only as a scratch register there).
! // FIX: the all-upper-bank path emitted Assembler::vpsubw (16-bit word
! // subtract) instead of vpsubb (byte subtract) -- a copy-paste defect
! // that produced wrong results whenever dst and nds both live in
! // xmm16-31 without AVX512BW.
! void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpsubb(dst, nds, src, vector_len);
} else if (dst_enc < 16) {
! Assembler::vpsubb(dst, dst, src, vector_len);
} else if (nds_enc < 16) {
// implies dst_enc in upper bank with nds as scratch
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpsubb(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs in upper bank
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
Assembler::vpsubb(xmm0, xmm0, src, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
! // Vector word (16-bit) subtract; same upper-bank staging scheme and
! // nds == dst assumption as vpsubb above.
! void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
int src_enc = src->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpsubw(dst, nds, src, vector_len);
} else if ((dst_enc < 16) && (src_enc < 16)) {
! Assembler::vpsubw(dst, dst, src, vector_len);
} else if ((dst_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for src
evmovdqul(nds, src, Assembler::AVX_512bit);
! Assembler::vpsubw(dst, dst, nds, vector_len);
} else if ((src_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for dst
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpsubw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
// use nds as scratch for xmm0 to hold src
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, src, Assembler::AVX_512bit);
! Assembler::vpsubw(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs are in the upper bank
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, src, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
}
}
! // Vector word subtract from memory; nds is a scratch register on the
! // fallback paths (dst = dst - [src]).
! void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
! Assembler::vpsubw(dst, nds, src, vector_len);
} else if (dst_enc < 16) {
! Assembler::vpsubw(dst, dst, src, vector_len);
} else if (nds_enc < 16) {
// implies dst_enc in upper bank with nds as scratch
evmovdqul(nds, dst, Assembler::AVX_512bit);
! Assembler::vpsubw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else {
// worst-case scenario, all regs in upper bank
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
! Assembler::vpsubw(xmm0, xmm0, src, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
*** 4636,4645 ****
--- 5014,5057 ----
Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
+ // vptest: sets ZF/CF from dst AND src.  Flags-only, so no result
+ // copy-back is needed after staging upper-bank (xmm16-31) operands
+ // through xmm0/xmm1 (preserved in 64-byte stack spill slots).
+ void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::vptest(dst, src);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::vptest(xmm0, src);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::vptest(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::vptest(xmm1, xmm0);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+
// This instruction exists within macros, ergo we cannot control its input
// when emitted through those patterns.
void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
if (VM_Version::supports_avx512nobw()) {
int dst_enc = dst->encoding();
*** 7719,7729 ****
bind(COMPARE_WIDE_VECTORS_LOOP);
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
vmovdqu(vec1, Address(str1, result, scale));
vpxor(vec1, Address(str2, result, scale));
} else {
! vpmovzxbw(vec1, Address(str1, result, scale1));
vpxor(vec1, Address(str2, result, scale2));
}
vptest(vec1, vec1);
jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
addptr(result, stride2);
--- 8131,8141 ----
bind(COMPARE_WIDE_VECTORS_LOOP);
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
vmovdqu(vec1, Address(str1, result, scale));
vpxor(vec1, Address(str2, result, scale));
} else {
! vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_256bit);
vpxor(vec1, Address(str2, result, scale2));
}
vptest(vec1, vec1);
jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
addptr(result, stride2);
< prev index next >