< prev index next >
src/cpu/x86/vm/macroAssembler_x86.cpp
Print this page
@@ -3947,10 +3947,240 @@
void MacroAssembler::testl(Register dst, AddressLiteral src) { // testl of dst against an AddressLiteral operand
assert(reachable(src), "Address should be reachable"); // no fallback path here: src must satisfy reachable()
testl(dst, as_Address(src)); // delegate to testl with as_Address(src)
}
+void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) { // pcmpeqb wrapper: substitutes xmm0/xmm1 for operands whose encoding is >= 16
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { // direct emit when VM_Version reports AVX-only or AVX512BW
+ Assembler::pcmpeqb(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) { // both encodings below 16: direct emit
+ Assembler::pcmpeqb(dst, src);
+ } else if (src_enc < 16) { // only dst has encoding >= 16: xmm0 stands in for dst
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::pcmpeqb(xmm0, src);
+ movdqu(dst, xmm0); // copy the result back into dst
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ } else if (dst_enc < 16) { // only src has encoding >= 16: xmm0 stands in for src
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::pcmpeqb(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64);
+ } else { // both encodings >= 16: xmm0 stands in for src, xmm1 for dst
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); // save xmm1
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pcmpeqb(xmm1, xmm0);
+ movdqu(dst, xmm1); // copy the result back into dst
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); // restore in reverse order of the saves
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) { // pcmpeqw wrapper: substitutes xmm0/xmm1 for operands whose encoding is >= 16
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { // direct emit when VM_Version reports AVX-only or AVX512BW
+ Assembler::pcmpeqw(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) { // both encodings below 16: direct emit
+ Assembler::pcmpeqw(dst, src);
+ } else if (src_enc < 16) { // only dst has encoding >= 16: xmm0 stands in for dst
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::pcmpeqw(xmm0, src);
+ movdqu(dst, xmm0); // copy the result back into dst
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ } else if (dst_enc < 16) { // only src has encoding >= 16: xmm0 stands in for src
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::pcmpeqw(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64);
+ } else { // both encodings >= 16: xmm0 stands in for src, xmm1 for dst
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); // save xmm1
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pcmpeqw(xmm1, xmm0);
+ movdqu(dst, xmm1); // copy the result back into dst
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); // restore in reverse order of the saves
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) { // pcmpestri with a memory operand; xmm0 stands in for dst when its encoding is >= 16
+ int dst_enc = dst->encoding();
+ if (dst_enc < 16) { // encoding below 16: direct emit (no VM_Version check in this wrapper)
+ Assembler::pcmpestri(dst, src, imm8);
+ } else {
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::pcmpestri(xmm0, src, imm8);
+ movdqu(dst, xmm0); // copy xmm0 back into dst
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ }
+}
+
+void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { // register-register pcmpestri; xmm0/xmm1 stand in for operands whose encoding is >= 16
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if ((dst_enc < 16) && (src_enc < 16)) { // both encodings below 16: direct emit (no VM_Version check in this wrapper)
+ Assembler::pcmpestri(dst, src, imm8);
+ } else if (src_enc < 16) { // only dst has encoding >= 16: xmm0 stands in for dst
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::pcmpestri(xmm0, src, imm8);
+ movdqu(dst, xmm0); // copy xmm0 back into dst
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ } else if (dst_enc < 16) { // only src has encoding >= 16: xmm0 stands in for src
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::pcmpestri(dst, xmm0, imm8);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64);
+ } else { // both encodings >= 16: xmm0 stands in for src, xmm1 for dst
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); // save xmm1
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pcmpestri(xmm1, xmm0, imm8);
+ movdqu(dst, xmm1); // copy xmm1 back into dst
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); // restore in reverse order of the saves
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) { // register-register pmovzxbw; xmm0/xmm1 stand in for operands whose encoding is >= 16
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { // direct emit when VM_Version reports AVX-only or AVX512BW
+ Assembler::pmovzxbw(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) { // both encodings below 16: direct emit
+ Assembler::pmovzxbw(dst, src);
+ } else if (src_enc < 16) { // only dst has encoding >= 16: xmm0 stands in for dst
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::pmovzxbw(xmm0, src);
+ movdqu(dst, xmm0); // copy the result back into dst
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ } else if (dst_enc < 16) { // only src has encoding >= 16: xmm0 stands in for src
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::pmovzxbw(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64);
+ } else { // both encodings >= 16: xmm0 stands in for src, xmm1 for dst
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); // save xmm1
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pmovzxbw(xmm1, xmm0);
+ movdqu(dst, xmm1); // copy the result back into dst
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); // restore in reverse order of the saves
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) { // pmovzxbw with a memory operand; xmm0 stands in for dst when its encoding is >= 16
+ int dst_enc = dst->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { // direct emit when VM_Version reports AVX-only or AVX512BW
+ Assembler::pmovzxbw(dst, src);
+ } else if (dst_enc < 16) { // encoding below 16: direct emit
+ Assembler::pmovzxbw(dst, src);
+ } else {
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::pmovzxbw(xmm0, src);
+ movdqu(dst, xmm0); // copy the result back into dst
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ }
+}
+
+void MacroAssembler::pmovmskb(Register dst, XMMRegister src) { // pmovmskb wrapper; xmm0 stands in for src when its encoding is >= 16
+ int src_enc = src->encoding();
+ if (src_enc < 16) { // encoding below 16: direct emit (no VM_Version check in this wrapper)
+ Assembler::pmovmskb(dst, src);
+ } else {
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::pmovmskb(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ }
+}
+
+void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) { // ptest wrapper; xmm0/xmm1 stand in for operands whose encoding is >= 16
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if ((dst_enc < 16) && (src_enc < 16)) { // both encodings below 16: direct emit (no VM_Version check in this wrapper)
+ Assembler::ptest(dst, src);
+ } else if (src_enc < 16) { // only dst has encoding >= 16: xmm0 stands in for dst
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::ptest(xmm0, src);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0; nothing is copied back to dst on this path
+ addptr(rsp, 64); // release the stack slot
+ } else if (dst_enc < 16) { // only src has encoding >= 16: xmm0 stands in for src
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::ptest(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64);
+ } else { // both encodings >= 16: xmm0 stands in for src, xmm1 for dst
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); // save xmm1
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::ptest(xmm1, xmm0);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); // restore in reverse order of the saves
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::sqrtsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
@@ -4254,184 +4484,332 @@
Assembler::vpaddw(xmm0, xmm0, src, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
-void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src) { // vpbroadcastw wrapper; xmm0/xmm1 stand in for operands whose encoding is >= 16
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { // direct emit when VM_Version reports AVX-only or AVX512BW
+ Assembler::vpbroadcastw(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) { // both encodings below 16: direct emit
+ Assembler::vpbroadcastw(dst, src);
+ } else if (src_enc < 16) { // only dst has encoding >= 16: xmm0 stands in for dst
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::vpbroadcastw(xmm0, src);
+ movdqu(dst, xmm0); // copy the result back into dst
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ } else if (dst_enc < 16) { // only src has encoding >= 16: xmm0 stands in for src
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::vpbroadcastw(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64);
+ } else { // both encodings >= 16: xmm0 stands in for src, xmm1 for dst
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); // save xmm1
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::vpbroadcastw(xmm1, xmm0);
+ movdqu(dst, xmm1); // copy the result back into dst
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); // restore in reverse order of the saves
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // vpcmpeqb wrapper; xmm0/xmm1 stand in for operands whose encoding is >= 16
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
int src_enc = src->encoding();
+ assert(dst_enc == nds_enc, ""); // the fallback paths below only read dst and src, so dst and nds must be the same register
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsubb(dst, nds, src, vector_len);
+ Assembler::vpcmpeqb(dst, nds, src, vector_len);
} else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpsubb(dst, dst, src, vector_len);
+ Assembler::vpcmpeqb(dst, nds, src, vector_len);
+ } else if (src_enc < 16) { // only dst has encoding >= 16: xmm0 stands in for dst
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::vpcmpeqb(xmm0, xmm0, src, vector_len);
+ movdqu(dst, xmm0); // copy the result back into dst. NOTE(review): movdqu is used although the op takes vector_len -- confirm wider results are fully copied
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ } else if (dst_enc < 16) { // only src has encoding >= 16: xmm0 stands in for src
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::vpcmpeqb(dst, dst, xmm0, vector_len);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64);
+ } else { // both encodings >= 16: xmm0 stands in for src, xmm1 for dst
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); // save xmm1
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::vpcmpeqb(xmm1, xmm1, xmm0, vector_len);
+ movdqu(dst, xmm1); // copy the result back into dst (same NOTE(review) about movdqu vs. vector_len applies)
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); // restore in reverse order of the saves
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // vpcmpeqw wrapper; xmm0/xmm1 stand in for operands whose encoding is >= 16
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->encoding();
+ int src_enc = src->encoding();
+ assert(dst_enc == nds_enc, ""); // the fallback paths below only read dst and src, so dst and nds must be the same register
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { // direct emit when VM_Version reports AVX-only or AVX512BW
+ Assembler::vpcmpeqw(dst, nds, src, vector_len);
+ } else if ((dst_enc < 16) && (src_enc < 16)) { // both encodings below 16: direct emit
+ Assembler::vpcmpeqw(dst, nds, src, vector_len);
+ } else if (src_enc < 16) { // only dst has encoding >= 16: xmm0 stands in for dst
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::vpcmpeqw(xmm0, xmm0, src, vector_len);
+ movdqu(dst, xmm0); // copy the result back into dst. NOTE(review): movdqu is used although the op takes vector_len -- confirm wider results are fully copied
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ } else if (dst_enc < 16) { // only src has encoding >= 16: xmm0 stands in for src
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::vpcmpeqw(dst, dst, xmm0, vector_len);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64);
+ } else { // both encodings >= 16: xmm0 stands in for src, xmm1 for dst
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); // save xmm1
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::vpcmpeqw(xmm1, xmm1, xmm0, vector_len);
+ movdqu(dst, xmm1); // copy the result back into dst (same NOTE(review) about movdqu vs. vector_len applies)
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); // restore in reverse order of the saves
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) { // vpmovzxbw with a memory operand; xmm0 stands in for dst when its encoding is >= 16
+ int dst_enc = dst->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { // direct emit when VM_Version reports AVX-only or AVX512BW
+ Assembler::vpmovzxbw(dst, src, vector_len);
+ } else if (dst_enc < 16) { // encoding below 16: direct emit
+ Assembler::vpmovzxbw(dst, src, vector_len);
+ } else {
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::vpmovzxbw(xmm0, src, vector_len);
+ movdqu(dst, xmm0); // copy the result back into dst. NOTE(review): movdqu is used although the op takes vector_len -- confirm wider results are fully copied
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ }
+}
+
+void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) { // vpmovmskb wrapper; xmm0 stands in for src when its encoding is >= 16
+ int src_enc = src->encoding();
+ if (src_enc < 16) { // encoding below 16: direct emit (no VM_Version check in this wrapper)
+ Assembler::vpmovmskb(dst, src);
+ } else {
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::vpmovmskb(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64); // release the stack slot
+ }
+}
+
+void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // vpmullw wrapper; uses nds, xmm0 and xmm1 as scratch when operands have encoding >= 16
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { // direct three-operand emit when VM_Version reports AVX-only or AVX512BW
+ Assembler::vpmullw(dst, nds, src, vector_len);
+ } else if ((dst_enc < 16) && (src_enc < 16)) { // from here on dst is used as the first source; the value of nds is not an input
+ Assembler::vpmullw(dst, dst, src, vector_len);
} else if ((dst_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for src
evmovdqul(nds, src, Assembler::AVX_512bit);
- Assembler::vpsubb(dst, dst, nds, vector_len);
+ Assembler::vpmullw(dst, dst, nds, vector_len);
} else if ((src_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for dst
evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsubb(nds, nds, src, vector_len);
+ Assembler::vpmullw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
// use nds as scatch for xmm0 to hold src
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpsubb(dst, dst, xmm0, vector_len);
+ Assembler::vpmullw(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else {
// worse case scenario, all regs are in the upper bank
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, src, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
+ Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len); // xmm0 holds dst and xmm1 holds src; xmm1 was saved on the stack, xmm0 in nds
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
}
}
-void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { // vpmullw with a memory operand; uses nds and xmm0 as scratch when dst has encoding >= 16
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsubb(dst, nds, src, vector_len);
+ Assembler::vpmullw(dst, nds, src, vector_len); // direct three-operand emit
} else if (dst_enc < 16) {
- Assembler::vpsubb(dst, dst, src, vector_len);
+ Assembler::vpmullw(dst, dst, src, vector_len); // dst is used as the first source; the value of nds is not an input
} else if (nds_enc < 16) {
// implies dst_enc in upper bank with src as scratch
evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsubb(nds, nds, src, vector_len);
+ Assembler::vpmullw(nds, nds, src, vector_len); // computed in nds, then copied to dst on the next line
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else {
// worse case scenario, all regs in upper bank
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsubw(xmm0, xmm0, src, vector_len);
+ Assembler::vpmullw(xmm0, xmm0, src, vector_len); // NOTE(review): the result in xmm0 is not copied to dst before xmm0 is restored from nds -- confirm
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
-void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // vpsubb wrapper; uses nds, xmm0 and xmm1 as scratch when operands have encoding >= 16
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
int src_enc = src->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsubw(dst, nds, src, vector_len);
+ Assembler::vpsubb(dst, nds, src, vector_len); // direct three-operand emit
} else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpsubw(dst, dst, src, vector_len);
+ Assembler::vpsubb(dst, dst, src, vector_len); // dst is used as the first source; the value of nds is not an input
} else if ((dst_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for src
evmovdqul(nds, src, Assembler::AVX_512bit);
- Assembler::vpsubw(dst, dst, nds, vector_len);
+ Assembler::vpsubb(dst, dst, nds, vector_len);
} else if ((src_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for dst
evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsubw(nds, nds, src, vector_len);
+ Assembler::vpsubb(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
// use nds as scatch for xmm0 to hold src
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpsubw(dst, dst, xmm0, vector_len);
+ Assembler::vpsubb(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else {
// worse case scenario, all regs are in the upper bank
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, src, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
+ Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len); // xmm0 holds dst and xmm1 holds src; xmm1 was saved on the stack, xmm0 in nds
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
}
}
-void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { // vpsubb with a memory operand; uses nds and xmm0 as scratch when dst has encoding >= 16
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsubw(dst, nds, src, vector_len);
+ Assembler::vpsubb(dst, nds, src, vector_len); // direct three-operand emit
} else if (dst_enc < 16) {
- Assembler::vpsubw(dst, dst, src, vector_len);
+ Assembler::vpsubb(dst, dst, src, vector_len); // dst is used as the first source; the value of nds is not an input
} else if (nds_enc < 16) {
// implies dst_enc in upper bank with nds as scratch
evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsubw(nds, nds, src, vector_len);
+ Assembler::vpsubb(nds, nds, src, vector_len); // computed in nds, then copied to dst on the next line
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else {
// worst case scenario, all regs in upper bank
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsubw(xmm0, xmm0, src, vector_len);
+ Assembler::vpsubb(xmm0, xmm0, src, vector_len); // byte subtract, matching every other path of this wrapper (was vpsubw). NOTE(review): the result in xmm0 is not copied to dst before xmm0 is restored from nds -- confirm
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
-
-void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // vpsubw wrapper; uses nds, xmm0 and xmm1 as scratch when operands have encoding >= 16
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
int src_enc = src->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpmullw(dst, nds, src, vector_len);
+ Assembler::vpsubw(dst, nds, src, vector_len); // direct three-operand emit
} else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpmullw(dst, dst, src, vector_len);
+ Assembler::vpsubw(dst, dst, src, vector_len); // dst is used as the first source; the value of nds is not an input
} else if ((dst_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for src
evmovdqul(nds, src, Assembler::AVX_512bit);
- Assembler::vpmullw(dst, dst, nds, vector_len);
+ Assembler::vpsubw(dst, dst, nds, vector_len);
} else if ((src_enc < 16) && (nds_enc < 16)) {
// use nds as scratch for dst
evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(nds, nds, src, vector_len);
+ Assembler::vpsubw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
// use nds as scatch for xmm0 to hold src
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpmullw(dst, dst, xmm0, vector_len);
+ Assembler::vpsubw(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else {
// worse case scenario, all regs are in the upper bank
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, src, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
+ Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len); // xmm0 holds dst and xmm1 holds src; xmm1 was saved on the stack, xmm0 in nds
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
}
}
-void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { // vpsubw with a memory operand; uses nds and xmm0 as scratch when dst has encoding >= 16
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpmullw(dst, nds, src, vector_len);
+ Assembler::vpsubw(dst, nds, src, vector_len); // direct three-operand emit
} else if (dst_enc < 16) {
- Assembler::vpmullw(dst, dst, src, vector_len);
+ Assembler::vpsubw(dst, dst, src, vector_len); // dst is used as the first source; the value of nds is not an input
} else if (nds_enc < 16) {
// implies dst_enc in upper bank with src as scratch
evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(nds, nds, src, vector_len);
+ Assembler::vpsubw(nds, nds, src, vector_len); // computed in nds, then copied to dst on the next line
evmovdqul(dst, nds, Assembler::AVX_512bit);
} else {
// worse case scenario, all regs in upper bank
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(xmm0, xmm0, src, vector_len);
+ Assembler::vpsubw(xmm0, xmm0, src, vector_len); // NOTE(review): the result in xmm0 is not copied to dst before xmm0 is restored from nds -- confirm
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
@@ -4636,10 +5014,44 @@
Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
+void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) { // vptest wrapper; xmm0/xmm1 stand in for operands whose encoding is >= 16
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if ((dst_enc < 16) && (src_enc < 16)) { // both encodings below 16: direct emit (no VM_Version check in this wrapper)
+ Assembler::vptest(dst, src);
+ } else if (src_enc < 16) { // only dst has encoding >= 16: xmm0 stands in for dst
+ subptr(rsp, 64); // 64-byte stack slot for the 512-bit save of xmm0
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit); // xmm0 = dst
+ Assembler::vptest(xmm0, src);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0; nothing is copied back to dst on this path
+ addptr(rsp, 64); // release the stack slot
+ } else if (dst_enc < 16) { // only src has encoding >= 16: xmm0 stands in for src
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ evmovdqul(xmm0, src, Assembler::AVX_512bit); // xmm0 = src
+ Assembler::vptest(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); // restore xmm0
+ addptr(rsp, 64);
+ } else { // both encodings >= 16: xmm0 stands in for src, xmm1 for dst
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); // save xmm0
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); // save xmm1
+ movdqu(xmm0, src); // NOTE(review): movdqu is used to load the stand-ins -- confirm it carries the full width vptest operates on
+ movdqu(xmm1, dst);
+ Assembler::vptest(xmm1, xmm0);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); // restore in reverse order of the saves
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
// This instruction exists within macros, ergo we cannot control its input
// when emitted through those patterns.
void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
if (VM_Version::supports_avx512nobw()) {
int dst_enc = dst->encoding();
@@ -7719,11 +8131,11 @@
bind(COMPARE_WIDE_VECTORS_LOOP);
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
vmovdqu(vec1, Address(str1, result, scale));
vpxor(vec1, Address(str2, result, scale));
} else {
- vpmovzxbw(vec1, Address(str1, result, scale1));
+ vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_256bit);
vpxor(vec1, Address(str2, result, scale2));
}
vptest(vec1, vec1);
jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
addptr(result, stride2);
< prev index next >