< prev index next >
src/cpu/x86/vm/macroAssembler_x86.cpp
Print this page
@@ -3980,25 +3980,25 @@
lea(rscratch1, src);
vaddss(dst, nds, Address(rscratch1, 0));
}
}
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
// AddressLiteral form of vandpd: picks an addressing form for src and
// delegates to the Address overload of vandpd. The last argument (the bool
// vector256 before this change, the int vector_len after it) is forwarded
// unchanged.
// NOTE(review): vector_len is presumably one of the Assembler::AVX_*bit
// values - confirm against the Assembler declaration.
if (reachable(src)) {
// src is reachable: use it directly as an Address operand.
- vandpd(dst, nds, as_Address(src), vector256);
+ vandpd(dst, nds, as_Address(src), vector_len);
} else {
// src is not reachable: load its address into rscratch1 and address the
// operand as [rscratch1 + 0]. rscratch1 is used as a temporary here.
lea(rscratch1, src);
- vandpd(dst, nds, Address(rscratch1, 0), vector256);
+ vandpd(dst, nds, Address(rscratch1, 0), vector_len);
}
}
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
// AddressLiteral form of vandps: picks an addressing form for src and
// delegates to the Address overload of vandps. The last argument (the bool
// vector256 before this change, the int vector_len after it) is forwarded
// unchanged.
// NOTE(review): vector_len is presumably one of the Assembler::AVX_*bit
// values - confirm against the Assembler declaration.
if (reachable(src)) {
// src is reachable: use it directly as an Address operand.
- vandps(dst, nds, as_Address(src), vector256);
+ vandps(dst, nds, as_Address(src), vector_len);
} else {
// src is not reachable: load its address into rscratch1 and address the
// operand as [rscratch1 + 0]. rscratch1 is used as a temporary here.
lea(rscratch1, src);
- vandps(dst, nds, Address(rscratch1, 0), vector256);
+ vandps(dst, nds, Address(rscratch1, 0), vector_len);
}
}
void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
if (reachable(src)) {
@@ -4052,25 +4052,25 @@
lea(rscratch1, src);
vsubss(dst, nds, Address(rscratch1, 0));
}
}
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
// AddressLiteral form of vxorpd: picks an addressing form for src and
// delegates to the Address overload of vxorpd. The last argument (the bool
// vector256 before this change, the int vector_len after it) is forwarded
// unchanged.
// NOTE(review): vector_len is presumably one of the Assembler::AVX_*bit
// values - confirm against the Assembler declaration.
if (reachable(src)) {
// src is reachable: use it directly as an Address operand.
- vxorpd(dst, nds, as_Address(src), vector256);
+ vxorpd(dst, nds, as_Address(src), vector_len);
} else {
// src is not reachable: load its address into rscratch1 and address the
// operand as [rscratch1 + 0]. rscratch1 is used as a temporary here.
lea(rscratch1, src);
- vxorpd(dst, nds, Address(rscratch1, 0), vector256);
+ vxorpd(dst, nds, Address(rscratch1, 0), vector_len);
}
}
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
// AddressLiteral form of vxorps: picks an addressing form for src and
// delegates to the Address overload of vxorps. The last argument (the bool
// vector256 before this change, the int vector_len after it) is forwarded
// unchanged.
// NOTE(review): vector_len is presumably one of the Assembler::AVX_*bit
// values - confirm against the Assembler declaration.
if (reachable(src)) {
// src is reachable: use it directly as an Address operand.
- vxorps(dst, nds, as_Address(src), vector256);
+ vxorps(dst, nds, as_Address(src), vector_len);
} else {
// src is not reachable: load its address into rscratch1 and address the
// operand as [rscratch1 + 0]. rscratch1 is used as a temporary here.
lea(rscratch1, src);
- vxorps(dst, nds, Address(rscratch1, 0), vector256);
+ vxorps(dst, nds, Address(rscratch1, 0), vector_len);
}
}
//////////////////////////////////////////////////////////////////////////////////
@@ -4545,10 +4545,18 @@
movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
} else if (UseSSE >= 2) {
+ if (UseAVX > 2) {
+ movl(rbx, 0xffff);
+#ifdef _LP64
+ kmovql(k1, rbx);
+#else
+ kmovdl(k1, rbx);
+#endif
+ }
#ifdef COMPILER2
if (MaxVectorSize > 16) {
assert(UseAVX > 0, "256bit vectors are supported only with AVX");
// Save upper half of YMM registers
subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
@@ -7037,12 +7045,43 @@
}
BIND(L_fill_32_bytes);
{
assert( UseSSE >= 2, "supported cpu only" );
Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
+ if (UseAVX > 2) {
+ movl(rtmp, 0xffff);
+#ifdef _LP64
+ kmovql(k1, rtmp);
+#else
+ kmovdl(k1, rtmp);
+#endif
+ }
movdl(xtmp, value);
- if (UseAVX >= 2 && UseUnalignedLoadStores) {
+ if (UseAVX > 2 && UseUnalignedLoadStores) {
+ // Fill 64-byte chunks
+ Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
+ evpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
+
+ subl(count, 16 << shift);
+ jcc(Assembler::less, L_check_fill_32_bytes);
+ align(16);
+
+ BIND(L_fill_64_bytes_loop);
+ evmovdqu(Address(to, 0), xtmp, Assembler::AVX_512bit);
+ addptr(to, 64);
+ subl(count, 16 << shift);
+ jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
+
+ BIND(L_check_fill_32_bytes);
+ addl(count, 8 << shift);
+ jccb(Assembler::less, L_check_fill_8_bytes);
+ evmovdqu(Address(to, 0), xtmp, Assembler::AVX_256bit);
+ addptr(to, 32);
+ subl(count, 8 << shift);
+
+ BIND(L_check_fill_8_bytes);
+ } else if (UseAVX == 2 && UseUnalignedLoadStores) {
// Fill 64-byte chunks
Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
vpbroadcastd(xtmp, xtmp);
subl(count, 16 << shift);
@@ -7173,15 +7212,15 @@
jmpb(L_chars_32_check);
bind(L_copy_32_chars);
vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
- vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
+ vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
vptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
jccb(Assembler::notZero, L_copy_32_chars_exit);
- vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
- vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true);
+ vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
+ vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1);
vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);
bind(L_chars_32_check);
addptr(len, 32);
jccb(Assembler::lessEqual, L_copy_32_chars);
@@ -7200,17 +7239,17 @@
bind(L_copy_16_chars);
if (UseAVX >= 2) {
vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
vptest(tmp2Reg, tmp1Reg);
jccb(Assembler::notZero, L_copy_16_chars_exit);
- vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
- vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true);
+ vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector_len */ 1);
+ vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector_len */ 1);
} else {
if (UseAVX > 0) {
movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
- vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
+ vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 0);
} else {
movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
por(tmp2Reg, tmp3Reg);
movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
por(tmp2Reg, tmp4Reg);
@@ -7745,11 +7784,11 @@
*/
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
if (UseAVX > 0) {
vpclmulhdq(xtmp, xK, xcrc); // [123:64]
vpclmulldq(xcrc, xK, xcrc); // [63:0]
- vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
+ vpxor(xcrc, xcrc, Address(buf, offset), 0 /* vector_len */);
pxor(xcrc, xtmp);
} else {
movdqa(xtmp, xcrc);
pclmulhdq(xtmp, xK); // [123:64]
pclmulldq(xcrc, xK); // [63:0]
@@ -7889,11 +7928,11 @@
// Fold 128 bits in xmm1 down into 32 bits in crc register.
BIND(L_fold_128b);
movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
if (UseAVX > 0) {
vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
- vpand(xmm3, xmm0, xmm2, false /* vector256 */);
+ vpand(xmm3, xmm0, xmm2, 0 /* vector_len */);
vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
} else {
movdqa(xmm2, xmm0);
pclmulqdq(xmm2, xmm1, 0x1);
movdqa(xmm3, xmm0);
< prev index next >