src/cpu/x86/vm/macroAssembler_x86.cpp
*** old/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri May  8 11:59:17 2015
--- new/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri May  8 11:59:17 2015

*** 3994,4018 **** --- 3994,4018 ----
      lea(rscratch1, src);
      vaddss(dst, nds, Address(rscratch1, 0));
    }
  }
  
! void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
! void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
    if (reachable(src)) {
!     vandpd(dst, nds, as_Address(src), vector256);
!     vandpd(dst, nds, as_Address(src), vector_len);
    } else {
      lea(rscratch1, src);
!     vandpd(dst, nds, Address(rscratch1, 0), vector256);
!     vandpd(dst, nds, Address(rscratch1, 0), vector_len);
    }
  }
  
! void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
! void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
    if (reachable(src)) {
!     vandps(dst, nds, as_Address(src), vector256);
!     vandps(dst, nds, as_Address(src), vector_len);
    } else {
      lea(rscratch1, src);
!     vandps(dst, nds, Address(rscratch1, 0), vector256);
!     vandps(dst, nds, Address(rscratch1, 0), vector_len);
    }
  }
  
  void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
    if (reachable(src)) {
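Note: the bool vector256 parameter could only distinguish 128-bit from 256-bit operands; widening it to int vector_len leaves room for 512-bit EVEX encodings. Later hunks pass the raw values 0 and 1, while the fill stub uses named Assembler constants, so a call site migrates roughly as in this sketch (illustrative only; it assumes HotSpot's macroAssembler_x86.hpp declarations and that AVX_128bit/AVX_256bit/AVX_512bit encode 0/1/2):

    // Hypothetical call sites showing the parameter migration.
    static void vector_len_examples(MacroAssembler* masm, XMMRegister dst,
                                    XMMRegister nds, AddressLiteral mask) {
      // before: masm->vandpd(dst, nds, mask, /* vector256 */ true);
      masm->vandpd(dst, nds, mask, Assembler::AVX_256bit);  // ymm width
      // before: masm->vxorpd(dst, nds, mask, /* vector256 */ false);
      masm->vxorpd(dst, nds, mask, Assembler::AVX_128bit);  // xmm width
    }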
*** 4066,4090 **** --- 4066,4090 ----
      lea(rscratch1, src);
      vsubss(dst, nds, Address(rscratch1, 0));
    }
  }
  
! void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
! void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
    if (reachable(src)) {
!     vxorpd(dst, nds, as_Address(src), vector256);
!     vxorpd(dst, nds, as_Address(src), vector_len);
    } else {
      lea(rscratch1, src);
!     vxorpd(dst, nds, Address(rscratch1, 0), vector256);
!     vxorpd(dst, nds, Address(rscratch1, 0), vector_len);
    }
  }
  
! void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
! void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
    if (reachable(src)) {
!     vxorps(dst, nds, as_Address(src), vector256);
!     vxorps(dst, nds, as_Address(src), vector_len);
    } else {
      lea(rscratch1, src);
!     vxorps(dst, nds, Address(rscratch1, 0), vector256);
!     vxorps(dst, nds, Address(rscratch1, 0), vector_len);
    }
  }
  
  //////////////////////////////////////////////////////////////////////////////////
*** 4559,4568 **** --- 4559,4576 ----
      movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
      movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
      movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
      movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
    } else if (UseSSE >= 2) {
+     if (UseAVX > 2) {
+       movl(rbx, 0xffff);
+ #ifdef _LP64
+       kmovql(k1, rbx);
+ #else
+       kmovdl(k1, rbx);
+ #endif
+     }
  #ifdef COMPILER2
      if (MaxVectorSize > 16) {
        assert(UseAVX > 0, "256bit vectors are supported only with AVX");
        // Save upper half of YMM registers
        subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
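Note: the new UseAVX > 2 block runs before any 512-bit stores because this port emits its EVEX instructions masked by opmask register k1; loading 0xffff sets one mask bit per 32-bit lane of a zmm register, i.e. all sixteen lanes enabled. The same priming sequence recurs in the fill stub below. A sketch of the idiom as a helper (the helper name is our invention, not a HotSpot function; the caller must have a GPR free to clobber):

    // Hypothetical wrapper for the k1-priming idiom, assuming the
    // HotSpot MacroAssembler declarations.
    static void prime_opmask(MacroAssembler* masm, Register tmp) {
      masm->movl(tmp, 0xffff);   // 16 ones: one mask bit per dword lane
    #ifdef _LP64
      masm->kmovql(k1, tmp);     // 64-bit GPR -> opmask k1
    #else
      masm->kmovdl(k1, tmp);     // 32-bit GPR -> opmask k1 on ILP32
    #endif
    }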
*** 7061,7072 **** --- 7069,7111 ----
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
+     if (UseAVX > 2) {
+       movl(rtmp, 0xffff);
+ #ifdef _LP64
+       kmovql(k1, rtmp);
+ #else
+       kmovdl(k1, rtmp);
+ #endif
+     }
      movdl(xtmp, value);
!     if (UseAVX >= 2 && UseUnalignedLoadStores) {
!     if (UseAVX > 2 && UseUnalignedLoadStores) {
+       // Fill 64-byte chunks
+       Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
+       evpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
+ 
+       subl(count, 16 << shift);
+       jcc(Assembler::less, L_check_fill_32_bytes);
+       align(16);
+ 
+       BIND(L_fill_64_bytes_loop);
+       evmovdqu(Address(to, 0), xtmp, Assembler::AVX_512bit);
+       addptr(to, 64);
+       subl(count, 16 << shift);
+       jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
+ 
+       BIND(L_check_fill_32_bytes);
+       addl(count, 8 << shift);
+       jccb(Assembler::less, L_check_fill_8_bytes);
+       evmovdqu(Address(to, 0), xtmp, Assembler::AVX_256bit);
+       addptr(to, 32);
+       subl(count, 8 << shift);
+ 
+       BIND(L_check_fill_8_bytes);
+     } else if (UseAVX == 2 && UseUnalignedLoadStores) {
        // Fill 64-byte chunks
        Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
        vpbroadcastd(xtmp, xtmp);
  
        subl(count, 16 << shift);
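Note: the new UseAVX > 2 branch mirrors the existing AVX2 branch one size up: broadcast the 32-bit fill value across a zmm register, loop on 64-byte stores, emit at most one 32-byte store, then fall into the existing 8-byte tail code. Here count is an element count and shift scales it, so 16 << shift elements is 64 bytes and 8 << shift is 32. A scalar model of that control flow (illustrative only; it assumes a raw byte count instead of the scaled element count):

    #include <cstdint>
    #include <cstring>

    // Scalar sketch of the fill strategy the generated code follows.
    static void fill_sketch(uint8_t* to, uint32_t value, size_t nbytes) {
      while (nbytes >= 64) {                  // L_fill_64_bytes_loop
        for (int i = 0; i < 16; i++) std::memcpy(to + 4 * i, &value, 4);
        to += 64; nbytes -= 64;
      }
      if (nbytes >= 32) {                     // single 32-byte store
        for (int i = 0; i < 8; i++) std::memcpy(to + 4 * i, &value, 4);
        to += 32; nbytes -= 32;
      }
      // L_check_fill_8_bytes: remainder uses the pre-existing tail paths
    }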
*** 7198,7212 **** --- 7237,7251 ----
    jmpb(L_chars_32_check);
  
    bind(L_copy_32_chars);
    vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
    vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
!   vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
!   vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
    vptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
    jccb(Assembler::notZero, L_copy_32_chars_exit);
!   vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
!   vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true);
!   vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
!   vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1);
    vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);
  
    bind(L_chars_32_check);
    addptr(len, 32);
    jccb(Assembler::lessEqual, L_copy_32_chars);
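Note: only the parameter spelling changes here, but the idiom is worth spelling out: a 256-bit vpackuswb packs within each 128-bit lane, so the packed bytes come out lane-interleaved, and vpermq with immediate 0xD8 (qword order 0, 2, 1, 3) restores source order. An equivalent standalone sketch with AVX2 intrinsics (our own illustration, not HotSpot code):

    #include <immintrin.h>

    // Pack 32 UTF-16 code units (two ymm inputs) into 32 bytes in order.
    // _mm256_packus_epi16 packs per 128-bit lane, leaving the qwords as
    // lo.lane0 | hi.lane0 | lo.lane1 | hi.lane1, so the 0xD8 permute
    // (qwords 0,2,1,3) puts the result back in source order.
    static __m256i pack32_chars(__m256i lo, __m256i hi) {
      __m256i p = _mm256_packus_epi16(lo, hi);
      return _mm256_permute4x64_epi64(p, 0xD8);
    }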
*** 7225,7241 **** --- 7264,7280 ----
    bind(L_copy_16_chars);
    if (UseAVX >= 2) {
      vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
      vptest(tmp2Reg, tmp1Reg);
      jccb(Assembler::notZero, L_copy_16_chars_exit);
!     vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
!     vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true);
!     vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector_len */ 1);
!     vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector_len */ 1);
    } else {
      if (UseAVX > 0) {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
!       vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
!       vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 0);
      } else {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        por(tmp2Reg, tmp3Reg);
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        por(tmp2Reg, tmp4Reg);
*** 7774,7784 **** --- 7813,7823 ----
   */
  void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
    if (UseAVX > 0) {
      vpclmulhdq(xtmp, xK, xcrc); // [123:64]
      vpclmulldq(xcrc, xK, xcrc); // [63:0]
!     vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
!     vpxor(xcrc, xcrc, Address(buf, offset), 0 /* vector_len */);
      pxor(xcrc, xtmp);
    } else {
      movdqa(xtmp, xcrc);
      pclmulhdq(xtmp, xK); // [123:64]
      pclmulldq(xcrc, xK); // [63:0]
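Note: fold_128bit_crc32 is unchanged apart from the vpxor argument; the fold operates strictly on xmm operands, hence vector_len 0. For reference, one fold step in plain PCLMUL intrinsics (our sketch; it assumes vpclmulhdq/vpclmulldq are the 0x11/0x00 immediate forms of vpclmulqdq, consistent with the [123:64]/[63:0] comments):

    #include <immintrin.h>

    // One CRC fold step: crc' = Klo*crc_lo ^ Khi*crc_hi ^ next 16 bytes.
    static __m128i fold_128bit_crc32_sketch(__m128i xcrc, __m128i xK,
                                            const void* buf) {
      __m128i hi = _mm_clmulepi64_si128(xK, xcrc, 0x11);  // upper qwords
      __m128i lo = _mm_clmulepi64_si128(xK, xcrc, 0x00);  // lower qwords
      __m128i d  = _mm_loadu_si128(static_cast<const __m128i*>(buf));
      return _mm_xor_si128(_mm_xor_si128(lo, d), hi);
    }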
*** 7918,7928 **** --- 7957,7967 ----
    // Fold 128 bits in xmm1 down into 32 bits in crc register.
    BIND(L_fold_128b);
    movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
    if (UseAVX > 0) {
      vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
!     vpand(xmm3, xmm0, xmm2, false /* vector256 */);
!     vpand(xmm3, xmm0, xmm2, 0 /* vector_len */);
      vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
    } else {
      movdqa(xmm2, xmm0);
      pclmulqdq(xmm2, xmm1, 0x1);
      movdqa(xmm3, xmm0);
