--- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-04-16 17:00:06.170723642 -0700
+++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-04-16 17:00:06.114723642 -0700
@@ -6657,7 +6657,7 @@
     subl(cnt2, stride2);
     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
 
     // compare wide vectors tail
     bind(COMPARE_WIDE_TAIL);
@@ -6672,7 +6672,7 @@
     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
     bind(VECTOR_NOT_EQUAL);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
     lea(str1, Address(str1, result, scale));
     lea(str2, Address(str2, result, scale));
     jmp(COMPARE_16_CHARS);
@@ -6931,7 +6931,8 @@
   bind(DONE);
   if (UseAVX >= 2) {
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
+    vpxor(vec2, vec2);
   }
 }
 
@@ -7065,7 +7066,8 @@
 
       BIND(L_check_fill_8_bytes);
       // clean upper bits of YMM registers
-      vzeroupper();
+      movdl(xtmp, value);
+      pshufd(xtmp, xtmp, 0);
     } else {
       // Fill 32-byte chunks
       pshufd(xtmp, xtmp, 0);
@@ -7228,7 +7230,11 @@
     bind(L_copy_16_chars_exit);
     if (UseAVX >= 2) {
       // clean upper bits of YMM registers
-      vzeroupper();
+      vpxor(tmp2Reg, tmp2Reg);
+      vpxor(tmp3Reg, tmp3Reg);
+      vpxor(tmp4Reg, tmp4Reg);
+      movdl(tmp1Reg, tmp5);
+      pshufd(tmp1Reg, tmp1Reg, 0);
     }
     subptr(len, 8);
     jccb(Assembler::greater, L_copy_8_chars_exit);
--- old/src/cpu/x86/vm/stubGenerator_x86_32.cpp 2015-04-16 17:00:06.702723637 -0700
+++ new/src/cpu/x86/vm/stubGenerator_x86_32.cpp 2015-04-16 17:00:06.638723636 -0700
@@ -835,7 +835,8 @@
 
     if (UseUnalignedLoadStores && (UseAVX >= 2)) {
       // clean upper bits of YMM registers
-      __ vzeroupper();
+      __ vpxor(xmm0, xmm0);
+      __ vpxor(xmm1, xmm1);
     }
     __ addl(qword_count, 8);
     __ jccb(Assembler::zero, L_exit);
--- old/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2015-04-16 17:00:06.990723632 -0700
+++ new/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2015-04-16 17:00:06.938723634 -0700
@@ -1352,7 +1352,8 @@
     __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
       // Copy 32-bytes per iteration
@@ -1429,7 +1430,8 @@
     __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
      // Copy 32-bytes per iteration
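
For reference, a minimal sketch of the pattern the hunks above apply, written against HotSpot's
MacroAssembler as it appears in these files. The helper name clear_used_ymm_registers and the
choice of vec1/vec2 are illustrative only and are not part of the patch; UseAVX, XMMRegister and
the two-operand vpxor helper come from the existing code.

// Sketch only: assumes the MacroAssembler helper
// vpxor(XMMRegister dst, XMMRegister src) used in the hunks above,
// which emits vpxor dst, dst, src. XORing a register with itself
// zeroes all of its bits, including the upper 128, so no stale data
// is left in the YMM registers an intrinsic or stub has used.
static void clear_used_ymm_registers(MacroAssembler* masm,
                                     XMMRegister vec1, XMMRegister vec2) {
  if (UseAVX >= 2) {
    masm->vpxor(vec1, vec1);  // vec1 ^= vec1 -> whole YMM register is 0
    masm->vpxor(vec2, vec2);  // same for the second temporary
  }
}

In the stub generator hunks the same two lines are emitted through the usual __ (masm->)
shorthand with xmm0 and xmm1 as the scratch registers; in macroAssembler_x86.cpp the registers
are the temporaries each intrinsic already holds (vec1/vec2, tmp2Reg..tmp4Reg).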