src/cpu/x86/vm/macroAssembler_x86.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File
*** old/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Dec 27 17:06:11 2012
--- new/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Dec 27 17:06:11 2012

*** 5993,6004 **** --- 5993,6027 ---- } BIND(L_fill_32_bytes); { assert( UseSSE >= 2, "supported cpu only" ); Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; // Fill 32-byte chunks movdl(xtmp, value); + if (UseAVX >= 2 && UseUnalignedLoadStores) { + // Fill 64-byte chunks + Label L_fill_64_bytes_loop, L_check_fill_32_bytes; + vpbroadcastd(xtmp, xtmp); + + subl(count, 16 << shift); + jcc(Assembler::less, L_check_fill_32_bytes); + align(16); + + BIND(L_fill_64_bytes_loop); + vmovdqu(Address(to, 0), xtmp); + vmovdqu(Address(to, 32), xtmp); + addptr(to, 64); + subl(count, 16 << shift); + jcc(Assembler::greaterEqual, L_fill_64_bytes_loop); + + BIND(L_check_fill_32_bytes); + addl(count, 8 << shift); + jccb(Assembler::less, L_check_fill_8_bytes); + vmovdqu(Address(to, 0), xtmp); + addptr(to, 32); + subl(count, 8 << shift); + } else { + // Fill 32-byte chunks pshufd(xtmp, xtmp, 0); subl(count, 8 << shift); jcc(Assembler::less, L_check_fill_8_bytes); align(16);
*** 6016,6025 **** --- 6039,6049 ---- } addptr(to, 32); subl(count, 8 << shift); jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); + } BIND(L_check_fill_8_bytes); addl(count, 8 << shift); jccb(Assembler::zero, L_exit); jmpb(L_fill_8_bytes);

src/cpu/x86/vm/macroAssembler_x86.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File