src/cpu/x86/vm/macroAssembler_x86.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 8005544 Cdiff src/cpu/x86/vm/macroAssembler_x86.cpp

src/cpu/x86/vm/macroAssembler_x86.cpp

Print this page

        

*** 5993,6004 **** } BIND(L_fill_32_bytes); { assert( UseSSE >= 2, "supported cpu only" ); Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; - // Fill 32-byte chunks movdl(xtmp, value); pshufd(xtmp, xtmp, 0); subl(count, 8 << shift); jcc(Assembler::less, L_check_fill_8_bytes); align(16); --- 5993,6027 ---- } BIND(L_fill_32_bytes); { assert( UseSSE >= 2, "supported cpu only" ); Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; movdl(xtmp, value); + if (UseAVX >= 2 && UseUnalignedLoadStores) { + // Fill 64-byte chunks + Label L_fill_64_bytes_loop, L_check_fill_32_bytes; + vpbroadcastd(xtmp, xtmp); + + subl(count, 16 << shift); + jcc(Assembler::less, L_check_fill_32_bytes); + align(16); + + BIND(L_fill_64_bytes_loop); + vmovdqu(Address(to, 0), xtmp); + vmovdqu(Address(to, 32), xtmp); + addptr(to, 64); + subl(count, 16 << shift); + jcc(Assembler::greaterEqual, L_fill_64_bytes_loop); + + BIND(L_check_fill_32_bytes); + addl(count, 8 << shift); + jccb(Assembler::less, L_check_fill_8_bytes); + vmovdqu(Address(to, 0), xtmp); + addptr(to, 32); + subl(count, 8 << shift); + } else { + // Fill 32-byte chunks pshufd(xtmp, xtmp, 0); subl(count, 8 << shift); jcc(Assembler::less, L_check_fill_8_bytes); align(16);
*** 6016,6025 **** --- 6039,6049 ---- } addptr(to, 32); subl(count, 8 << shift); jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); + } BIND(L_check_fill_8_bytes); addl(count, 8 << shift); jccb(Assembler::zero, L_exit); jmpb(L_fill_8_bytes);
src/cpu/x86/vm/macroAssembler_x86.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File