src/cpu/x86/vm/macroAssembler_x86.cpp
Index
Unified diffs
Context diffs
Sdiffs
Wdiffs
Patch
New
Old
Previous File
Next File
8005544 Cdiff src/cpu/x86/vm/macroAssembler_x86.cpp
src/cpu/x86/vm/macroAssembler_x86.cpp
Print this page
*** 5993,6004 ****
}
BIND(L_fill_32_bytes);
{
assert( UseSSE >= 2, "supported cpu only" );
Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
- // Fill 32-byte chunks
movdl(xtmp, value);
pshufd(xtmp, xtmp, 0);
subl(count, 8 << shift);
jcc(Assembler::less, L_check_fill_8_bytes);
align(16);
--- 5993,6027 ----
}
BIND(L_fill_32_bytes);
{
assert( UseSSE >= 2, "supported cpu only" );
Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
movdl(xtmp, value);
+ if (UseAVX >= 2 && UseUnalignedLoadStores) {
+ // Fill 64-byte chunks
+ Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
+ vpbroadcastd(xtmp, xtmp);
+
+ subl(count, 16 << shift);
+ jcc(Assembler::less, L_check_fill_32_bytes);
+ align(16);
+
+ BIND(L_fill_64_bytes_loop);
+ vmovdqu(Address(to, 0), xtmp);
+ vmovdqu(Address(to, 32), xtmp);
+ addptr(to, 64);
+ subl(count, 16 << shift);
+ jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
+
+ BIND(L_check_fill_32_bytes);
+ addl(count, 8 << shift);
+ jccb(Assembler::less, L_check_fill_8_bytes);
+ vmovdqu(Address(to, 0), xtmp);
+ addptr(to, 32);
+ subl(count, 8 << shift);
+ } else {
+ // Fill 32-byte chunks
pshufd(xtmp, xtmp, 0);
subl(count, 8 << shift);
jcc(Assembler::less, L_check_fill_8_bytes);
align(16);
*** 6016,6025 ****
--- 6039,6049 ----
}
addptr(to, 32);
subl(count, 8 << shift);
jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
+ }
BIND(L_check_fill_8_bytes);
addl(count, 8 << shift);
jccb(Assembler::zero, L_exit);
jmpb(L_fill_8_bytes);
src/cpu/x86/vm/macroAssembler_x86.cpp
Index
Unified diffs
Context diffs
Sdiffs
Wdiffs
Patch
New
Old
Previous File
Next File