< prev index next >
src/cpu/x86/vm/stubGenerator_x86_64.cpp
Print this page
@@ -135,12 +135,14 @@
// 72(rbp): thread Thread*
//
// [ return_from_Java ] <--- rsp
// [ argument word n ]
// ...
- // -28 [ argument word 1 ]
- // -27 [ saved xmm15 ] <--- rsp_after_call
+ // -60 [ argument word 1 ]
+ // -59 [ saved xmm31 ] <--- rsp_after_call
+ // [ saved xmm16-xmm30 ] (EVEX enabled, else the space is blank)
+ // -27 [ saved xmm15 ]
// [ saved xmm7-xmm14 ]
// -9 [ saved xmm6 ] (each xmm register takes 2 slots)
// -7 [ saved r15 ]
// -6 [ saved r14 ]
// -5 [ saved r13 ]
@@ -164,11 +166,11 @@
// Call stub stack layout word offsets from rbp
enum call_stub_layout {
#ifdef _WIN64
xmm_save_first = 6, // save from xmm6
- xmm_save_last = 15, // to xmm15
+ xmm_save_last = 31, // to xmm31
xmm_save_base = -9,
rsp_after_call_off = xmm_save_base - 2 * (xmm_save_last - xmm_save_first), // -59
r15_off = -7,
r14_off = -6,
r13_off = -5,
@@ -260,14 +262,24 @@
__ movptr(rbx_save, rbx);
__ movptr(r12_save, r12);
__ movptr(r13_save, r13);
__ movptr(r14_save, r14);
__ movptr(r15_save, r15);
+ if (UseAVX > 2) {
+ __ movl(rbx, 0xffff);
+ __ kmovql(k1, rbx);
+ }
#ifdef _WIN64
+ if (UseAVX > 2) {
+ for (int i = 6; i <= 31; i++) {
+ __ movdqu(xmm_save(i), as_XMMRegister(i));
+ }
+ } else {
for (int i = 6; i <= 15; i++) {
__ movdqu(xmm_save(i), as_XMMRegister(i));
}
+ }
const Address rdi_save(rbp, rdi_off * wordSize);
const Address rsi_save(rbp, rsi_off * wordSize);
__ movptr(rsi_save, rsi);
@@ -1316,11 +1328,14 @@
__ align(OptoLoopAlignment);
if (UseUnalignedLoadStores) {
Label L_end;
// Copy 64-bytes per iteration
__ BIND(L_loop);
- if (UseAVX >= 2) {
+ if (UseAVX > 2) {
+ __ evmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
+ __ evmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
+ } else if (UseAVX == 2) {
__ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
__ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
} else {
@@ -1392,11 +1407,14 @@
__ align(OptoLoopAlignment);
if (UseUnalignedLoadStores) {
Label L_end;
// Copy 64-bytes per iteration
__ BIND(L_loop);
- if (UseAVX >= 2) {
+ if (UseAVX > 2) {
+ __ evmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32), Assembler::AVX_512bit);
+ __ evmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0, Assembler::AVX_512bit);
+ } else if (UseAVX == 2) {
__ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
__ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
__ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
__ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
} else {
< prev index next >