< prev index next >
src/cpu/x86/vm/sharedRuntime_x86_64.cpp
Print this page
*** 84,94 ****
DEF_XMM_OFFS(11),
DEF_XMM_OFFS(12),
DEF_XMM_OFFS(13),
DEF_XMM_OFFS(14),
DEF_XMM_OFFS(15),
! fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
fpu_stateH_end,
r15_off, r15H_off,
r14_off, r14H_off,
r13_off, r13H_off,
r12_off, r12H_off,
--- 84,110 ----
DEF_XMM_OFFS(11),
DEF_XMM_OFFS(12),
DEF_XMM_OFFS(13),
DEF_XMM_OFFS(14),
DEF_XMM_OFFS(15),
! DEF_XMM_OFFS(16),
! DEF_XMM_OFFS(17),
! DEF_XMM_OFFS(18),
! DEF_XMM_OFFS(19),
! DEF_XMM_OFFS(20),
! DEF_XMM_OFFS(21),
! DEF_XMM_OFFS(22),
! DEF_XMM_OFFS(23),
! DEF_XMM_OFFS(24),
! DEF_XMM_OFFS(25),
! DEF_XMM_OFFS(26),
! DEF_XMM_OFFS(27),
! DEF_XMM_OFFS(28),
! DEF_XMM_OFFS(29),
! DEF_XMM_OFFS(30),
! DEF_XMM_OFFS(31),
! fpu_state_end = fpu_state_off + ((FPUStateSizeInWords - 1)*wordSize / BytesPerInt),
fpu_stateH_end,
r15_off, r15H_off,
r14_off, r14H_off,
r13_off, r13H_off,
r12_off, r12H_off,
*** 134,158 ****
static void restore_result_registers(MacroAssembler* masm);
};
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
int vect_words = 0;
#ifdef COMPILER2
if (save_vectors) {
! assert(UseAVX > 0, "256bit vectors are supported only with AVX");
! assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
! // Save upper half of YMM registes
! vect_words = 16 * 16 / wordSize;
additional_frame_words += vect_words;
}
#else
assert(!save_vectors, "vectors are generated only by C2");
#endif
// Always make the frame size 16-byte aligned
int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
! reg_save_size*BytesPerInt, 16);
// OopMap frame size is in compiler stack slots (jint's) not bytes or words
int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
// The caller will allocate additional_frame_words
int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
// CodeBlob frame size is in words.
--- 150,182 ----
static void restore_result_registers(MacroAssembler* masm);
};
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
int vect_words = 0;
+ int num_xmm_regs = 16;
+ if (UseAVX > 2) {
+ num_xmm_regs = 32;
+ }
#ifdef COMPILER2
if (save_vectors) {
! assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
! assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
! // Save upper half of YMM registers
! vect_words = 16 * num_xmm_regs / wordSize;
! additional_frame_words += vect_words;
! if (UseAVX > 2) {
! // Save upper half of ZMM registers as well
additional_frame_words += vect_words;
}
+ }
#else
assert(!save_vectors, "vectors are generated only by C2");
#endif
// Always make the frame size 16-byte aligned
int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
! reg_save_size*BytesPerInt, num_xmm_regs);
// OopMap frame size is in compiler stack slots (jint's) not bytes or words
int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
// The caller will allocate additional_frame_words
int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
// CodeBlob frame size is in words.
*** 167,194 ****
__ enter(); // rsp becomes 16-byte aligned here
__ push_CPU_state(); // Push a multiple of 16 bytes
if (vect_words > 0) {
! assert(vect_words*wordSize == 256, "");
! __ subptr(rsp, 256); // Save upper half of YMM registes
! __ vextractf128h(Address(rsp, 0),xmm0);
! __ vextractf128h(Address(rsp, 16),xmm1);
! __ vextractf128h(Address(rsp, 32),xmm2);
! __ vextractf128h(Address(rsp, 48),xmm3);
! __ vextractf128h(Address(rsp, 64),xmm4);
! __ vextractf128h(Address(rsp, 80),xmm5);
! __ vextractf128h(Address(rsp, 96),xmm6);
! __ vextractf128h(Address(rsp,112),xmm7);
! __ vextractf128h(Address(rsp,128),xmm8);
! __ vextractf128h(Address(rsp,144),xmm9);
! __ vextractf128h(Address(rsp,160),xmm10);
! __ vextractf128h(Address(rsp,176),xmm11);
! __ vextractf128h(Address(rsp,192),xmm12);
! __ vextractf128h(Address(rsp,208),xmm13);
! __ vextractf128h(Address(rsp,224),xmm14);
! __ vextractf128h(Address(rsp,240),xmm15);
}
if (frame::arg_reg_save_area_bytes != 0) {
// Allocate argument register save area
__ subptr(rsp, frame::arg_reg_save_area_bytes);
}
--- 191,271 ----
__ enter(); // rsp becomes 16-byte aligned here
__ push_CPU_state(); // Push a multiple of 16 bytes
if (vect_words > 0) {
! assert(vect_words*wordSize >= 256, "");
! __ subptr(rsp, 256); // Save upper half of YMM registers (0..15)
! __ vextractf128h(Address(rsp, 0), xmm0);
! __ vextractf128h(Address(rsp, 16), xmm1);
! __ vextractf128h(Address(rsp, 32), xmm2);
! __ vextractf128h(Address(rsp, 48), xmm3);
! __ vextractf128h(Address(rsp, 64), xmm4);
! __ vextractf128h(Address(rsp, 80), xmm5);
! __ vextractf128h(Address(rsp, 96), xmm6);
! __ vextractf128h(Address(rsp, 112), xmm7);
! __ vextractf128h(Address(rsp, 128), xmm8);
! __ vextractf128h(Address(rsp, 144), xmm9);
! __ vextractf128h(Address(rsp, 160), xmm10);
! __ vextractf128h(Address(rsp, 176), xmm11);
! __ vextractf128h(Address(rsp, 192), xmm12);
! __ vextractf128h(Address(rsp, 208), xmm13);
! __ vextractf128h(Address(rsp, 224), xmm14);
! __ vextractf128h(Address(rsp, 240), xmm15);
! if (UseAVX > 2) {
! __ subptr(rsp, 256); // Save upper half of YMM registers (16..31)
! __ vextractf128h(Address(rsp, 0), xmm16);
! __ vextractf128h(Address(rsp, 16), xmm17);
! __ vextractf128h(Address(rsp, 32), xmm18);
! __ vextractf128h(Address(rsp, 48), xmm19);
! __ vextractf128h(Address(rsp, 64), xmm20);
! __ vextractf128h(Address(rsp, 80), xmm21);
! __ vextractf128h(Address(rsp, 96), xmm22);
! __ vextractf128h(Address(rsp, 112), xmm23);
! __ vextractf128h(Address(rsp, 128), xmm24);
! __ vextractf128h(Address(rsp, 144), xmm25);
! __ vextractf128h(Address(rsp, 160), xmm26);
! __ vextractf128h(Address(rsp, 176), xmm27);
! __ vextractf128h(Address(rsp, 192), xmm28);
! __ vextractf128h(Address(rsp, 208), xmm29);
! __ vextractf128h(Address(rsp, 224), xmm30);
! __ vextractf128h(Address(rsp, 240), xmm31);
! // Now handle the ZMM registers (0..31)
! __ subptr(rsp, 1024); // Save upper half of ZMM registers
! __ vextractf64x4h(Address(rsp, 0), xmm0);
! __ vextractf64x4h(Address(rsp, 32), xmm1);
! __ vextractf64x4h(Address(rsp, 64), xmm2);
! __ vextractf64x4h(Address(rsp, 96), xmm3);
! __ vextractf64x4h(Address(rsp, 128), xmm4);
! __ vextractf64x4h(Address(rsp, 160), xmm5);
! __ vextractf64x4h(Address(rsp, 192), xmm6);
! __ vextractf64x4h(Address(rsp, 224), xmm7);
! __ vextractf64x4h(Address(rsp, 256), xmm8);
! __ vextractf64x4h(Address(rsp, 288), xmm9);
! __ vextractf64x4h(Address(rsp, 320), xmm10);
! __ vextractf64x4h(Address(rsp, 352), xmm11);
! __ vextractf64x4h(Address(rsp, 384), xmm12);
! __ vextractf64x4h(Address(rsp, 416), xmm13);
! __ vextractf64x4h(Address(rsp, 448), xmm14);
! __ vextractf64x4h(Address(rsp, 480), xmm15);
! __ vextractf64x4h(Address(rsp, 512), xmm16);
! __ vextractf64x4h(Address(rsp, 544), xmm17);
! __ vextractf64x4h(Address(rsp, 576), xmm18);
! __ vextractf64x4h(Address(rsp, 608), xmm19);
! __ vextractf64x4h(Address(rsp, 640), xmm20);
! __ vextractf64x4h(Address(rsp, 672), xmm21);
! __ vextractf64x4h(Address(rsp, 704), xmm22);
! __ vextractf64x4h(Address(rsp, 736), xmm23);
! __ vextractf64x4h(Address(rsp, 768), xmm24);
! __ vextractf64x4h(Address(rsp, 800), xmm25);
! __ vextractf64x4h(Address(rsp, 832), xmm26);
! __ vextractf64x4h(Address(rsp, 864), xmm27);
! __ vextractf64x4h(Address(rsp, 896), xmm28);
! __ vextractf64x4h(Address(rsp, 928), xmm29);
! __ vextractf64x4h(Address(rsp, 960), xmm30);
! __ vextractf64x4h(Address(rsp, 992), xmm31);
! }
}
if (frame::arg_reg_save_area_bytes != 0) {
// Allocate argument register save area
__ subptr(rsp, frame::arg_reg_save_area_bytes);
}
*** 233,242 ****
--- 310,337 ----
map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
+ if (UseAVX > 2) {
+ map->set_callee_saved(STACK_OFFSET(xmm16_off), xmm16->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm17_off), xmm17->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm18_off), xmm18->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm19_off), xmm19->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm20_off), xmm20->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm21_off), xmm21->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm22_off), xmm22->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm23_off), xmm23->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm24_off), xmm24->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm25_off), xmm25->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm26_off), xmm26->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm27_off), xmm27->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm28_off), xmm28->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm29_off), xmm29->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm30_off), xmm30->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm31_off), xmm31->as_VMReg());
+ }
// %%% These should all be a waste but we'll keep things as they were for now
if (true) {
map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
*** 267,276 ****
--- 362,389 ----
map->set_callee_saved(STACK_OFFSET(xmm11H_off), xmm11->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm12H_off), xmm12->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm13H_off), xmm13->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next());
+ if (UseAVX > 2) {
+ map->set_callee_saved(STACK_OFFSET(xmm16H_off), xmm16->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm17H_off), xmm17->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm18H_off), xmm18->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm19H_off), xmm19->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm20H_off), xmm20->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm21H_off), xmm21->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm22H_off), xmm22->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm23H_off), xmm23->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm24H_off), xmm24->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm25H_off), xmm25->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm26H_off), xmm26->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm27H_off), xmm27->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm28H_off), xmm28->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm29H_off), xmm29->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm30H_off), xmm30->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm31H_off), xmm31->as_VMReg()->next());
+ }
}
return map;
}
*** 279,291 ****
// Pop arg register save area
__ addptr(rsp, frame::arg_reg_save_area_bytes);
}
#ifdef COMPILER2
if (restore_vectors) {
! // Restore upper half of YMM registes.
! assert(UseAVX > 0, "256bit vectors are supported only with AVX");
! assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
__ vinsertf128h(xmm0, Address(rsp, 0));
__ vinsertf128h(xmm1, Address(rsp, 16));
__ vinsertf128h(xmm2, Address(rsp, 32));
__ vinsertf128h(xmm3, Address(rsp, 48));
__ vinsertf128h(xmm4, Address(rsp, 64));
--- 392,404 ----
// Pop arg register save area
__ addptr(rsp, frame::arg_reg_save_area_bytes);
}
#ifdef COMPILER2
if (restore_vectors) {
! // Restore upper half of YMM registers (0..15)
! assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
! assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
__ vinsertf128h(xmm0, Address(rsp, 0));
__ vinsertf128h(xmm1, Address(rsp, 16));
__ vinsertf128h(xmm2, Address(rsp, 32));
__ vinsertf128h(xmm3, Address(rsp, 48));
__ vinsertf128h(xmm4, Address(rsp, 64));
*** 299,308 ****
--- 412,475 ----
__ vinsertf128h(xmm12, Address(rsp,192));
__ vinsertf128h(xmm13, Address(rsp,208));
__ vinsertf128h(xmm14, Address(rsp,224));
__ vinsertf128h(xmm15, Address(rsp,240));
__ addptr(rsp, 256);
+ if (UseAVX > 2) {
+ // Restore upper half of YMM registers (16..31)
+ __ vinsertf128h(xmm16, Address(rsp, 0));
+ __ vinsertf128h(xmm17, Address(rsp, 16));
+ __ vinsertf128h(xmm18, Address(rsp, 32));
+ __ vinsertf128h(xmm19, Address(rsp, 48));
+ __ vinsertf128h(xmm20, Address(rsp, 64));
+ __ vinsertf128h(xmm21, Address(rsp, 80));
+ __ vinsertf128h(xmm22, Address(rsp, 96));
+ __ vinsertf128h(xmm23, Address(rsp,112));
+ __ vinsertf128h(xmm24, Address(rsp,128));
+ __ vinsertf128h(xmm25, Address(rsp,144));
+ __ vinsertf128h(xmm26, Address(rsp,160));
+ __ vinsertf128h(xmm27, Address(rsp,176));
+ __ vinsertf128h(xmm28, Address(rsp,192));
+ __ vinsertf128h(xmm29, Address(rsp,208));
+ __ vinsertf128h(xmm30, Address(rsp,224));
+ __ vinsertf128h(xmm31, Address(rsp,240));
+ __ addptr(rsp, 256);
+ // Restore upper half of ZMM registers.
+ __ vinsertf64x4h(xmm0, Address(rsp, 0));
+ __ vinsertf64x4h(xmm1, Address(rsp, 32));
+ __ vinsertf64x4h(xmm2, Address(rsp, 64));
+ __ vinsertf64x4h(xmm3, Address(rsp, 96));
+ __ vinsertf64x4h(xmm4, Address(rsp, 128));
+ __ vinsertf64x4h(xmm5, Address(rsp, 160));
+ __ vinsertf64x4h(xmm6, Address(rsp, 192));
+ __ vinsertf64x4h(xmm7, Address(rsp, 224));
+ __ vinsertf64x4h(xmm8, Address(rsp, 256));
+ __ vinsertf64x4h(xmm9, Address(rsp, 288));
+ __ vinsertf64x4h(xmm10, Address(rsp, 320));
+ __ vinsertf64x4h(xmm11, Address(rsp, 352));
+ __ vinsertf64x4h(xmm12, Address(rsp, 384));
+ __ vinsertf64x4h(xmm13, Address(rsp, 416));
+ __ vinsertf64x4h(xmm14, Address(rsp, 448));
+ __ vinsertf64x4h(xmm15, Address(rsp, 480));
+ __ vinsertf64x4h(xmm16, Address(rsp, 512));
+ __ vinsertf64x4h(xmm17, Address(rsp, 544));
+ __ vinsertf64x4h(xmm18, Address(rsp, 576));
+ __ vinsertf64x4h(xmm19, Address(rsp, 608));
+ __ vinsertf64x4h(xmm20, Address(rsp, 640));
+ __ vinsertf64x4h(xmm21, Address(rsp, 672));
+ __ vinsertf64x4h(xmm22, Address(rsp, 704));
+ __ vinsertf64x4h(xmm23, Address(rsp, 736));
+ __ vinsertf64x4h(xmm24, Address(rsp, 768));
+ __ vinsertf64x4h(xmm25, Address(rsp, 800));
+ __ vinsertf64x4h(xmm26, Address(rsp, 832));
+ __ vinsertf64x4h(xmm27, Address(rsp, 864));
+ __ vinsertf64x4h(xmm28, Address(rsp, 896));
+ __ vinsertf64x4h(xmm29, Address(rsp, 928));
+ __ vinsertf64x4h(xmm30, Address(rsp, 960));
+ __ vinsertf64x4h(xmm31, Address(rsp, 992));
+ __ addptr(rsp, 1024); // Pop the ZMM upper-half save area (must add, not subtract, on restore)
+ }
}
#else
assert(!restore_vectors, "vectors are generated only by C2");
#endif
// Recover CPU state
< prev index next >