// ---- First version (left column of the side-by-side listing) ----

 public:

  // Most of the runtime stubs have this simple frame layout.
  // This class exists to make the layout shared in one place.
  // Offsets are for compiler stack slots, which are jints.
  enum layout {
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
    rbp_off2,
    return_off, return_off2,
    framesize
  };
};

class RegisterSaver {
  // Capture info about frame layout.  Layout offsets are in jint
  // units because compiler frame slots are jints.
#define HALF_ZMM_BANK_WORDS 128
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
#define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
  enum layout {
    fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
    xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
    DEF_XMM_OFFS(0),
    DEF_XMM_OFFS(1),
    DEF_XMM_OFFS(2),
    DEF_XMM_OFFS(3),
    DEF_XMM_OFFS(4),
    DEF_XMM_OFFS(5),
    DEF_XMM_OFFS(6),
    DEF_XMM_OFFS(7),
    DEF_XMM_OFFS(8),
    DEF_XMM_OFFS(9),
    DEF_XMM_OFFS(10),
    DEF_XMM_OFFS(11),
    DEF_XMM_OFFS(12),
    DEF_XMM_OFFS(13),
    DEF_XMM_OFFS(14),
    DEF_XMM_OFFS(15),
    zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt),
    DEF_ZMM_OFFS(16),
    DEF_ZMM_OFFS(17),
    DEF_ZMM_OFFS(18),
    DEF_ZMM_OFFS(19),
    DEF_ZMM_OFFS(20),
    DEF_ZMM_OFFS(21),
    DEF_ZMM_OFFS(22),
    DEF_ZMM_OFFS(23),
    DEF_ZMM_OFFS(24),
    DEF_ZMM_OFFS(25),
    DEF_ZMM_OFFS(26),
    DEF_ZMM_OFFS(27),
    DEF_ZMM_OFFS(28),
    DEF_ZMM_OFFS(29),
    DEF_ZMM_OFFS(30),
    DEF_ZMM_OFFS(31),
    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
    fpu_stateH_end,
    r15_off, r15H_off,
    r14_off, r14H_off,
    r13_off, r13H_off,
    r12_off, r12H_off,
    r11_off, r11H_off,
    r10_off, r10H_off,
    r9_off,  r9H_off,
    r8_off,  r8H_off,
    rdi_off, rdiH_off,
    rsi_off, rsiH_off,
    ignore_off, ignoreH_off,  // extra copy of rbp
    rsp_off, rspH_off,
    rbx_off, rbxH_off,
    rdx_off, rdxH_off,
    rcx_off, rcxH_off,
    rax_off, raxH_off,
    // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
    align_off, alignH_off,
    flags_off, flagsH_off,
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off, rbpH_off,        // copy of rbp we will restore
    return_off, returnH_off,  // slot for return address
    reg_save_size             // size in compiler stack slots
  };

 public:
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);

  // Offsets into the register save area
  // Used by deoptimization when it is managing result register
  // values on its own

  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
  static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }

  // During deoptimization only the result registers need to be restored,
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};
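The *_offset_in_bytes() accessors above work because the layout offsets are jint (4-byte) slot indices, and every 64-bit register occupies two consecutive slots (the `_off`/`H_off` pair). A minimal standalone sketch of that arithmetic; the slot index here is hypothetical, since the real enum values depend on FPUStateSizeInWords and frame::arg_reg_save_area_bytes:

    #include <cstdio>

    int main() {
      const int BytesPerInt = 4;       // one compiler stack slot (jint)
      const int wordSize    = 8;       // one 64-bit machine word
      int rax_off  = 700;              // hypothetical jint-slot index of the low half
      int raxH_off = rax_off + 1;      // the high half lives in the next slot
      // Byte offset is simply slot index times slot size, as in rax_offset_in_bytes().
      printf("low half at byte %d, high half at byte %d, %d slots per 64-bit register\n",
             BytesPerInt * rax_off, BytesPerInt * raxH_off, wordSize / BytesPerInt);
      return 0;
    }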

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
  int vect_words = 0;
  int ymmhi_offset = -1;
  int off = 0;
  int num_xmm_regs = XMMRegisterImpl::number_of_registers;
  if (UseAVX < 3) {
    num_xmm_regs = num_xmm_regs/2;
  }
#if defined(COMPILER2) || INCLUDE_JVMCI
  if (save_vectors) {
    assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
    assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
    // Save upper half of YMM registers
    vect_words = 16 * num_xmm_regs / wordSize;
    if (UseAVX < 3) {
      ymmhi_offset = additional_frame_words;
      additional_frame_words += vect_words;
    }
  }
#else
  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
#endif

  // Always make the frame size 16-byte aligned
  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                     reg_save_size*BytesPerInt, num_xmm_regs);
  // OopMap frame size is in compiler stack slots (jints), not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // The caller will allocate additional_frame_words
  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save registers, fpu state, and flags.
  // We assume the caller has already pushed the return address onto the
  // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
  // to be under the return address like a normal enter.

  __ enter();          // rsp becomes 16-byte aligned here
  __ push_CPU_state(); // Push a multiple of 16 bytes

  // push_CPU_state handles this on EVEX enabled targets
  if ((vect_words > 0) && (UseAVX < 3)) {
    assert(vect_words*wordSize >= 256, "");
    // Save upper half of YMM registers (0..num_xmm_regs)
    __ subptr(rsp, num_xmm_regs*16);
    for (int n = 0; n < num_xmm_regs; n++) {
      __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
    }
  }
  if (frame::arg_reg_save_area_bytes != 0) {
    // Allocate argument register save area
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }

  // Set an oopmap for the call site.  This oopmap will map all
  // oop-registers and debug-info registers as callee-saved.  This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = new OopMap(frame_size_in_slots, 0);

#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)

  map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
  // rbp location is known implicitly by the frame sender code, needs no oopmap,
  // and the location where rbp was saved is ignored
  map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
  // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15;
  // on EVEX enabled targets it is included in the xsave area
  off = xmm0_off;
  int delta = xmm1_off - off;
  for (int n = 0; n < 16; n++) {
    XMMRegister xmm_name = as_XMMRegister(n);
    map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
    off += delta;
  }
  if (UseAVX > 2) {
    // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
    off = zmm16_off;
    delta = zmm17_off - off;
    for (int n = 16; n < num_xmm_regs; n++) {
      XMMRegister xmm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
      off += delta;
    }
  }

#if defined(COMPILER2) || INCLUDE_JVMCI
  if (save_vectors) {
    assert(ymmhi_offset != -1, "save area must exist");
    map->set_callee_saved(YMMHI_STACK_OFFSET(  0), xmm0->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next(4));
    map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next(4));
  }
#endif // COMPILER2 || INCLUDE_JVMCI

  // %%% These should all be a waste but we'll keep things as they were for now
  if (true) {
    map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
    // rbp location is known implicitly by the frame sender code, needs no oopmap
    map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
    // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15;
    // on EVEX enabled targets it is included in the xsave area
    off = xmm0H_off;
    delta = xmm1H_off - off;
    for (int n = 0; n < 16; n++) {
      XMMRegister xmm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
      off += delta;
    }
    if (UseAVX > 2) {
      // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
      off = zmm16H_off;
      delta = zmm17H_off - off;
      for (int n = 16; n < num_xmm_regs; n++) {
        XMMRegister xmm_name = as_XMMRegister(n);
        map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
        off += delta;
      }
    }
  }

  return map;
}
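On pre-EVEX targets the function above sizes an extra save area of 16 bytes per XMM register for the upper YMM halves (vect_words). A standalone check of that sizing arithmetic; the constants mirror the HotSpot names but this is an illustration, not VM code:

    #include <cassert>
    #include <cstdio>

    int main() {
      const int wordSize = 8;            // bytes per 64-bit word
      int num_xmm_regs = 32;             // XMMRegisterImpl::number_of_registers with AVX-512 support built in
      int UseAVX = 2;                    // pre-EVEX target chosen for illustration
      if (UseAVX < 3) {
        num_xmm_regs = num_xmm_regs / 2; // only xmm0..xmm15 below AVX-512
      }
      // Upper 128 bits of each YMM register are saved explicitly: 16 bytes per register.
      int vect_words = 16 * num_xmm_regs / wordSize;
      assert(vect_words * wordSize >= 256);  // mirrors the assert in save_live_registers
      printf("extra save area: %d words = %d bytes for %d registers\n",
             vect_words, vect_words * wordSize, num_xmm_regs);
      return 0;
    }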

void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
  int num_xmm_regs = XMMRegisterImpl::number_of_registers;
  if (UseAVX < 3) {
    num_xmm_regs = num_xmm_regs/2;
  }
  if (frame::arg_reg_save_area_bytes != 0) {
    // Pop arg register save area
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }
#if defined(COMPILER2) || INCLUDE_JVMCI
  // On EVEX enabled targets everything is handled in pop_CPU_state
  if ((restore_vectors) && (UseAVX < 3)) {
    assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
    assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
    int off = 0;
    // Restore upper half of YMM registers (0..num_xmm_regs)
    for (int n = 0; n < num_xmm_regs; n++) {
      __ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
    }
    __ addptr(rsp, num_xmm_regs*16);
  }
#else
  assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
#endif
  // Recover CPU state
  __ pop_CPU_state();
  // Get the rbp described implicitly by the calling convention (no oopMap)
  __ pop(rbp);
}

void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result register. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  // Restore fp result register
  __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
  // Restore integer result register
  __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
  __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));

// ---- Second version (right column of the side-by-side listing) ----

 public:

  // Most of the runtime stubs have this simple frame layout.
  // This class exists to make the layout shared in one place.
  // Offsets are for compiler stack slots, which are jints.
  enum layout {
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
    rbp_off2,
    return_off, return_off2,
    framesize
  };
};

class RegisterSaver {
  // Capture info about frame layout.  Layout offsets are in jint
  // units because compiler frame slots are jints.
#define XSAVE_AREA_BEGIN 160
#define XSAVE_AREA_YMM_BEGIN 576
#define XSAVE_AREA_ZMM_BEGIN 1152
#define XSAVE_AREA_UPPERBANK 1664
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
#define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
  enum layout {
    fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt,   // fxsave save area
    xmm_off       = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area
    DEF_XMM_OFFS(0),
    DEF_XMM_OFFS(1),
    // 2..15 are implied in range usage
    ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_YMM_OFFS(0),
    DEF_YMM_OFFS(1),
    // 2..15 are implied in range usage
    zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_ZMM_OFFS(16),
    DEF_ZMM_OFFS(17),
    // 18..31 are implied in range usage
    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
    fpu_stateH_end,
    r15_off, r15H_off,
    r14_off, r14H_off,
    r13_off, r13H_off,
    r12_off, r12H_off,
    r11_off, r11H_off,
    r10_off, r10H_off,
    r9_off,  r9H_off,
    r8_off,  r8H_off,
    rdi_off, rdiH_off,
    rsi_off, rsiH_off,
    ignore_off, ignoreH_off,  // extra copy of rbp
    rsp_off, rspH_off,
    rbx_off, rbxH_off,
    rdx_off, rdxH_off,
    rcx_off, rcxH_off,
    rax_off, raxH_off,
    // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
    align_off, alignH_off,
    flags_off, flagsH_off,
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off, rbpH_off,        // copy of rbp we will restore
    return_off, returnH_off,  // slot for return address
    reg_save_size             // size in compiler stack slots
  };

 public:
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);

  // Offsets into the register save area
  // Used by deoptimization when it is managing result register
  // values on its own

  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
  static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }

  // During deoptimization only the result registers need to be restored,
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};
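The XSAVE_AREA_* constants are byte offsets of the xmm, ymm-high, zmm-high, and upper-bank regions inside the XSAVE image, and the enum turns them into jint-slot indices. A standalone sketch of that conversion, taking the fpu_state_off base as zero for simplicity (in the real enum it depends on frame::arg_reg_save_area_bytes):

    #include <cstdio>

    int main() {
      // Region start offsets (bytes) inside the XSAVE image, matching the macros above.
      const int XSAVE_AREA_BEGIN     = 160;   // legacy FXSAVE xmm save area
      const int XSAVE_AREA_YMM_BEGIN = 576;   // upper halves of ymm0..ymm15
      const int XSAVE_AREA_ZMM_BEGIN = 1152;  // upper halves of zmm0..zmm15
      const int XSAVE_AREA_UPPERBANK = 1664;  // full zmm16..zmm31
      const int BytesPerInt = 4;

      // Hypothetical base: fpu_state_off treated as slot 0 here.
      int xmm_off  = XSAVE_AREA_BEGIN / BytesPerInt;
      int ymm_off  = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN) / BytesPerInt;
      int zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN) / BytesPerInt;
      int zmm_off  = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN) / BytesPerInt;

      // DEF_YMM_OFFS(1): each saved 128-bit chunk is 16 bytes, i.e. 4 jint slots apart.
      int ymm1_off = ymm_off + 1 * 16 / BytesPerInt;
      // DEF_ZMM_OFFS(17): upper-bank registers are full 64-byte saves, 16 slots apart.
      int zmm17_off = zmm_off + (17 - 16) * 64 / BytesPerInt;

      printf("xmm_off=%d ymm_off=%d ymm1_off=%d zmm_high=%d zmm_off=%d zmm17_off=%d (jint slots)\n",
             xmm_off, ymm_off, ymm1_off, zmm_high, zmm_off, zmm17_off);
      return 0;
    }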

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
  int off = 0;
  int num_xmm_regs = XMMRegisterImpl::number_of_registers;
  if (UseAVX < 3) {
    num_xmm_regs = num_xmm_regs/2;
  }
#if defined(COMPILER2) || INCLUDE_JVMCI
  if (save_vectors) {
    assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
    assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
  }
#else
  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
#endif

  // Always make the frame size 16-byte aligned; both vector and non-vector stacks are always allocated
  int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs);
  // OopMap frame size is in compiler stack slots (jints), not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save registers, fpu state, and flags.
  // We assume the caller has already pushed the return address onto the
  // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
  // to be under the return address like a normal enter.

  __ enter();          // rsp becomes 16-byte aligned here
  __ push_CPU_state(); // Push a multiple of 16 bytes

  // push_CPU_state handles this on EVEX enabled targets
  if (save_vectors) {
    // Save upper half of YMM registers (0..15)
    int base_addr = XSAVE_AREA_YMM_BEGIN;
    for (int n = 0; n < 16; n++) {
      __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
    }
    if (VM_Version::supports_evex()) {
      // Save upper half of ZMM registers (0..15)
      base_addr = XSAVE_AREA_ZMM_BEGIN;
      for (int n = 0; n < 16; n++) {
        __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
      }
      // Save full ZMM registers (16..num_xmm_regs)
      base_addr = XSAVE_AREA_UPPERBANK;
      int off = 0;
      int vector_len = Assembler::AVX_512bit;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
      }
    }
  } else {
    if (VM_Version::supports_evex()) {
      // Save upper bank of ZMM registers (16..31) for double/float usage
      int base_addr = XSAVE_AREA_UPPERBANK;
      int off = 0;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
      }
    }
  }
  if (frame::arg_reg_save_area_bytes != 0) {
    // Allocate argument register save area
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }

  // Set an oopmap for the call site.  This oopmap will map all
  // oop-registers and debug-info registers as callee-saved.  This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = new OopMap(frame_size_in_slots, 0);

#define STACK_OFFSET(x) VMRegImpl::stack2reg((x))

  map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
  // rbp location is known implicitly by the frame sender code, needs no oopmap,
  // and the location where rbp was saved is ignored
  map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
  // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15;
  // on EVEX enabled targets it is included in the xsave area
  off = xmm0_off;
  int delta = xmm1_off - off;
  for (int n = 0; n < 16; n++) {
    XMMRegister xmm_name = as_XMMRegister(n);
    map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
    off += delta;
  }
  if (UseAVX > 2) {
    // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
    off = zmm16_off;
    delta = zmm17_off - off;
    for (int n = 16; n < num_xmm_regs; n++) {
      XMMRegister zmm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
      off += delta;
    }
  }

#if defined(COMPILER2) || INCLUDE_JVMCI
  if (save_vectors) {
    off = ymm0_off;
    int delta = ymm1_off - off;
    for (int n = 0; n < 16; n++) {
      XMMRegister ymm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
      off += delta;
    }
  }
#endif // COMPILER2 || INCLUDE_JVMCI

  // %%% These should all be a waste but we'll keep things as they were for now
  if (true) {
    map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
    // rbp location is known implicitly by the frame sender code, needs no oopmap
    map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
    // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15;
    // on EVEX enabled targets it is included in the xsave area
    off = xmm0H_off;
    delta = xmm1H_off - off;
    for (int n = 0; n < 16; n++) {
      XMMRegister xmm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
      off += delta;
    }
    if (UseAVX > 2) {
      // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
      off = zmm16H_off;
      delta = zmm17H_off - off;
      for (int n = 16; n < num_xmm_regs; n++) {
        XMMRegister zmm_name = as_XMMRegister(n);
        map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
        off += delta;
      }
    }
  }

  return map;
}
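The frame-size computation above rounds the save area (in bytes) up and then re-expresses it in OopMap slots and CodeBlob words. A standalone sketch of those conversions; round_to here is a stand-in for HotSpot's round-up-to-a-multiple utility, and reg_save_size is a hypothetical slot count rather than the real enum value:

    #include <cstdio>

    // Assumed behavior of HotSpot's round_to(): round x up to a multiple of unit
    // (unit taken to be a power of two here).
    static int round_to(int x, int unit) { return (x + unit - 1) & ~(unit - 1); }

    int main() {
      const int BytesPerInt = 4;
      const int wordSize    = 8;
      int num_xmm_regs  = 32;      // EVEX target; 16 when UseAVX < 3
      int reg_save_size = 845;     // hypothetical slot count, stands in for the enum value
      int frame_size_in_bytes = round_to(reg_save_size * BytesPerInt, num_xmm_regs);
      int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;  // OopMap counts jint slots
      int frame_size_in_words = frame_size_in_bytes / wordSize;     // CodeBlob counts words
      printf("bytes=%d slots=%d words=%d\n",
             frame_size_in_bytes, frame_size_in_slots, frame_size_in_words);
      return 0;
    }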

void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
  int num_xmm_regs = XMMRegisterImpl::number_of_registers;
  if (UseAVX < 3) {
    num_xmm_regs = num_xmm_regs/2;
  }
  if (frame::arg_reg_save_area_bytes != 0) {
    // Pop arg register save area
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

#if defined(COMPILER2) || INCLUDE_JVMCI
  if (restore_vectors) {
    assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
    assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
  }
#else
  assert(!restore_vectors, "vectors are generated only by C2");
#endif

  // On EVEX enabled targets everything is handled in pop_CPU_state
  if (restore_vectors) {
    // Restore upper half of YMM registers (0..15)
    int base_addr = XSAVE_AREA_YMM_BEGIN;
    for (int n = 0; n < 16; n++) {
      __ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16));
    }
    if (VM_Version::supports_evex()) {
      // Restore upper half of ZMM registers (0..15)
      base_addr = XSAVE_AREA_ZMM_BEGIN;
      for (int n = 0; n < 16; n++) {
        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
      }
      // Restore full ZMM registers (16..num_xmm_regs)
      base_addr = XSAVE_AREA_UPPERBANK;
      int vector_len = Assembler::AVX_512bit;
      int off = 0;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
      }
    }
  } else {
    if (VM_Version::supports_evex()) {
      // Restore upper bank of ZMM registers (16..31) for double/float usage
      int base_addr = XSAVE_AREA_UPPERBANK;
      int off = 0;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
      }
    }
  }

  // Recover CPU state
  __ pop_CPU_state();
  // Get the rbp described implicitly by the calling convention (no oopMap)
  __ pop(rbp);
}

void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result register. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  // Restore fp result register
  __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
  // Restore integer result register
  __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
  __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));