src/cpu/x86/vm/sharedRuntime_x86_64.cpp

  55 
  56   public:
  57 
  58   // Most of the runtime stubs have this simple frame layout.
  59   // This class exists to make the layout shared in one place.
  60   // Offsets are for compiler stack slots, which are jints.
  61   enum layout {
  62     // The frame sender code expects that rbp will be in the "natural" place and
  63     // will override any oopMap setting for it. We must therefore force the layout
  64     // so that it agrees with the frame sender code.
  65     rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
  66     rbp_off2,
  67     return_off, return_off2,
  68     framesize
  69   };
  70 };
  71 
  72 class RegisterSaver {
  73   // Capture info about frame layout.  Layout offsets are in jint
  74   // units because compiler frame slots are jints.
  75 #define HALF_ZMM_BANK_WORDS 128
  76 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off

  77 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
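     // Illustrative expansion, not part of the source: DEF_XMM_OFFS(1) yields
     //
     //   xmm1_off = xmm_off + 1*16/BytesPerInt, xmm1H_off,
     //
     // so xmm1_off is pinned 16 bytes (four jint slots) past xmm0_off, while
     // xmm1H_off simply takes the next enum value. Only two of the four slots
     // per register are named; the 16-byte stride comes from the explicit arithmetic.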
  78   enum layout {
  79     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
  80     xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
  81     DEF_XMM_OFFS(0),
  82     DEF_XMM_OFFS(1),
  83     DEF_XMM_OFFS(2),
  84     DEF_XMM_OFFS(3),
  85     DEF_XMM_OFFS(4),
  86     DEF_XMM_OFFS(5),
  87     DEF_XMM_OFFS(6),
  88     DEF_XMM_OFFS(7),
  89     DEF_XMM_OFFS(8),
  90     DEF_XMM_OFFS(9),
  91     DEF_XMM_OFFS(10),
  92     DEF_XMM_OFFS(11),
  93     DEF_XMM_OFFS(12),
  94     DEF_XMM_OFFS(13),
  95     DEF_XMM_OFFS(14),
  96     DEF_XMM_OFFS(15),
  97     zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt),
  98     DEF_ZMM_OFFS(16),
  99     DEF_ZMM_OFFS(17),
 100     DEF_ZMM_OFFS(18),
 101     DEF_ZMM_OFFS(19),
 102     DEF_ZMM_OFFS(20),
 103     DEF_ZMM_OFFS(21),
 104     DEF_ZMM_OFFS(22),
 105     DEF_ZMM_OFFS(23),
 106     DEF_ZMM_OFFS(24),
 107     DEF_ZMM_OFFS(25),
 108     DEF_ZMM_OFFS(26),
 109     DEF_ZMM_OFFS(27),
 110     DEF_ZMM_OFFS(28),
 111     DEF_ZMM_OFFS(29),
 112     DEF_ZMM_OFFS(30),
 113     DEF_ZMM_OFFS(31),
 114     fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
 115     fpu_stateH_end,
 116     r15_off, r15H_off,
 117     r14_off, r14H_off,
 118     r13_off, r13H_off,
 119     r12_off, r12H_off,
 120     r11_off, r11H_off,
 121     r10_off, r10H_off,
 122     r9_off,  r9H_off,
 123     r8_off,  r8H_off,
 124     rdi_off, rdiH_off,
 125     rsi_off, rsiH_off,
 126     ignore_off, ignoreH_off,  // extra copy of rbp
 127     rsp_off, rspH_off,
 128     rbx_off, rbxH_off,
 129     rdx_off, rdxH_off,
 130     rcx_off, rcxH_off,
 131     rax_off, raxH_off,
 132     // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
 133     align_off, alignH_off,


 143  public:
 144   static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
 145   static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
 146 
 147   // Offsets into the register save area
 148   // Used by deoptimization when it is managing result register
 149   // values on its own
 150 
 151   static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
 152   static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
 153   static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
 154   static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
 155   static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
 156 
 157   // During deoptimization only the result registers need to be restored,
 158   // all the other values have already been extracted.
 159   static void restore_result_registers(MacroAssembler* masm);
 160 };
 161 
 162 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 163   int vect_words = 0;
 164   int ymmhi_offset = -1;
 165   int off = 0;
 166   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 167   if (UseAVX < 3) {
 168     num_xmm_regs = num_xmm_regs/2;
 169   }
 170 #if defined(COMPILER2) || INCLUDE_JVMCI
 171   if (save_vectors) {
 172     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
 173     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
 174     // Save upper half of YMM registers
 175     vect_words = 16 * num_xmm_regs / wordSize;
 176     if (UseAVX < 3) {
 177       ymmhi_offset = additional_frame_words;
 178       additional_frame_words += vect_words;
 179     }
 180   }
 181 #else
 182   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 183 #endif
 184 
 185   // Always make the frame size 16-byte aligned
 186   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
 187                                      reg_save_size*BytesPerInt, num_xmm_regs);
 188   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 189   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 190   // The caller will allocate additional_frame_words
 191   int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
 192   // CodeBlob frame size is in words.
 193   int frame_size_in_words = frame_size_in_bytes / wordSize;
 194   *total_frame_words = frame_size_in_words;
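       // Worked example of the unit bookkeeping above (illustrative, assuming
       // UseAVX < 3 so num_xmm_regs == 16, and additional_frame_words == 0):
       //
       //   frame_size_in_bytes = round_to(0*8 + reg_save_size*4, 16)  // bytes
       //   frame_size_in_slots = frame_size_in_bytes / 4              // jint slots
       //   frame_size_in_words = frame_size_in_bytes / 8              // machine words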
 195 
 196   // Save registers, fpu state, and flags.
 197   // We assume caller has already pushed the return address onto the
 198   // stack, so rsp is 8-byte aligned here.
 199   // We push rbp twice in this sequence because we want the real rbp
 200   // to be under the return address like a normal enter.
 201 
 202   __ enter();          // rsp becomes 16-byte aligned here
 203   __ push_CPU_state(); // Push a multiple of 16 bytes
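       // Rough stack picture at this point (a sketch; the stack grows downward):
       //
       //   [ return address ]                    <- pushed by the caller
       //   [ saved rbp      ]                    <- enter()
       //   [ flags, align word, GPRs (with an
       //     extra rbp copy), fxsave area ]      <- push_CPU_state()
       //
       // push_CPU_state pushes a multiple of 16 bytes, keeping rsp 16-byte aligned.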
 204 
 205   // push cpu state handles this on EVEX enabled targets
 206   if ((vect_words > 0) && (UseAVX < 3)) {
 207     assert(vect_words*wordSize >= 256, "");
 208     // Save upper half of YMM registers (0..num_xmm_regs)
 209     __ subptr(rsp, num_xmm_regs*16);
 210     for (int n = 0; n < num_xmm_regs; n++) {
 211       __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
 212     }
 213   }
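       // vextractf128h stores the upper 128 bits of a YMM register; the fxsave
       // image written by push_CPU_state already holds the lower 128 bits, so
       // together the full 256-bit register is captured. For n == 0 the loop
       // body above is equivalent to (illustrative):
       //
       //   __ vextractf128h(Address(rsp, 0), as_XMMRegister(0)); // ymm0[255:128]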
 214   if (frame::arg_reg_save_area_bytes != 0) {
 215     // Allocate argument register save area
 216     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 217   }
 218 
 219   // Set an oopmap for the call site.  This oopmap will map all
 220   // oop-registers and debug-info registers as callee-saved.  This
 221   // will allow deoptimization at this safepoint to find all possible
 222   // debug-info recordings, as well as let GC find all oops.
 223 
 224   OopMapSet *oop_maps = new OopMapSet();
 225   OopMap* map = new OopMap(frame_size_in_slots, 0);
 226 
 227 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
 228 #define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)
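     // Sketch of the conversions these macros perform (illustrative values:
     // with additional_frame_words == 2, additional_frame_slots == 2*8/4 == 4):
     //
     //   STACK_OFFSET(rax_off)  == VMRegImpl::stack2reg(rax_off + 4)
     //   YMMHI_STACK_OFFSET(32) == VMRegImpl::stack2reg(32/4 + ymmhi_offset)
     //
     // STACK_OFFSET counts 4-byte compiler slots from rsp; YMMHI_STACK_OFFSET
     // takes a byte offset into the YMM-high save area and scales it to slots.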
 229 
 230   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
 231   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
 232   map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
 233   map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
 234   // rbp location is known implicitly by the frame sender code, needs no oopmap
 235   // and the location where rbp was saved is ignored
 236   map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
 237   map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
 238   map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
 239   map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
 240   map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
 241   map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
 242   map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
 243   map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
 244   map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
 245   map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
 246   // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
 247   // on EVEX enabled targets, we get it included in the xsave area
 248   off = xmm0_off;
 249   int delta = xmm1_off - off;
 250   for (int n = 0; n < 16; n++) {
 251     XMMRegister xmm_name = as_XMMRegister(n);
 252     map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
 253     off += delta;
 254   }
 255   if (UseAVX > 2) {
 256     // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
 257     off = zmm16_off;
 258     delta = zmm17_off - off;
 259     for (int n = 16; n < num_xmm_regs; n++) {
 260       XMMRegister xmm_name = as_XMMRegister(n);
 261       map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
 262       off += delta;
 263     }
 264   }
 265 
 266 #if defined(COMPILER2) || INCLUDE_JVMCI
 267   if (save_vectors) {
 268     assert(ymmhi_offset != -1, "save area must exist");
 269     map->set_callee_saved(YMMHI_STACK_OFFSET(  0), xmm0->as_VMReg()->next(4));
 270     map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next(4));
 271     map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next(4));
 272     map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next(4));
 273     map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next(4));
 274     map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next(4));
 275     map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next(4));
 276     map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next(4));
 277     map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next(4));
 278     map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next(4));
 279     map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next(4));
 280     map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next(4));
 281     map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next(4));
 282     map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next(4));
 283     map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next(4));
 284     map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next(4));
 285   }
 286 #endif // COMPILER2 || INCLUDE_JVMCI
 287 
 288   // %%% These should all be a waste but we'll keep things as they were for now
 289   if (true) {
 290     map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
 291     map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
 292     map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
 293     map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
 294     // rbp location is known implicitly by the frame sender code, needs no oopmap
 295     map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
 296     map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
 297     map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
 298     map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
 299     map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
 300     map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
 301     map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
 302     map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
 303     map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
 304     map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
 305     // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
 306     // on EVEX enabled targets, we get it included in the xsave area
 307     off = xmm0H_off;
 308     delta = xmm1H_off - off;
 309     for (int n = 0; n < 16; n++) {
 310       XMMRegister xmm_name = as_XMMRegister(n);
 311       map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
 312       off += delta;
 313     }
 314     if (UseAVX > 2) {
 315       // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
 316       off = zmm16H_off;
 317       delta = zmm17H_off - off;
 318       for (int n = 16; n < num_xmm_regs; n++) {
 319         XMMRegister xmm_name = as_XMMRegister(n);
 320         map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
 321         off += delta;
 322       }
 323     }
 324   }
 325 
 326   return map;
 327 }
 328 
 329 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 330   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 331   if (UseAVX < 3) {
 332     num_xmm_regs = num_xmm_regs/2;
 333   }
 334   if (frame::arg_reg_save_area_bytes != 0) {
 335     // Pop arg register save area
 336     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 337   }

 338 #if defined(COMPILER2) || INCLUDE_JVMCI
 339   // On EVEX enabled targets everything is handled in pop fpu state
 340   if ((restore_vectors) && (UseAVX < 3)) {
 341     assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
 342     assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
 343     int off = 0;
 344     // Restore upper half of YMM registers (0..num_xmm_regs)
 345     for (int n = 0; n < num_xmm_regs; n++) {
 346       __ vinsertf128h(as_XMMRegister(n), Address(rsp,  off++*16));
 347     }
 348     __ addptr(rsp, num_xmm_regs*16);
 349   }
 350 #else
 351   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 352 #endif
 353   // Recover CPU state
 354   __ pop_CPU_state();
 355   // Get the rbp described implicitly by the calling convention (no oopMap)
 356   __ pop(rbp);
 357 }
 358 
 359 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 360 
 361   // Just restore result register. Only used by deoptimization. By
 362   // now any callee save register that needs to be restored to a c2
 363   // caller of the deoptee has been extracted into the vframeArray
 364   // and will be stuffed into the c2i adapter we create for later
 365   // restoration so only result registers need to be restored here.
 366 
 367   // Restore fp result register
 368   __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
 369   // Restore integer result register
 370   __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
 371   __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));
 372 




  55 
  56   public:
  57 
  58   // Most of the runtime stubs have this simple frame layout.
  59   // This class exists to make the layout shared in one place.
  60   // Offsets are for compiler stack slots, which are jints.
  61   enum layout {
  62     // The frame sender code expects that rbp will be in the "natural" place and
  63     // will override any oopMap setting for it. We must therefore force the layout
  64     // so that it agrees with the frame sender code.
  65     rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
  66     rbp_off2,
  67     return_off, return_off2,
  68     framesize
  69   };
  70 };
  71 
  72 class RegisterSaver {
  73   // Capture info about frame layout.  Layout offsets are in jint
  74   // units because compiler frame slots are jints.
  75 #define XSAVE_AREA_BEGIN 160
  76 #define XSAVE_AREA_YMM_BEGIN 576
  77 #define XSAVE_AREA_ZMM_BEGIN 1152
  78 #define XSAVE_AREA_UPPERBANK 1664
  79 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
  80 #define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
  81 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
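     // The XSAVE_AREA_* constants are byte offsets into the image laid down by
     // fxsave/xsave. A sketch of the layout they encode (standard-format XSAVE;
     // regions not named by these constants are omitted):
     //
     //   bytes    0..159    fxsave legacy state (x87, MXCSR, ...)
     //   bytes  160..415    xmm0..xmm15, low 128 bits each    (XSAVE_AREA_BEGIN)
     //   bytes  576..831    ymm0..ymm15, upper 128 bits each  (XSAVE_AREA_YMM_BEGIN)
     //   bytes 1152..1663   zmm0..zmm15, upper 256 bits each  (XSAVE_AREA_ZMM_BEGIN)
     //   bytes 1664..2687   zmm16..zmm31, full 512 bits each  (XSAVE_AREA_UPPERBANK)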
  82   enum layout {
  83     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
  84     xmm_off       = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt,            // offset in fxsave save area
  85     DEF_XMM_OFFS(0),
  86     DEF_XMM_OFFS(1),
  87     // 2..15 are implied in range usage
  88     ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
  89     DEF_YMM_OFFS(0),
  90     DEF_YMM_OFFS(1),
  91     // 2..15 are implied in range usage
  92     zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
  93     zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
  94     DEF_ZMM_OFFS(16),
  95     DEF_ZMM_OFFS(17),
  96     // 18..31 are implied in range usage
  97     fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
  98     fpu_stateH_end,
  99     r15_off, r15H_off,
 100     r14_off, r14H_off,
 101     r13_off, r13H_off,
 102     r12_off, r12H_off,
 103     r11_off, r11H_off,
 104     r10_off, r10H_off,
 105     r9_off,  r9H_off,
 106     r8_off,  r8H_off,
 107     rdi_off, rdiH_off,
 108     rsi_off, rsiH_off,
 109     ignore_off, ignoreH_off,  // extra copy of rbp
 110     rsp_off, rspH_off,
 111     rbx_off, rbxH_off,
 112     rdx_off, rdxH_off,
 113     rcx_off, rcxH_off,
 114     rax_off, raxH_off,
 115     // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
 116     align_off, alignH_off,


 126  public:
 127   static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
 128   static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
 129 
 130   // Offsets into the register save area
 131   // Used by deoptimization when it is managing result register
 132   // values on its own
 133 
 134   static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
 135   static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
 136   static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
 137   static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
 138   static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
 139 
 140   // During deoptimization only the result registers need to be restored,
 141   // all the other values have already been extracted.
 142   static void restore_result_registers(MacroAssembler* masm);
 143 };
 144 
 145 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 146   int off = 0;
 147   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 148   if (UseAVX < 3) {
 149     num_xmm_regs = num_xmm_regs/2;
 150   }
 151 #if defined(COMPILER2) || INCLUDE_JVMCI
 152   if (save_vectors) {
 153     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
 154     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
 155   }
 156 #else
 157   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 158 #endif
 159 
 160   // Always make the frame size 16-byte aligned; the save area is laid out the same whether or not vectors are saved
 161   int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs);

 162   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 163   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 164   // CodeBlob frame size is in words.
 165   int frame_size_in_words = frame_size_in_bytes / wordSize;
 166   *total_frame_words = frame_size_in_words;
 167 
 168   // Save registers, fpu state, and flags.
 169   // We assume caller has already pushed the return address onto the
 170   // stack, so rsp is 8-byte aligned here.
 171   // We push rbp twice in this sequence because we want the real rbp
 172   // to be under the return address like a normal enter.
 173 
 174   __ enter();          // rsp becomes 16-byte aligned here
 175   __ push_CPU_state(); // Push a multiple of 16 bytes
 176 
 177   // push_CPU_state saved only the low 128 bits of xmm0..xmm15; wider vector state is saved explicitly below
 178   if (save_vectors) {
 179     // Save upper half of YMM registers (0..15)
 180     int base_addr = XSAVE_AREA_YMM_BEGIN;
 181     for (int n = 0; n < 16; n++) {
 182       __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
 183     }
 184     if (VM_Version::supports_evex()) {
 185       // Save upper half of ZMM registers (0..15)
 186       base_addr = XSAVE_AREA_ZMM_BEGIN;
 187       for (int n = 0; n < 16; n++) {
 188         __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
 189       }
 190       // Save full ZMM registers (16..num_xmm_regs)
 191       base_addr = XSAVE_AREA_UPPERBANK;
 192       int off = 0;
 193       int vector_len = Assembler::AVX_512bit;
 194       for (int n = 16; n < num_xmm_regs; n++) {
 195         __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
 196       }
 197     }
 198   } else {
 199     if (VM_Version::supports_evex()) {
 200       // Save upper bank of ZMM registers (16..31) for double/float usage
 201       int base_addr = XSAVE_AREA_UPPERBANK;
 202       int off = 0;
 203       for (int n = 16; n < num_xmm_regs; n++) {
 204         __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
 205       }
 206     }
 207   }
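       // Net effect of the block above (a summary; it assumes, as the asserts
       // do, that UseAVX > 2 implies EVEX): push_CPU_state captured the low
       // 128 bits of xmm0..xmm15; with save_vectors the loops add the ymm and
       // zmm upper lanes plus the full upper-bank registers, and without it
       // only the 64-bit scalar lanes of xmm16..xmm31 need to be spilled.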
 208   if (frame::arg_reg_save_area_bytes != 0) {
 209     // Allocate argument register save area
 210     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 211   }
 212 
 213   // Set an oopmap for the call site.  This oopmap will map all
 214   // oop-registers and debug-info registers as callee-saved.  This
 215   // will allow deoptimization at this safepoint to find all possible
 216   // debug-info recordings, as well as let GC find all oops.
 217 
 218   OopMapSet *oop_maps = new OopMapSet();
 219   OopMap* map = new OopMap(frame_size_in_slots, 0);
 220 
 221 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x))

 222 
 223   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
 224   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
 225   map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
 226   map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
 227   // rbp location is known implicitly by the frame sender code, needs no oopmap
 228   // and the location where rbp was saved is ignored
 229   map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
 230   map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
 231   map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
 232   map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
 233   map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
 234   map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
 235   map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
 236   map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
 237   map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
 238   map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
 239   // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
 240   // on EVEX enabled targets, we get it included in the xsave area
 241   off = xmm0_off;
 242   int delta = xmm1_off - off;
 243   for (int n = 0; n < 16; n++) {
 244     XMMRegister xmm_name = as_XMMRegister(n);
 245     map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
 246     off += delta;
 247   }
 248   if (UseAVX > 2) {
 249     // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
 250     off = zmm16_off;
 251     delta = zmm17_off - off;
 252     for (int n = 16; n < num_xmm_regs; n++) {
 253       XMMRegister zmm_name = as_XMMRegister(n);
 254       map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
 255       off += delta;
 256     }
 257   }
 258 
 259 #if defined(COMPILER2) || INCLUDE_JVMCI
 260   if (save_vectors) {
 261     off = ymm0_off;
 262     int delta = ymm1_off - off;
 263     for (int n = 0; n < 16; n++) {
 264       XMMRegister ymm_name = as_XMMRegister(n);
 265       map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
 266       off += delta;
 267     }
 268   }
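       // next(4) advances four 32-bit VMReg slots (16 bytes) from the base of
       // the register, naming the first slot of the YMM upper half saved at
       // ymmN_off. Illustrative reading, assuming 4-byte VMReg stack slots:
       //
       //   xmm0->as_VMReg()           // bits   0..31 of the register
       //   xmm0->as_VMReg()->next(4)  // bits 128..159, start of the upper lane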
 269 #endif // COMPILER2 || INCLUDE_JVMCI
 270 
 271   // %%% These should all be a waste but we'll keep things as they were for now
 272   if (true) {
 273     map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
 274     map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
 275     map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
 276     map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
 277     // rbp location is known implicitly by the frame sender code, needs no oopmap
 278     map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
 279     map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
 280     map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
 281     map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
 282     map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
 283     map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
 284     map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
 285     map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
 286     map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
 287     map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
 288     // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
 289     // on EVEX enabled targets, we get it included in the xsave area
 290     off = xmm0H_off;
 291     delta = xmm1H_off - off;
 292     for (int n = 0; n < 16; n++) {
 293       XMMRegister xmm_name = as_XMMRegister(n);
 294       map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
 295       off += delta;
 296     }
 297     if (UseAVX > 2) {
 298       // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
 299       off = zmm16H_off;
 300       delta = zmm17H_off - off;
 301       for (int n = 16; n < num_xmm_regs; n++) {
 302         XMMRegister zmm_name = as_XMMRegister(n);
 303         map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
 304         off += delta;
 305       }
 306     }
 307   }
 308 
 309   return map;
 310 }
 311 
 312 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 313   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 314   if (UseAVX < 3) {
 315     num_xmm_regs = num_xmm_regs/2;
 316   }
 317   if (frame::arg_reg_save_area_bytes != 0) {
 318     // Pop arg register save area
 319     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 320   }
 321 
 322 #if defined(COMPILER2) || INCLUDE_JVMCI
 323   if (restore_vectors) {
 324     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
 325     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
 326   }
 327 #else
 328   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 329 #endif
 330 
 331   // pop_CPU_state below reloads only the low 128 bits of xmm0..xmm15; anything wider must be restored here
 332   if (restore_vectors) {
 333     // Restore upper half of YMM registers (0..15)
 334     int base_addr = XSAVE_AREA_YMM_BEGIN;
 335     for (int n = 0; n < 16; n++) {
 336       __ vinsertf128h(as_XMMRegister(n), Address(rsp,  base_addr+n*16));
 337     }
 338     if (VM_Version::supports_evex()) {
 339       // Restore upper half of ZMM registers (0..15)
 340       base_addr = XSAVE_AREA_ZMM_BEGIN;
 341       for (int n = 0; n < 16; n++) {
 342         __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
 343       }
 344       // Restore full ZMM registers (16..num_xmm_regs)
 345       base_addr = XSAVE_AREA_UPPERBANK;
 346       int vector_len = Assembler::AVX_512bit;
 347       int off = 0;
 348       for (int n = 16; n < num_xmm_regs; n++) {
 349         __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
 350       }
 351     }
 352   } else {
 353     if (VM_Version::supports_evex()) {
 354       // Restore upper bank of ZMM registers (16..31) for double/float usage
 355       int base_addr = XSAVE_AREA_UPPERBANK;
 356       int off = 0;
 357       for (int n = 16; n < num_xmm_regs; n++) {
 358         __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
 359       }
 360     }
 361   }
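       // Ordering note (a sketch of why this works, assuming pop_CPU_state is
       // fxrstor-based, as the explicit saves in save_live_registers imply):
       // fxrstor rewrites only the low 128 bits of xmm0..xmm15 and leaves the
       // wider lanes and the upper bank untouched, so re-merging the upper
       // lanes first and letting pop_CPU_state fill in the low halves restores
       // each register exactly once.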
 362 
 363   // Recover CPU state
 364   __ pop_CPU_state();
 365   // Get the rbp described implicitly by the calling convention (no oopMap)
 366   __ pop(rbp);
 367 }
 368 
 369 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 370 
 371   // Just restore result register. Only used by deoptimization. By
 372   // now any callee save register that needs to be restored to a c2
 373   // caller of the deoptee has been extracted into the vframeArray
 374   // and will be stuffed into the c2i adapter we create for later
 375   // restoration so only result registers need to be restored here.
 376 
 377   // Restore fp result register
 378   __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
 379   // Restore integer result register
 380   __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
 381   __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));
 382 

