src/cpu/x86/vm/sharedRuntime_x86_64.cpp

Old version of the file:

  30 #include "asm/macroAssembler.inline.hpp"
  31 #include "code/debugInfoRec.hpp"
  32 #include "code/icBuffer.hpp"
  33 #include "code/vtableStubs.hpp"
  34 #include "interpreter/interpreter.hpp"
  35 #include "logging/log.hpp"
  36 #include "memory/resourceArea.hpp"
  37 #include "oops/compiledICHolder.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "runtime/vframeArray.hpp"
  40 #include "vmreg_x86.inline.hpp"
  41 #ifdef COMPILER1
  42 #include "c1/c1_Runtime1.hpp"
  43 #endif
  44 #ifdef COMPILER2
  45 #include "opto/runtime.hpp"
  46 #endif
  47 #if INCLUDE_JVMCI
  48 #include "jvmci/jvmciJavaClasses.hpp"
  49 #endif

  50 
  51 #define __ masm->
  52 
  53 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  54 
  55 class SimpleRuntimeFrame {
  56 
  57   public:
  58 
  59   // Most of the runtime stubs have this simple frame layout.
  60   // This class exists to make the layout shared in one place.
  61   // Offsets are for compiler stack slots, which are jints.
  62   enum layout {
  63     // The frame sender code expects that rbp will be in the "natural" place and
  64     // will override any oopMap setting for it. We must therefore force the layout
  65     // so that it agrees with the frame sender code.
  66     rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
  67     rbp_off2,
  68     return_off, return_off2,
  69     framesize


 134 
 135   static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
 136   static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
 137   static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
 138   static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
 139   static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
 140 
 141   // During deoptimization only the result registers need to be restored,
 142   // all the other values have already been extracted.
 143   static void restore_result_registers(MacroAssembler* masm);
 144 };
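The *_offset_in_bytes() accessors above just scale a compiler stack-slot index (a jint slot) by BytesPerInt. A minimal standalone sketch of that arithmetic, using made-up slot indexes rather than the real enum values:

#include <cstdio>

int main() {
  const int BytesPerInt = 4;     // one compiler stack slot holds one jint
  const int rax_off    = 142;    // hypothetical slot index, not the real layout value
  const int return_off = 146;    // hypothetical slot index
  std::printf("rax saved at byte offset %d\n", BytesPerInt * rax_off);
  std::printf("return address at byte offset %d\n", BytesPerInt * return_off);
  return 0;
}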
 145 
 146 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 147   int off = 0;
 148   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 149   if (UseAVX < 3) {
 150     num_xmm_regs = num_xmm_regs/2;
 151   }
 152 #if defined(COMPILER2) || INCLUDE_JVMCI
 153   if (save_vectors) {
 154     assert(UseAVX > 0, "up to 512bit vectors are supported with EVEX");
 155     assert(MaxVectorSize <= 64, "up to 512bit vectors are supported now");
 156   }
 157 #else
 158   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 159 #endif
 160 
 161   // Always make the frame size 16-byte aligned; both vector and non-vector stacks are always allocated this way
 162   int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs);
 163   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 164   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 165   // CodeBlob frame size is in words.
 166   int frame_size_in_words = frame_size_in_bytes / wordSize;
 167   *total_frame_words = frame_size_in_words;
 168 
 169   // Save registers, fpu state, and flags.
 170   // We assume caller has already pushed the return address onto the
 171   // stack, so rsp is 8-byte aligned here.
 172   // We push rbp twice in this sequence because we want the real rbp
 173   // to be under the return address like a normal enter.
 174 
 175   __ enter();          // rsp becomes 16-byte aligned here
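As a rough illustration of the unit conversions in save_live_registers() above, assuming the usual x86_64 values BytesPerInt == 4 and wordSize == 8, and a made-up reg_save_size:

#include <cstdio>

int main() {
  const int BytesPerInt   = 4;    // size of a compiler stack slot
  const int wordSize      = 8;    // x86_64 machine word
  const int reg_save_size = 352;  // hypothetical slot count for the save area
  int frame_size_in_bytes = reg_save_size * BytesPerInt;
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;  // what the OopMap wants
  int frame_size_in_words = frame_size_in_bytes / wordSize;     // what the CodeBlob wants
  std::printf("bytes=%d slots=%d words=%d\n",
              frame_size_in_bytes, frame_size_in_slots, frame_size_in_words);
  return 0;
}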


 189         __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
 190       }
 191       // Save full ZMM registers(16..num_xmm_regs)
 192       base_addr = XSAVE_AREA_UPPERBANK;
 193       off = 0;
 194       int vector_len = Assembler::AVX_512bit;
 195       for (int n = 16; n < num_xmm_regs; n++) {
 196         __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
 197       }
 198     }
 199   } else {
 200     if (VM_Version::supports_evex()) {
 201       // Save upper bank of ZMM registers(16..31) for double/float usage
 202       int base_addr = XSAVE_AREA_UPPERBANK;
 203       off = 0;
 204       for (int n = 16; n < num_xmm_regs; n++) {
 205         __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
 206       }
 207     }
 208   }
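The upper-bank loops above lay the 512-bit registers xmm16..xmm31 out at 64-byte strides from a base offset. A sketch of that address arithmetic; the base value here is hypothetical, not the real XSAVE_AREA_UPPERBANK constant:

#include <cstdio>

int main() {
  const int XSAVE_AREA_UPPERBANK = 1664;  // hypothetical base offset into the save area
  const int num_xmm_regs = 32;            // EVEX targets expose xmm0..xmm31
  int off = 0;
  for (int n = 16; n < num_xmm_regs; n++) {
    // mirrors Address(rsp, base_addr + (off++ * 64)) in the save loop
    std::printf("xmm%-2d -> [rsp + %d]\n", n, XSAVE_AREA_UPPERBANK + off++ * 64);
  }
  return 0;
}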

 209   if (frame::arg_reg_save_area_bytes != 0) {
 210     // Allocate argument register save area
 211     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 212   }
 213 
 214   // Set an oopmap for the call site.  This oopmap will map all
 215   // oop-registers and debug-info registers as callee-saved.  This
 216   // will allow deoptimization at this safepoint to find all possible
 217   // debug-info recordings, as well as let GC find all oops.
 218 
 219   OopMapSet *oop_maps = new OopMapSet();
 220   OopMap* map = new OopMap(frame_size_in_slots, 0);
 221 
 222 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x))
 223 
 224   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
 225   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
 226   map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
 227   map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
 228   // rbp location is known implicitly by the frame sender code, needs no oopmap
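Conceptually, each set_callee_saved() call above records "register X lives in stack slot Y of this frame" so deoptimization and GC can find the value later. A loose standalone analogue; the register names and slot indexes are illustrative only, not the real layout:

#include <cstdio>
#include <map>
#include <string>

int main() {
  const int BytesPerInt = 4;
  std::map<std::string, int> callee_saved;  // register name -> compiler stack slot
  callee_saved["rax"] = 142;                // hypothetical slot indexes
  callee_saved["rcx"] = 144;
  callee_saved["rdx"] = 146;
  callee_saved["rbx"] = 148;
  for (const auto& e : callee_saved) {
    std::printf("%s saved in slot %d (byte offset %d from sp)\n",
                e.first.c_str(), e.second, e.second * BytesPerInt);
  }
  return 0;
}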


 305         off += delta;
 306       }
 307     }
 308   }
 309 
 310   return map;
 311 }
 312 
 313 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 314   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 315   if (UseAVX < 3) {
 316     num_xmm_regs = num_xmm_regs/2;
 317   }
 318   if (frame::arg_reg_save_area_bytes != 0) {
 319     // Pop arg register save area
 320     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 321   }
 322 
 323 #if defined(COMPILER2) || INCLUDE_JVMCI
 324   if (restore_vectors) {
 325     assert(UseAVX > 0, "up to 512bit vectors are supported with EVEX");
 326     assert(MaxVectorSize <= 64, "up to 512bit vectors are supported now");
 327   }
 328 #else
 329   assert(!restore_vectors, "vectors are generated only by C2");
 330 #endif
 331 


 332   // On EVEX enabled targets everything is handled in pop fpu state
 333   if (restore_vectors) {
 334     // Restore upper half of YMM registers (0..15)
 335     int base_addr = XSAVE_AREA_YMM_BEGIN;
 336     for (int n = 0; n < 16; n++) {
 337       __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
 338     }
 339     if (VM_Version::supports_evex()) {
 340       // Restore upper half of ZMM registers (0..15)
 341       base_addr = XSAVE_AREA_ZMM_BEGIN;
 342       for (int n = 0; n < 16; n++) {
 343         __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
 344       }
 345       // Restore full ZMM registers(16..num_xmm_regs)
 346       base_addr = XSAVE_AREA_UPPERBANK;
 347       int vector_len = Assembler::AVX_512bit;
 348       int off = 0;
 349       for (int n = 16; n < num_xmm_regs; n++) {
 350         __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
 351       }


 511 
 512   return round_to(stk_args, 2);
 513 }
 514 
 515 // Patch the caller's callsite with the entry to compiled code if it exists.
 516 static void patch_callers_callsite(MacroAssembler *masm) {
 517   Label L;
 518   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 519   __ jcc(Assembler::equal, L);
 520 
 521   // Save the current stack pointer
 522   __ mov(r13, rsp);
 523   // Schedule the branch target address early.
 524   // Call into the VM to patch the caller, then jump to compiled callee
 525   // rax isn't live so capture return address while we easily can
 526   __ movptr(rax, Address(rsp, 0));
 527 
 528   // align stack so push_CPU_state doesn't fault
 529   __ andptr(rsp, -(StackAlignmentInBytes));
 530   __ push_CPU_state();
 531 
 532   // VM needs caller's callsite
 533   // VM needs target method
 534   // This needs to be a long call since we will relocate this adapter to
 535   // the codeBuffer and it may not reach
 536 
 537   // Allocate argument register save area
 538   if (frame::arg_reg_save_area_bytes != 0) {
 539     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 540   }
 541   __ mov(c_rarg0, rbx);
 542   __ mov(c_rarg1, rax);
 543   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
 544 
 545   // De-allocate argument register save area
 546   if (frame::arg_reg_save_area_bytes != 0) {
 547     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 548   }
 549 

 550   __ pop_CPU_state();
 551   // restore sp
 552   __ mov(rsp, r13);
 553   __ bind(L);
 554 }
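patch_callers_callsite() shows the pattern used throughout this file for calling C/VM code by hand: remember the old rsp, force 16-byte alignment, and carve out the Windows x64 argument (shadow) save area when frame::arg_reg_save_area_bytes is non-zero. A sketch of just the pointer arithmetic, with a hypothetical starting rsp:

#include <cstdint>
#include <cstdio>

int main() {
  const std::uintptr_t StackAlignmentInBytes   = 16;
  const std::uintptr_t arg_reg_save_area_bytes = 32;   // Windows x64 shadow space; 0 on Linux
  std::uintptr_t rsp   = 0x7ffc0000abc8;               // hypothetical incoming stack pointer
  std::uintptr_t saved = rsp;                          // mov(r13, rsp) / mov(r12, rsp)
  rsp &= ~(StackAlignmentInBytes - 1);                 // andptr(rsp, -StackAlignmentInBytes)
  rsp -= arg_reg_save_area_bytes;                      // subptr(rsp, arg_reg_save_area_bytes)
  std::printf("saved=%#zx aligned call sp=%#zx\n", (size_t)saved, (size_t)rsp);
  // ... the runtime call would happen here ...
  rsp = saved;                                         // mov(rsp, r13) restores the old sp
  return 0;
}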
 555 
 556 
 557 static void gen_c2i_adapter(MacroAssembler *masm,
 558                             int total_args_passed,
 559                             int comp_args_on_stack,
 560                             const BasicType *sig_bt,
 561                             const VMRegPair *regs,
 562                             Label& skip_fixup) {
 563   // Before we get into the guts of the C2I adapter, see if we should be here
 564   // at all.  We've come from compiled code and are attempting to jump to the
 565   // interpreter, which means the caller made a static call to get here
 566   // (vcalls always get a compiled target if there is one).  Check for a
 567   // compiled target.  If there is one, we need to patch the caller's call.
 568   patch_callers_callsite(masm);
 569 


1448   save_or_restore_arguments(masm, stack_slots, total_in_args,
1449                             arg_save_area, map, in_regs, in_sig_bt);
1450 
1451   address the_pc = __ pc();
1452   oop_maps->add_gc_map( __ offset(), map);
1453   __ set_last_Java_frame(rsp, noreg, the_pc);
1454 
1455   __ block_comment("block_for_jni_critical");
1456   __ movptr(c_rarg0, r15_thread);
1457   __ mov(r12, rsp); // remember sp
1458   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
1459   __ andptr(rsp, -16); // align stack as required by ABI
1460   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
1461   __ mov(rsp, r12); // restore sp
1462   __ reinit_heapbase();
1463 
1464   __ reset_last_Java_frame(false);
1465 
1466   save_or_restore_arguments(masm, stack_slots, total_in_args,
1467                             arg_save_area, NULL, in_regs, in_sig_bt);
1468 
1469   __ bind(cont);
1470 #ifdef ASSERT
1471   if (StressCriticalJNINatives) {
1472     // Stress register saving
1473     OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1474     save_or_restore_arguments(masm, stack_slots, total_in_args,
1475                               arg_save_area, map, in_regs, in_sig_bt);
1476     // Destroy argument registers
1477     for (int i = 0; i < total_in_args - 1; i++) {
1478       if (in_regs[i].first()->is_Register()) {
1479         const Register reg = in_regs[i].first()->as_Register();
1480         __ xorptr(reg, reg);
1481       } else if (in_regs[i].first()->is_XMMRegister()) {
1482         __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
1483       } else if (in_regs[i].first()->is_FloatRegister()) {
1484         ShouldNotReachHere();
1485       } else if (in_regs[i].first()->is_stack()) {
1486         // Nothing to do
1487       } else {
1488         ShouldNotReachHere();


2468 
2469   // check for safepoint operation in progress and/or pending suspend requests
2470   {
2471     Label Continue;
2472 
2473     __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
2474              SafepointSynchronize::_not_synchronized);
2475 
2476     Label L;
2477     __ jcc(Assembler::notEqual, L);
2478     __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
2479     __ jcc(Assembler::equal, Continue);
2480     __ bind(L);
2481 
2482     // Don't use call_VM as it will see a possible pending exception and forward it
2483     // and never return here preventing us from clearing _last_native_pc down below.
2484     // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
2485     // preserved and correspond to the bcp/locals pointers. So we do a runtime call
2486     // by hand.
2487     //

2488     save_native_result(masm, ret_type, stack_slots);
2489     __ mov(c_rarg0, r15_thread);
2490     __ mov(r12, rsp); // remember sp
2491     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2492     __ andptr(rsp, -16); // align stack as required by ABI
2493     if (!is_critical_native) {
2494       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
2495     } else {
2496       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
2497     }
2498     __ mov(rsp, r12); // restore sp
2499     __ reinit_heapbase();
2500     // Restore any method result value
2501     restore_native_result(masm, ret_type, stack_slots);
2502 
2503     if (is_critical_native) {
2504       // The call above performed the transition to thread_in_Java so
2505       // skip the transition logic below.
2506       __ jmpb(after_transition);
2507     }
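The two-branch test at the top of this safepoint check takes the slow path when either the safepoint state is not _not_synchronized or the thread has suspend flags set. Restated as plain C++; the enum values and ordering are assumptions for illustration, not the real SafepointSynchronize definition:

#include <cstdio>

enum SafepointState { _not_synchronized, _synchronizing, _synchronized };  // assumed ordering

bool needs_native_trans_slow_path(SafepointState state, int suspend_flags) {
  // cmp32(state, _not_synchronized); jcc(notEqual, L)     -> slow path
  // cmpl(suspend_flags, 0);          jcc(equal, Continue) -> otherwise fast path
  return state != _not_synchronized || suspend_flags != 0;
}

int main() {
  std::printf("%d %d %d\n",
              needs_native_trans_slow_path(_not_synchronized, 0),   // 0: fast path
              needs_native_trans_slow_path(_synchronizing,    0),   // 1: slow path
              needs_native_trans_slow_path(_not_synchronized, 2));  // 1: slow path
  return 0;
}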


2644     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
2645     restore_args(masm, total_c_args, c_arg, out_regs);
2646 
2647 #ifdef ASSERT
2648     { Label L;
2649     __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
2650     __ jcc(Assembler::equal, L);
2651     __ stop("no pending exception allowed on exit from monitorenter");
2652     __ bind(L);
2653     }
2654 #endif
2655     __ jmp(lock_done);
2656 
2657     // END Slow path lock
2658 
2659     // BEGIN Slow path unlock
2660     __ bind(slow_path_unlock);
2661 
2662     // If we haven't already saved the native result we must save it now as xmm registers
2663     // are still exposed.
2664 
2665     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2666       save_native_result(masm, ret_type, stack_slots);
2667     }
2668 
2669     __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
2670 
2671     __ mov(c_rarg0, obj_reg);
2672     __ mov(c_rarg2, r15_thread);
2673     __ mov(r12, rsp); // remember sp
2674     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2675     __ andptr(rsp, -16); // align stack as required by ABI
2676 
2677     // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
2678     // NOTE that obj_reg == rbx currently
2679     __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
2680     __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
2681 
2682     // args are (oop obj, BasicLock* lock, JavaThread* thread)
2683     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
2684     __ mov(rsp, r12); // restore sp


2690       __ jcc(Assembler::equal, L);
2691       __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
2692       __ bind(L);
2693     }
2694 #endif /* ASSERT */
2695 
2696     __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx);
2697 
2698     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2699       restore_native_result(masm, ret_type, stack_slots);
2700     }
2701     __ jmp(unlock_done);
2702 
2703     // END Slow path unlock
2704 
2705   } // synchronized
2706 
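The slow-path unlock above parks the thread's pending exception in rbx, clears the field so the VM call (which contains an EXCEPTION_MARK) sees a clean thread, and writes it back afterwards. A toy standalone version of that save/clear/restore dance; the struct and values are made up for illustration:

#include <cassert>
#include <cstdio>

struct ToyThread { void* pending_exception; };

void runtime_call_with_exception_mark(ToyThread* t) {
  assert(t->pending_exception == nullptr);   // what an EXCEPTION_MARK effectively demands
  std::printf("runtime call runs with no pending exception\n");
}

int main() {
  ToyThread thread{ reinterpret_cast<void*>(0x1234) };   // pretend an exception is pending
  void* saved = thread.pending_exception;                // movptr(rbx, pending_exception)
  thread.pending_exception = nullptr;                    // movptr(pending_exception, NULL_WORD)
  runtime_call_with_exception_mark(&thread);
  thread.pending_exception = saved;                      // movptr(pending_exception, rbx)
  std::printf("restored pending exception %p\n", thread.pending_exception);
  return 0;
}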
2707   // SLOW PATH Reguard the stack if needed
2708 
2709   __ bind(reguard);

2710   save_native_result(masm, ret_type, stack_slots);
2711   __ mov(r12, rsp); // remember sp
2712   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2713   __ andptr(rsp, -16); // align stack as required by ABI
2714   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
2715   __ mov(rsp, r12); // restore sp
2716   __ reinit_heapbase();
2717   restore_native_result(masm, ret_type, stack_slots);
2718   // and continue
2719   __ jmp(reguard_done);
2720 
2721 
2722 
2723   __ flush();
2724 
2725   nmethod *nm = nmethod::new_native_nmethod(method,
2726                                             compile_id,
2727                                             masm->code(),
2728                                             vep_offset,
2729                                             frame_complete,


New version of the file (this change adds the vm_version_x86.hpp include, updated assert messages, and __ vzeroupper() calls before transitions out of generated code):

  30 #include "asm/macroAssembler.inline.hpp"
  31 #include "code/debugInfoRec.hpp"
  32 #include "code/icBuffer.hpp"
  33 #include "code/vtableStubs.hpp"
  34 #include "interpreter/interpreter.hpp"
  35 #include "logging/log.hpp"
  36 #include "memory/resourceArea.hpp"
  37 #include "oops/compiledICHolder.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "runtime/vframeArray.hpp"
  40 #include "vmreg_x86.inline.hpp"
  41 #ifdef COMPILER1
  42 #include "c1/c1_Runtime1.hpp"
  43 #endif
  44 #ifdef COMPILER2
  45 #include "opto/runtime.hpp"
  46 #endif
  47 #if INCLUDE_JVMCI
  48 #include "jvmci/jvmciJavaClasses.hpp"
  49 #endif
  50 #include "vm_version_x86.hpp"
  51 
  52 #define __ masm->
  53 
  54 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  55 
  56 class SimpleRuntimeFrame {
  57 
  58   public:
  59 
  60   // Most of the runtime stubs have this simple frame layout.
  61   // This class exists to make the layout shared in one place.
  62   // Offsets are for compiler stack slots, which are jints.
  63   enum layout {
  64     // The frame sender code expects that rbp will be in the "natural" place and
  65     // will override any oopMap setting for it. We must therefore force the layout
  66     // so that it agrees with the frame sender code.
  67     rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
  68     rbp_off2,
  69     return_off, return_off2,
  70     framesize


 135 
 136   static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
 137   static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
 138   static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
 139   static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
 140   static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
 141 
 142   // During deoptimization only the result registers need to be restored,
 143   // all the other values have already been extracted.
 144   static void restore_result_registers(MacroAssembler* masm);
 145 };
 146 
 147 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 148   int off = 0;
 149   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 150   if (UseAVX < 3) {
 151     num_xmm_regs = num_xmm_regs/2;
 152   }
 153 #if defined(COMPILER2) || INCLUDE_JVMCI
 154   if (save_vectors) {
 155     assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
 156     assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
 157   }
 158 #else
 159   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 160 #endif
 161 
 162   // Always make the frame size 16-byte aligned; both vector and non-vector stacks are always allocated this way
 163   int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs);
 164   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 165   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 166   // CodeBlob frame size is in words.
 167   int frame_size_in_words = frame_size_in_bytes / wordSize;
 168   *total_frame_words = frame_size_in_words;
 169 
 170   // Save registers, fpu state, and flags.
 171   // We assume caller has already pushed the return address onto the
 172   // stack, so rsp is 8-byte aligned here.
 173   // We push rbp twice in this sequence because we want the real rbp
 174   // to be under the return address like a normal enter.
 175 
 176   __ enter();          // rsp becomes 16-byte aligned here


 190         __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
 191       }
 192       // Save full ZMM registers(16..num_xmm_regs)
 193       base_addr = XSAVE_AREA_UPPERBANK;
 194       off = 0;
 195       int vector_len = Assembler::AVX_512bit;
 196       for (int n = 16; n < num_xmm_regs; n++) {
 197         __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
 198       }
 199     }
 200   } else {
 201     if (VM_Version::supports_evex()) {
 202       // Save upper bank of ZMM registers(16..31) for double/float usage
 203       int base_addr = XSAVE_AREA_UPPERBANK;
 204       off = 0;
 205       for (int n = 16; n < num_xmm_regs; n++) {
 206         __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
 207       }
 208     }
 209   }
 210   __ vzeroupper();
 211   if (frame::arg_reg_save_area_bytes != 0) {
 212     // Allocate argument register save area
 213     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 214   }
 215 
 216   // Set an oopmap for the call site.  This oopmap will map all
 217   // oop-registers and debug-info registers as callee-saved.  This
 218   // will allow deoptimization at this safepoint to find all possible
 219   // debug-info recordings, as well as let GC find all oops.
 220 
 221   OopMapSet *oop_maps = new OopMapSet();
 222   OopMap* map = new OopMap(frame_size_in_slots, 0);
 223 
 224 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x))
 225 
 226   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
 227   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
 228   map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
 229   map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
 230   // rbp location is known implicitly by the frame sender code, needs no oopmap
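The __ vzeroupper() added in this version clears the upper halves of the YMM/ZMM registers before generated code calls out to C/VM code, avoiding the AVX-to-SSE transition penalty on processors where mixing dirty AVX state with legacy SSE encodings is expensive. A tiny user-level illustration using compiler intrinsics (build with AVX enabled, e.g. -mavx); this is an analogy, not HotSpot code:

#include <immintrin.h>
#include <cstdio>

int main() {
  __m256d v = _mm256_set1_pd(1.5);   // touching a 256-bit register dirties the upper YMM bits
  double out[4];
  _mm256_storeu_pd(out, v);
  _mm256_zeroupper();                // emits vzeroupper, the same instruction the stubs now issue
  // any subsequent legacy-SSE code (e.g. in a C runtime call) no longer pays a transition penalty
  std::printf("%.1f %.1f %.1f %.1f\n", out[0], out[1], out[2], out[3]);
  return 0;
}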


 307         off += delta;
 308       }
 309     }
 310   }
 311 
 312   return map;
 313 }
 314 
 315 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 316   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 317   if (UseAVX < 3) {
 318     num_xmm_regs = num_xmm_regs/2;
 319   }
 320   if (frame::arg_reg_save_area_bytes != 0) {
 321     // Pop arg register save area
 322     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 323   }
 324 
 325 #if defined(COMPILER2) || INCLUDE_JVMCI
 326   if (restore_vectors) {
 327     assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
 328     assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
 329   }
 330 #else
 331   assert(!restore_vectors, "vectors are generated only by C2");
 332 #endif
 333 
 334   __ vzeroupper();
 335 
 336   // On EVEX enabled targets everything is handled in pop fpu state
 337   if (restore_vectors) {
 338     // Restore upper half of YMM registers (0..15)
 339     int base_addr = XSAVE_AREA_YMM_BEGIN;
 340     for (int n = 0; n < 16; n++) {
 341       __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
 342     }
 343     if (VM_Version::supports_evex()) {
 344       // Restore upper half of ZMM registers (0..15)
 345       base_addr = XSAVE_AREA_ZMM_BEGIN;
 346       for (int n = 0; n < 16; n++) {
 347         __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
 348       }
 349       // Restore full ZMM registers(16..num_xmm_regs)
 350       base_addr = XSAVE_AREA_UPPERBANK;
 351       int vector_len = Assembler::AVX_512bit;
 352       int off = 0;
 353       for (int n = 16; n < num_xmm_regs; n++) {
 354         __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
 355       }


 515 
 516   return round_to(stk_args, 2);
 517 }
 518 
 519 // Patch the caller's callsite with the entry to compiled code if it exists.
 520 static void patch_callers_callsite(MacroAssembler *masm) {
 521   Label L;
 522   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 523   __ jcc(Assembler::equal, L);
 524 
 525   // Save the current stack pointer
 526   __ mov(r13, rsp);
 527   // Schedule the branch target address early.
 528   // Call into the VM to patch the caller, then jump to compiled callee
 529   // rax isn't live so capture return address while we easily can
 530   __ movptr(rax, Address(rsp, 0));
 531 
 532   // align stack so push_CPU_state doesn't fault
 533   __ andptr(rsp, -(StackAlignmentInBytes));
 534   __ push_CPU_state();
 535   __ vzeroupper();
 536   // VM needs caller's callsite
 537   // VM needs target method
 538   // This needs to be a long call since we will relocate this adapter to
 539   // the codeBuffer and it may not reach
 540 
 541   // Allocate argument register save area
 542   if (frame::arg_reg_save_area_bytes != 0) {
 543     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 544   }
 545   __ mov(c_rarg0, rbx);
 546   __ mov(c_rarg1, rax);
 547   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
 548 
 549   // De-allocate argument register save area
 550   if (frame::arg_reg_save_area_bytes != 0) {
 551     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 552   }
 553 
 554   __ vzeroupper();
 555   __ pop_CPU_state();
 556   // restore sp
 557   __ mov(rsp, r13);
 558   __ bind(L);
 559 }
 560 
 561 
 562 static void gen_c2i_adapter(MacroAssembler *masm,
 563                             int total_args_passed,
 564                             int comp_args_on_stack,
 565                             const BasicType *sig_bt,
 566                             const VMRegPair *regs,
 567                             Label& skip_fixup) {
 568   // Before we get into the guts of the C2I adapter, see if we should be here
 569   // at all.  We've come from compiled code and are attempting to jump to the
 570   // interpreter, which means the caller made a static call to get here
 571   // (vcalls always get a compiled target if there is one).  Check for a
 572   // compiled target.  If there is one, we need to patch the caller's call.
 573   patch_callers_callsite(masm);
 574 


1453   save_or_restore_arguments(masm, stack_slots, total_in_args,
1454                             arg_save_area, map, in_regs, in_sig_bt);
1455 
1456   address the_pc = __ pc();
1457   oop_maps->add_gc_map( __ offset(), map);
1458   __ set_last_Java_frame(rsp, noreg, the_pc);
1459 
1460   __ block_comment("block_for_jni_critical");
1461   __ movptr(c_rarg0, r15_thread);
1462   __ mov(r12, rsp); // remember sp
1463   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
1464   __ andptr(rsp, -16); // align stack as required by ABI
1465   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
1466   __ mov(rsp, r12); // restore sp
1467   __ reinit_heapbase();
1468 
1469   __ reset_last_Java_frame(false);
1470 
1471   save_or_restore_arguments(masm, stack_slots, total_in_args,
1472                             arg_save_area, NULL, in_regs, in_sig_bt);

1473   __ bind(cont);
1474 #ifdef ASSERT
1475   if (StressCriticalJNINatives) {
1476     // Stress register saving
1477     OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1478     save_or_restore_arguments(masm, stack_slots, total_in_args,
1479                               arg_save_area, map, in_regs, in_sig_bt);
1480     // Destroy argument registers
1481     for (int i = 0; i < total_in_args - 1; i++) {
1482       if (in_regs[i].first()->is_Register()) {
1483         const Register reg = in_regs[i].first()->as_Register();
1484         __ xorptr(reg, reg);
1485       } else if (in_regs[i].first()->is_XMMRegister()) {
1486         __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
1487       } else if (in_regs[i].first()->is_FloatRegister()) {
1488         ShouldNotReachHere();
1489       } else if (in_regs[i].first()->is_stack()) {
1490         // Nothing to do
1491       } else {
1492         ShouldNotReachHere();


2472 
2473   // check for safepoint operation in progress and/or pending suspend requests
2474   {
2475     Label Continue;
2476 
2477     __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
2478              SafepointSynchronize::_not_synchronized);
2479 
2480     Label L;
2481     __ jcc(Assembler::notEqual, L);
2482     __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
2483     __ jcc(Assembler::equal, Continue);
2484     __ bind(L);
2485 
2486     // Don't use call_VM as it will see a possible pending exception and forward it
2487     // and never return here preventing us from clearing _last_native_pc down below.
2488     // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
2489     // preserved and correspond to the bcp/locals pointers. So we do a runtime call
2490     // by hand.
2491     //
2492     __ vzeroupper();
2493     save_native_result(masm, ret_type, stack_slots);
2494     __ mov(c_rarg0, r15_thread);
2495     __ mov(r12, rsp); // remember sp
2496     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2497     __ andptr(rsp, -16); // align stack as required by ABI
2498     if (!is_critical_native) {
2499       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
2500     } else {
2501       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
2502     }
2503     __ mov(rsp, r12); // restore sp
2504     __ reinit_heapbase();
2505     // Restore any method result value
2506     restore_native_result(masm, ret_type, stack_slots);
2507 
2508     if (is_critical_native) {
2509       // The call above performed the transition to thread_in_Java so
2510       // skip the transition logic below.
2511       __ jmpb(after_transition);
2512     }


2649     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
2650     restore_args(masm, total_c_args, c_arg, out_regs);
2651 
2652 #ifdef ASSERT
2653     { Label L;
2654     __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
2655     __ jcc(Assembler::equal, L);
2656     __ stop("no pending exception allowed on exit from monitorenter");
2657     __ bind(L);
2658     }
2659 #endif
2660     __ jmp(lock_done);
2661 
2662     // END Slow path lock
2663 
2664     // BEGIN Slow path unlock
2665     __ bind(slow_path_unlock);
2666 
2667     // If we haven't already saved the native result we must save it now as xmm registers
2668     // are still exposed.
2669     __ vzeroupper();
2670     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2671       save_native_result(masm, ret_type, stack_slots);
2672     }
2673 
2674     __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
2675 
2676     __ mov(c_rarg0, obj_reg);
2677     __ mov(c_rarg2, r15_thread);
2678     __ mov(r12, rsp); // remember sp
2679     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2680     __ andptr(rsp, -16); // align stack as required by ABI
2681 
2682     // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
2683     // NOTE that obj_reg == rbx currently
2684     __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
2685     __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
2686 
2687     // args are (oop obj, BasicLock* lock, JavaThread* thread)
2688     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
2689     __ mov(rsp, r12); // restore sp


2695       __ jcc(Assembler::equal, L);
2696       __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
2697       __ bind(L);
2698     }
2699 #endif /* ASSERT */
2700 
2701     __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx);
2702 
2703     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2704       restore_native_result(masm, ret_type, stack_slots);
2705     }
2706     __ jmp(unlock_done);
2707 
2708     // END Slow path unlock
2709 
2710   } // synchronized
2711 
2712   // SLOW PATH Reguard the stack if needed
2713 
2714   __ bind(reguard);
2715   __ vzeroupper();
2716   save_native_result(masm, ret_type, stack_slots);
2717   __ mov(r12, rsp); // remember sp
2718   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2719   __ andptr(rsp, -16); // align stack as required by ABI
2720   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
2721   __ mov(rsp, r12); // restore sp
2722   __ reinit_heapbase();
2723   restore_native_result(masm, ret_type, stack_slots);
2724   // and continue
2725   __ jmp(reguard_done);
2726 
2727 
2728 
2729   __ flush();
2730 
2731   nmethod *nm = nmethod::new_native_nmethod(method,
2732                                             compile_id,
2733                                             masm->code(),
2734                                             vep_offset,
2735                                             frame_complete,

