< prev index next >

src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp

Print this page
rev 9228 : 8143072: Port JVMCI to AArch64
Summary: AArch64-specific code for JVMCI
Reviewed-by: duke


  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.hpp"
  28 #include "asm/macroAssembler.inline.hpp"
  29 #include "code/debugInfoRec.hpp"
  30 #include "code/icBuffer.hpp"
  31 #include "code/vtableStubs.hpp"
  32 #include "interpreter/interpreter.hpp"
  33 #include "interpreter/interp_masm.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "prims/jvmtiRedefineClassesTrace.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/vframeArray.hpp"
  38 #include "vmreg_aarch64.inline.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_Runtime1.hpp"
  41 #endif
  42 #ifdef COMPILER2
  43 #include "adfiles/ad_aarch64.hpp"
  44 #include "opto/runtime.hpp"
  45 #endif



  46 
  47 #ifdef BUILTIN_SIM
  48 #include "../../../../../../simulator/simulator.hpp"
  49 #endif
  50 
  51 #define __ masm->
  52 
  53 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  54 
  55 class SimpleRuntimeFrame {
  56 
  57   public:
  58 
  59   // Most of the runtime stubs have this simple frame layout.
  60   // This class exists to make the layout shared in one place.
  61   // Offsets are for compiler stack slots, which are jints.
  62   enum layout {
  63     // The frame sender code expects that rbp will be in the "natural" place and
  64     // will override any oopMap setting for it. We must therefore force the layout
  65     // so that it agrees with the frame sender code.


  92   // During deoptimization only the result registers need to be restored,
  93   // all the other values have already been extracted.
  94   static void restore_result_registers(MacroAssembler* masm);
  95 
  96     // Capture info about frame layout
  97   enum layout {
  98                 fpu_state_off = 0,
  99                 fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
 100                 // The frame sender code expects that rfp will be in
 101                 // the "natural" place and will override any oopMap
 102                 // setting for it. We must therefore force the layout
 103                 // so that it agrees with the frame sender code.
 104                 r0_off = fpu_state_off+FPUStateSizeInWords,
 105                 rfp_off = r0_off + 30 * 2,
 106                 return_off = rfp_off + 2,      // slot for return address
 107                 reg_save_size = return_off + 2};
 108 
 109 };
 110 
 111 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 112 #ifdef COMPILER2
 113   if (save_vectors) {
 114     // Save upper half of vector registers
 115     int vect_words = 32 * 8 / wordSize;
 116     additional_frame_words += vect_words;
 117   }
 118 #else
 119   assert(!save_vectors, "vectors are generated only by C2");
 120 #endif
 121 
 122   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
 123                                      reg_save_size*BytesPerInt, 16);
 124   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 125   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 126   // The caller will allocate additional_frame_words
 127   int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
 128   // CodeBlob frame size is in words.
 129   int frame_size_in_words = frame_size_in_bytes / wordSize;
 130   *total_frame_words = frame_size_in_words;
 131 
 132   // Save registers, fpu state, and flags.
 133 
 134   __ enter();
 135   __ push_CPU_state(save_vectors);
 136 
 137   // Set an oopmap for the call site.  This oopmap will map all
 138   // oop-registers and debug-info registers as callee-saved.  This
 139   // will allow deoptimization at this safepoint to find all possible


 149                                     // register slots are 8 bytes
 150                                     // wide, 32 floating-point
 151                                     // registers
 152       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
 153                                 r->as_VMReg());
 154     }
 155   }
 156 
 157   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
 158     FloatRegister r = as_FloatRegister(i);
 159     int sp_offset = save_vectors ? (4 * i) : (2 * i);
 160     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
 161                               r->as_VMReg());
 162   }
 163 
 164   return oop_map;
 165 }
 166 
// Undo save_live_registers(): pop the saved CPU (and optionally vector)
// state back into the registers and tear down the frame built by enter().
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
#ifndef COMPILER2
  // Only C2 emits vectorized code, so a vector restore is invalid elsewhere.
  assert(!restore_vectors, "vectors are generated only by C2");
#endif
  __ pop_CPU_state(restore_vectors);
  __ leave();
}
 174 
 175 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 176 
 177   // Just restore result register. Only used by deoptimization. By
 178   // now any callee save register that needs to be restored to a c2
 179   // caller of the deoptee has been extracted into the vframeArray
 180   // and will be stuffed into the c2i adapter we create for later
 181   // restoration so only result registers need to be restored here.
 182 
 183   // Restore fp result register
 184   __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
 185   // Restore integer result register
 186   __ ldr(r0, Address(sp, r0_offset_in_bytes()));
 187 
 188   // Pop all of the register save are off the stack
 189   __ add(sp, sp, round_to(return_offset_in_bytes(), 16));


 530                   L_ok);
 531     const char* msg = "i2c adapter must return to an interpreter frame";
 532     __ block_comment(msg);
 533     __ stop(msg);
 534     __ bind(L_ok);
 535     __ block_comment("} verify_i2ce ");
 536 #endif
 537   }
 538 
 539   // Cut-out for having no stack args.
 540   int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 541   if (comp_args_on_stack) {
 542     __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
 543     __ andr(sp, rscratch1, -16);
 544   }
 545 
 546   // Will jump to the compiled code just as if compiled code was doing it.
 547   // Pre-load the register-jump target early, to schedule it better.
 548   __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
 549 












 550   // Now generate the shuffle code.
 551   for (int i = 0; i < total_args_passed; i++) {
 552     if (sig_bt[i] == T_VOID) {
 553       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 554       continue;
 555     }
 556 
 557     // Pick up 0, 1 or 2 words from SP+offset.
 558 
 559     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 560             "scrambled load targets?");
 561     // Load in argument order going down.
 562     int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
 563     // Point to interpreter value (vs. tag)
 564     int next_off = ld_off - Interpreter::stackElementSize;
 565     //
 566     //
 567     //
 568     VMReg r_1 = regs[i].first();
 569     VMReg r_2 = regs[i].second();


2220 }
2221 
2222 // this function returns the adjust size (in number of words) to a c2i adapter
2223 // activation for use during deoptimization
2224 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2225   assert(callee_locals >= callee_parameters,
2226           "test and remove; got more parms than locals");
2227   if (callee_locals < callee_parameters)
2228     return 0;                   // No adjustment for negative locals
2229   int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2230   // diff is counted in stack words
2231   return round_to(diff, 2);
2232 }
2233 
2234 
2235 //------------------------------generate_deopt_blob----------------------------
2236 void SharedRuntime::generate_deopt_blob() {
2237   // Allocate space for the code
2238   ResourceMark rm;
2239   // Setup code generation tools
2240   CodeBuffer buffer("deopt_blob", 2048, 1024);






2241   MacroAssembler* masm = new MacroAssembler(&buffer);
2242   int frame_size_in_words;
2243   OopMap* map = NULL;
2244   OopMapSet *oop_maps = new OopMapSet();
2245 
2246 #ifdef BUILTIN_SIM
2247   AArch64Simulator *simulator;
2248   if (NotifySimulator) {
2249     simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2250     simulator->notifyCompile(const_cast<char*>("SharedRuntime::deopt_blob"), __ pc());
2251   }
2252 #endif
2253 
2254   // -------------
2255   // This code enters when returning to a de-optimized nmethod.  A return
2256   // address has been pushed on the the stack, and return values are in
2257   // registers.
2258   // If we are doing a normal deopt then we were called from the patched
2259   // nmethod from the point we returned to the nmethod. So the return
2260   // address on the stack is wrong by NativeCall::instruction_size


2277   // The current frame is compiled code and may contain many inlined
2278   // functions, each with their own JVM state.  We pop the current frame, then
2279   // push all the new frames.  Then we call the C routine unpack_frames() to
2280   // populate these frames.  Finally unpack_frames() returns us the new target
2281   // address.  Notice that callee-save registers are BLOWN here; they have
2282   // already been captured in the vframeArray at the time the return PC was
2283   // patched.
2284   address start = __ pc();
2285   Label cont;
2286 
2287   // Prolog for non exception case!
2288 
2289   // Save everything in sight.
2290   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2291 
2292   // Normal deoptimization.  Save exec mode for unpack_frames.
2293   __ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved
2294   __ b(cont);
2295 
2296   int reexecute_offset = __ pc() - start;






2297 
2298   // Reexecute case
2299   // return address is the pc describes what bci to do re-execute at
2300 
2301   // No need to update map as each call to save_live_registers will produce identical oopmap
2302   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2303 
2304   __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
2305   __ b(cont);
2306 






































2307   int exception_offset = __ pc() - start;
2308 
2309   // Prolog for exception case
2310 
2311   // all registers are dead at this entry point, except for r0, and
2312   // r3 which contain the exception oop and exception pc
2313   // respectively.  Set them in TLS and fall thru to the
2314   // unpack_with_exception_in_tls entry point.
2315 
2316   __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
2317   __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
2318 
2319   int exception_in_tls_offset = __ pc() - start;
2320 
2321   // new implementation because exception oop is now passed in JavaThread
2322 
2323   // Prolog for exception case
2324   // All registers must be preserved because they might be used by LinearScan
2325   // Exceptiop oop and throwing PC are passed in JavaThread
2326   // tos: stack at point of call to method that threw the exception (i.e. only


2377 #ifdef ASSERT0
2378   { Label L;
2379     __ ldr(rscratch1, Address(rthread,
2380                               JavaThread::last_Java_fp_offset()));
2381     __ cbz(rscratch1, L);
2382     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2383     __ bind(L);
2384   }
2385 #endif // ASSERT
2386   __ mov(c_rarg0, rthread);
2387   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2388   __ blrt(rscratch1, 1, 0, 1);
2389   __ bind(retaddr);
2390 
2391   // Need to have an oopmap that tells fetch_unroll_info where to
2392   // find any register it might need.
2393   oop_maps->add_gc_map(__ pc() - start, map);
2394 
2395   __ reset_last_Java_frame(false, true);
2396 
2397   // Load UnrollBlock* into rdi






2398   __ mov(r5, r0);
2399 
2400    Label noException;
2401   __ cmpw(rcpool, Deoptimization::Unpack_exception);   // Was exception pending?
2402   __ br(Assembler::NE, noException);
2403   __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
2404   // QQQ this is useless it was NULL above
2405   __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
2406   __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2407   __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2408 
2409   __ verify_oop(r0);
2410 
2411   // Overwrite the result registers with the exception results.
2412   __ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2413   // I think this is useless
2414   // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2415 
2416   __ bind(noException);
2417 


2528   // Clear fp AND pc
2529   __ reset_last_Java_frame(true, true);
2530 
2531   // Collect return values
2532   __ ldrd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
2533   __ ldr(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2534   // I think this is useless (throwing pc?)
2535   // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2536 
2537   // Pop self-frame.
2538   __ leave();                           // Epilog
2539 
2540   // Jump to interpreter
2541   __ ret(lr);
2542 
2543   // Make sure all code is generated
2544   masm->flush();
2545 
2546   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
2547   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2548 





2549 #ifdef BUILTIN_SIM
2550   if (NotifySimulator) {
2551     unsigned char *base = _deopt_blob->code_begin();
2552     simulator->notifyRelocate(start, base - start);
2553   }
2554 #endif
2555 }
2556 
// Number of outgoing-argument stack slots the caller must preserve across
// a call. The AArch64 calling convention requires none.
uint SharedRuntime::out_preserve_stack_slots() {
  return 0;
}
2560 
2561 #ifdef COMPILER2
2562 //------------------------------generate_uncommon_trap_blob--------------------
2563 void SharedRuntime::generate_uncommon_trap_blob() {
2564   // Allocate space for the code
2565   ResourceMark rm;
2566   // Setup code generation tools
2567   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2568   MacroAssembler* masm = new MacroAssembler(&buffer);
2569 
2570 #ifdef BUILTIN_SIM
2571   AArch64Simulator *simulator;
2572   if (NotifySimulator) {
2573     simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2574     simulator->notifyCompile(const_cast<char*>("SharedRuntime:uncommon_trap_blob"), __ pc());
2575   }
2576 #endif
2577 
2578   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
2579 
2580   address start = __ pc();
2581 


2913 
2914   RegisterSaver::restore_live_registers(masm);
2915 
2916   // exception pending => remove activation and forward to exception handler
2917 
2918   __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
2919 
2920   __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
2921   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2922 
2923   // -------------
2924   // make sure all code is generated
2925   masm->flush();
2926 
2927   // return the  blob
2928   // frame_size_words or bytes??
2929   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
2930 }
2931 
2932 
2933 #ifdef COMPILER2
2934 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
2935 //
2936 //------------------------------generate_exception_blob---------------------------
2937 // creates exception blob at the end
2938 // Using exception blob, this code is jumped from a compiled method.
2939 // (see emit_exception_handler in x86_64.ad file)
2940 //
2941 // Given an exception pc at a call we call into the runtime for the
2942 // handler in this method. This handler might merely restore state
2943 // (i.e. callee save registers) unwind the frame and jump to the
2944 // exception handler for the nmethod if there is no Java level handler
2945 // for the nmethod.
2946 //
2947 // This code is entered with a jmp.
2948 //
2949 // Arguments:
2950 //   r0: exception oop
2951 //   r3: exception pc
2952 //
2953 // Results:




  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.hpp"
  28 #include "asm/macroAssembler.inline.hpp"
  29 #include "code/debugInfoRec.hpp"
  30 #include "code/icBuffer.hpp"
  31 #include "code/vtableStubs.hpp"
  32 #include "interpreter/interpreter.hpp"
  33 #include "interpreter/interp_masm.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "prims/jvmtiRedefineClassesTrace.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/vframeArray.hpp"
  38 #include "vmreg_aarch64.inline.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_Runtime1.hpp"
  41 #endif
  42 #if defined(COMPILER2) || INCLUDE_JVMCI
  43 #include "adfiles/ad_aarch64.hpp"
  44 #include "opto/runtime.hpp"
  45 #endif
  46 #if INCLUDE_JVMCI
  47 #include "jvmci/jvmciJavaClasses.hpp"
  48 #endif
  49 
  50 #ifdef BUILTIN_SIM
  51 #include "../../../../../../simulator/simulator.hpp"
  52 #endif
  53 
  54 #define __ masm->
  55 
  56 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  57 
  58 class SimpleRuntimeFrame {
  59 
  60   public:
  61 
  62   // Most of the runtime stubs have this simple frame layout.
  63   // This class exists to make the layout shared in one place.
  64   // Offsets are for compiler stack slots, which are jints.
  65   enum layout {
  66     // The frame sender code expects that rbp will be in the "natural" place and
  67     // will override any oopMap setting for it. We must therefore force the layout
  68     // so that it agrees with the frame sender code.


  95   // During deoptimization only the result registers need to be restored,
  96   // all the other values have already been extracted.
  97   static void restore_result_registers(MacroAssembler* masm);
  98 
  99     // Capture info about frame layout
 100   enum layout {
 101                 fpu_state_off = 0,
 102                 fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
 103                 // The frame sender code expects that rfp will be in
 104                 // the "natural" place and will override any oopMap
 105                 // setting for it. We must therefore force the layout
 106                 // so that it agrees with the frame sender code.
 107                 r0_off = fpu_state_off+FPUStateSizeInWords,
 108                 rfp_off = r0_off + 30 * 2,
 109                 return_off = rfp_off + 2,      // slot for return address
 110                 reg_save_size = return_off + 2};
 111 
 112 };
 113 
 114 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 115 #if defined(COMPILER2) || INCLUDE_JVMCI
 116   if (save_vectors) {
 117     // Save upper half of vector registers
 118     int vect_words = 32 * 8 / wordSize;
 119     additional_frame_words += vect_words;
 120   }
 121 #else
 122   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 123 #endif
 124 
 125   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
 126                                      reg_save_size*BytesPerInt, 16);
 127   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 128   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 129   // The caller will allocate additional_frame_words
 130   int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
 131   // CodeBlob frame size is in words.
 132   int frame_size_in_words = frame_size_in_bytes / wordSize;
 133   *total_frame_words = frame_size_in_words;
 134 
 135   // Save registers, fpu state, and flags.
 136 
 137   __ enter();
 138   __ push_CPU_state(save_vectors);
 139 
 140   // Set an oopmap for the call site.  This oopmap will map all
 141   // oop-registers and debug-info registers as callee-saved.  This
 142   // will allow deoptimization at this safepoint to find all possible


 152                                     // register slots are 8 bytes
 153                                     // wide, 32 floating-point
 154                                     // registers
 155       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
 156                                 r->as_VMReg());
 157     }
 158   }
 159 
 160   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
 161     FloatRegister r = as_FloatRegister(i);
 162     int sp_offset = save_vectors ? (4 * i) : (2 * i);
 163     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
 164                               r->as_VMReg());
 165   }
 166 
 167   return oop_map;
 168 }
 169 
 170 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 171 #ifndef COMPILER2
 172   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 173 #endif
 174   __ pop_CPU_state(restore_vectors);
 175   __ leave();
 176 }
 177 
 178 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 179 
 180   // Just restore result register. Only used by deoptimization. By
 181   // now any callee save register that needs to be restored to a c2
 182   // caller of the deoptee has been extracted into the vframeArray
 183   // and will be stuffed into the c2i adapter we create for later
 184   // restoration so only result registers need to be restored here.
 185 
 186   // Restore fp result register
 187   __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
 188   // Restore integer result register
 189   __ ldr(r0, Address(sp, r0_offset_in_bytes()));
 190 
 191   // Pop all of the register save are off the stack
 192   __ add(sp, sp, round_to(return_offset_in_bytes(), 16));


 533                   L_ok);
 534     const char* msg = "i2c adapter must return to an interpreter frame";
 535     __ block_comment(msg);
 536     __ stop(msg);
 537     __ bind(L_ok);
 538     __ block_comment("} verify_i2ce ");
 539 #endif
 540   }
 541 
 542   // Cut-out for having no stack args.
 543   int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 544   if (comp_args_on_stack) {
 545     __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
 546     __ andr(sp, rscratch1, -16);
 547   }
 548 
 549   // Will jump to the compiled code just as if compiled code was doing it.
 550   // Pre-load the register-jump target early, to schedule it better.
 551   __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
 552 
 553 #if INCLUDE_JVMCI
 554   if (EnableJVMCI) {
 555     // check if this call should be routed towards a specific entry point
 556     __ ldr(rscratch2, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
 557     Label no_alternative_target;
 558     __ cbz(rscratch2, no_alternative_target);
 559     __ mov(rscratch1, rscratch2);
 560     __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
 561     __ bind(no_alternative_target);
 562   }
 563 #endif // INCLUDE_JVMCI
 564 
 565   // Now generate the shuffle code.
 566   for (int i = 0; i < total_args_passed; i++) {
 567     if (sig_bt[i] == T_VOID) {
 568       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 569       continue;
 570     }
 571 
 572     // Pick up 0, 1 or 2 words from SP+offset.
 573 
 574     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 575             "scrambled load targets?");
 576     // Load in argument order going down.
 577     int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
 578     // Point to interpreter value (vs. tag)
 579     int next_off = ld_off - Interpreter::stackElementSize;
 580     //
 581     //
 582     //
 583     VMReg r_1 = regs[i].first();
 584     VMReg r_2 = regs[i].second();


2235 }
2236 
2237 // this function returns the adjust size (in number of words) to a c2i adapter
2238 // activation for use during deoptimization
2239 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2240   assert(callee_locals >= callee_parameters,
2241           "test and remove; got more parms than locals");
2242   if (callee_locals < callee_parameters)
2243     return 0;                   // No adjustment for negative locals
2244   int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2245   // diff is counted in stack words
2246   return round_to(diff, 2);
2247 }
2248 
2249 
2250 //------------------------------generate_deopt_blob----------------------------
2251 void SharedRuntime::generate_deopt_blob() {
2252   // Allocate space for the code
2253   ResourceMark rm;
2254   // Setup code generation tools
2255   int pad = 0;
2256 #if INCLUDE_JVMCI
2257   if (EnableJVMCI) {
2258     pad += 512; // Increase the buffer size when compiling for JVMCI
2259   }
2260 #endif
2261   CodeBuffer buffer("deopt_blob", 2048+pad, 1024);
2262   MacroAssembler* masm = new MacroAssembler(&buffer);
2263   int frame_size_in_words;
2264   OopMap* map = NULL;
2265   OopMapSet *oop_maps = new OopMapSet();
2266 
2267 #ifdef BUILTIN_SIM
2268   AArch64Simulator *simulator;
2269   if (NotifySimulator) {
2270     simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2271     simulator->notifyCompile(const_cast<char*>("SharedRuntime::deopt_blob"), __ pc());
2272   }
2273 #endif
2274 
2275   // -------------
2276   // This code enters when returning to a de-optimized nmethod.  A return
2277   // address has been pushed on the the stack, and return values are in
2278   // registers.
2279   // If we are doing a normal deopt then we were called from the patched
2280   // nmethod from the point we returned to the nmethod. So the return
2281   // address on the stack is wrong by NativeCall::instruction_size


2298   // The current frame is compiled code and may contain many inlined
2299   // functions, each with their own JVM state.  We pop the current frame, then
2300   // push all the new frames.  Then we call the C routine unpack_frames() to
2301   // populate these frames.  Finally unpack_frames() returns us the new target
2302   // address.  Notice that callee-save registers are BLOWN here; they have
2303   // already been captured in the vframeArray at the time the return PC was
2304   // patched.
2305   address start = __ pc();
2306   Label cont;
2307 
2308   // Prolog for non exception case!
2309 
2310   // Save everything in sight.
2311   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2312 
2313   // Normal deoptimization.  Save exec mode for unpack_frames.
2314   __ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved
2315   __ b(cont);
2316 
2317   int reexecute_offset = __ pc() - start;
2318 #if defined(INCLUDE_JVMCI) && !defined(COMPILER1)
2319   if (EnableJVMCI && UseJVMCICompiler) {
2320     // JVMCI does not use this kind of deoptimization
2321     __ should_not_reach_here();
2322   }
2323 #endif
2324 
2325   // Reexecute case
2326   // return address is the pc describes what bci to do re-execute at
2327 
2328   // No need to update map as each call to save_live_registers will produce identical oopmap
2329   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2330 
2331   __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
2332   __ b(cont);
2333 
2334 #if INCLUDE_JVMCI
2335   Label after_fetch_unroll_info_call;
2336   int implicit_exception_uncommon_trap_offset = 0;
2337   int uncommon_trap_offset = 0;
2338 
2339   if (EnableJVMCI) {
2340     implicit_exception_uncommon_trap_offset = __ pc() - start;
2341 
2342     __ ldr(lr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2343     __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2344 
2345     uncommon_trap_offset = __ pc() - start;
2346 
2347     // Save everything in sight.
2348     RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2349     // fetch_unroll_info needs to call last_java_frame()
2350     Label retaddr;
2351     __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
2352 
2353     __ ldrw(c_rarg1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
2354     __ movw(rscratch1, -1);
2355     __ strw(rscratch1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
2356 
2357     __ movw(rcpool, (int32_t)Deoptimization::Unpack_reexecute);
2358     __ mov(c_rarg0, rthread);
2359     __ lea(rscratch1,
2360            RuntimeAddress(CAST_FROM_FN_PTR(address,
2361                                            Deoptimization::uncommon_trap)));
2362     __ blrt(rscratch1, 2, 0, MacroAssembler::ret_type_integral);
2363     __ bind(retaddr);
2364     oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
2365 
2366     __ reset_last_Java_frame(false, false);
2367 
2368     __ b(after_fetch_unroll_info_call);
2369   } // EnableJVMCI
2370 #endif // INCLUDE_JVMCI
2371 
2372   int exception_offset = __ pc() - start;
2373 
2374   // Prolog for exception case
2375 
2376   // all registers are dead at this entry point, except for r0, and
2377   // r3 which contain the exception oop and exception pc
2378   // respectively.  Set them in TLS and fall thru to the
2379   // unpack_with_exception_in_tls entry point.
2380 
2381   __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
2382   __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
2383 
2384   int exception_in_tls_offset = __ pc() - start;
2385 
2386   // new implementation because exception oop is now passed in JavaThread
2387 
2388   // Prolog for exception case
2389   // All registers must be preserved because they might be used by LinearScan
2390   // Exceptiop oop and throwing PC are passed in JavaThread
2391   // tos: stack at point of call to method that threw the exception (i.e. only


2442 #ifdef ASSERT0
2443   { Label L;
2444     __ ldr(rscratch1, Address(rthread,
2445                               JavaThread::last_Java_fp_offset()));
2446     __ cbz(rscratch1, L);
2447     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2448     __ bind(L);
2449   }
2450 #endif // ASSERT
2451   __ mov(c_rarg0, rthread);
2452   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2453   __ blrt(rscratch1, 1, 0, 1);
2454   __ bind(retaddr);
2455 
2456   // Need to have an oopmap that tells fetch_unroll_info where to
2457   // find any register it might need.
2458   oop_maps->add_gc_map(__ pc() - start, map);
2459 
2460   __ reset_last_Java_frame(false, true);
2461 
2462 #if INCLUDE_JVMCI
2463   if (EnableJVMCI) {
2464     __ bind(after_fetch_unroll_info_call);
2465   }
2466 #endif
2467 
2468   // Load UnrollBlock* into r5
2469   __ mov(r5, r0);
2470 
2471    Label noException;
2472   __ cmpw(rcpool, Deoptimization::Unpack_exception);   // Was exception pending?
2473   __ br(Assembler::NE, noException);
2474   __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
2475   // QQQ this is useless it was NULL above
2476   __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
2477   __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2478   __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2479 
2480   __ verify_oop(r0);
2481 
2482   // Overwrite the result registers with the exception results.
2483   __ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2484   // I think this is useless
2485   // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2486 
2487   __ bind(noException);
2488 


2599   // Clear fp AND pc
2600   __ reset_last_Java_frame(true, true);
2601 
2602   // Collect return values
2603   __ ldrd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
2604   __ ldr(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2605   // I think this is useless (throwing pc?)
2606   // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2607 
2608   // Pop self-frame.
2609   __ leave();                           // Epilog
2610 
2611   // Jump to interpreter
2612   __ ret(lr);
2613 
2614   // Make sure all code is generated
2615   masm->flush();
2616 
2617   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
2618   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2619 #if INCLUDE_JVMCI
2620   if (EnableJVMCI) {
2621     _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
2622     _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
2623   }
2624 #endif
2625 #ifdef BUILTIN_SIM
2626   if (NotifySimulator) {
2627     unsigned char *base = _deopt_blob->code_begin();
2628     simulator->notifyRelocate(start, base - start);
2629   }
2630 #endif
2631 }
2632 
// Number of compiler stack slots (jint-sized, see VMRegImpl::stack_slot_size)
// that the runtime keeps reserved at the base of the outgoing-argument area.
// This platform reserves none, hence the constant zero.
uint SharedRuntime::out_preserve_stack_slots() {
  return 0;
}
2636 
2637 #if defined(COMPILER2) || INCLUDE_JVMCI
2638 //------------------------------generate_uncommon_trap_blob--------------------
2639 void SharedRuntime::generate_uncommon_trap_blob() {
2640   // Allocate space for the code
2641   ResourceMark rm;
2642   // Setup code generation tools
2643   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2644   MacroAssembler* masm = new MacroAssembler(&buffer);
2645 
2646 #ifdef BUILTIN_SIM
2647   AArch64Simulator *simulator;
2648   if (NotifySimulator) {
2649     simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2650     simulator->notifyCompile(const_cast<char*>("SharedRuntime:uncommon_trap_blob"), __ pc());
2651   }
2652 #endif
2653 
2654   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
2655 
2656   address start = __ pc();
2657 


2989 
2990   RegisterSaver::restore_live_registers(masm);
2991 
2992   // exception pending => remove activation and forward to exception handler
2993 
2994   __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
2995 
2996   __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
2997   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2998 
2999   // -------------
3000   // make sure all code is generated
3001   masm->flush();
3002 
3003   // return the  blob
3004   // frame_size_words or bytes??
3005   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3006 }
3007 
3008 
3009 #if defined(COMPILER2) || INCLUDE_JVMCI
3010 // This is here instead of runtime_aarch64.cpp because it uses SimpleRuntimeFrame
3011 //
3012 //------------------------------generate_exception_blob---------------------------
3013 // creates exception blob at the end
3014 // Using exception blob, this code is jumped from a compiled method.
3015 // (see emit_exception_handler in aarch64.ad file)
3016 //
3017 // Given an exception pc at a call we call into the runtime for the
3018 // handler in this method. This handler might merely restore state
3019 // (i.e. callee save registers) unwind the frame and jump to the
3020 // exception handler for the nmethod if there is no Java level handler
3021 // for the nmethod.
3022 //
3023 // This code is entered with a jmp.
3024 //
3025 // Arguments:
3026 //   r0: exception oop
3027 //   r3: exception pc
3028 //
3029 // Results:


< prev index next >