
src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp

rev 9429 : 8143072: Port JVMCI to AArch64
Summary: AArch64-specific code for JVMCI
Reviewed-by: duke

--- old version (before the JVMCI changes) ---


  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.hpp"
  28 #include "asm/macroAssembler.inline.hpp"
  29 #include "code/debugInfoRec.hpp"
  30 #include "code/icBuffer.hpp"
  31 #include "code/vtableStubs.hpp"
  32 #include "interpreter/interpreter.hpp"
  33 #include "interpreter/interp_masm.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "prims/jvmtiRedefineClassesTrace.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/vframeArray.hpp"
  38 #include "vmreg_aarch64.inline.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_Runtime1.hpp"
  41 #endif
  42 #ifdef COMPILER2
  43 #include "adfiles/ad_aarch64.hpp"
  44 #include "opto/runtime.hpp"
  45 #endif



  46 
  47 #ifdef BUILTIN_SIM
  48 #include "../../../../../../simulator/simulator.hpp"
  49 #endif
  50 
  51 #define __ masm->
  52 
  53 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  54 
  55 class SimpleRuntimeFrame {
  56 
  57   public:
  58 
  59   // Most of the runtime stubs have this simple frame layout.
  60   // This class exists to make the layout shared in one place.
  61   // Offsets are for compiler stack slots, which are jints.
  62   enum layout {
  63     // The frame sender code expects that rbp will be in the "natural" place and
  64     // will override any oopMap setting for it. We must therefore force the layout
  65     // so that it agrees with the frame sender code.


  92   // During deoptimization only the result registers need to be restored,
  93   // all the other values have already been extracted.
  94   static void restore_result_registers(MacroAssembler* masm);
  95 
  96     // Capture info about frame layout
  97   enum layout {
  98                 fpu_state_off = 0,
  99                 fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
 100                 // The frame sender code expects that rfp will be in
 101                 // the "natural" place and will override any oopMap
 102                 // setting for it. We must therefore force the layout
 103                 // so that it agrees with the frame sender code.
 104                 r0_off = fpu_state_off+FPUStateSizeInWords,
 105                 rfp_off = r0_off + 30 * 2,
 106                 return_off = rfp_off + 2,      // slot for return address
 107                 reg_save_size = return_off + 2};
 108 
 109 };
 110 
 111 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 112 #ifdef COMPILER2
 113   if (save_vectors) {
 114     // Save upper half of vector registers
 115     int vect_words = 32 * 8 / wordSize;
 116     additional_frame_words += vect_words;
 117   }
 118 #else
 119   assert(!save_vectors, "vectors are generated only by C2");
 120 #endif
 121 
 122   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
 123                                      reg_save_size*BytesPerInt, 16);
 124   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 125   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 126   // The caller will allocate additional_frame_words
 127   int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
 128   // CodeBlob frame size is in words.
 129   int frame_size_in_words = frame_size_in_bytes / wordSize;
 130   *total_frame_words = frame_size_in_words;
 131 
 132   // Save registers, fpu state, and flags.
 133 
 134   __ enter();
 135   __ push_CPU_state(save_vectors);
 136 
 137   // Set an oopmap for the call site.  This oopmap will map all
 138   // oop-registers and debug-info registers as callee-saved.  This
 139   // will allow deoptimization at this safepoint to find all possible


 149                                     // register slots are 8 bytes
 150                                     // wide, 32 floating-point
 151                                     // registers
 152       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
 153                                 r->as_VMReg());
 154     }
 155   }
 156 
 157   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
 158     FloatRegister r = as_FloatRegister(i);
 159     int sp_offset = save_vectors ? (4 * i) : (2 * i);
 160     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
 161                               r->as_VMReg());
 162   }
 163 
 164   return oop_map;
 165 }
 166 
 167 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 168 #ifndef COMPILER2
 169   assert(!restore_vectors, "vectors are generated only by C2");
 170 #endif
 171   __ pop_CPU_state(restore_vectors);
 172   __ leave();
 173 }
 174 
 175 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 176 
 177   // Just restore result register. Only used by deoptimization. By
 178   // now any callee save register that needs to be restored to a c2
 179   // caller of the deoptee has been extracted into the vframeArray
 180   // and will be stuffed into the c2i adapter we create for later
 181   // restoration so only result registers need to be restored here.
 182 
 183   // Restore fp result register
 184   __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
 185   // Restore integer result register
 186   __ ldr(r0, Address(sp, r0_offset_in_bytes()));
 187 
 188   // Pop all of the register save area off the stack
 189   __ add(sp, sp, round_to(return_offset_in_bytes(), 16));


 530                   L_ok);
 531     const char* msg = "i2c adapter must return to an interpreter frame";
 532     __ block_comment(msg);
 533     __ stop(msg);
 534     __ bind(L_ok);
 535     __ block_comment("} verify_i2ce ");
 536 #endif
 537   }
 538 
 539   // Cut-out for having no stack args.
 540   int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 541   if (comp_args_on_stack) {
 542     __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
 543     __ andr(sp, rscratch1, -16);
 544   }
 545 
 546   // Will jump to the compiled code just as if compiled code was doing it.
 547   // Pre-load the register-jump target early, to schedule it better.
 548   __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
 549 












 550   // Now generate the shuffle code.
 551   for (int i = 0; i < total_args_passed; i++) {
 552     if (sig_bt[i] == T_VOID) {
 553       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 554       continue;
 555     }
 556 
 557     // Pick up 0, 1 or 2 words from SP+offset.
 558 
 559     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 560             "scrambled load targets?");
 561     // Load in argument order going down.
 562     int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
 563     // Point to interpreter value (vs. tag)
 564     int next_off = ld_off - Interpreter::stackElementSize;
 565     //
 566     //
 567     //
 568     VMReg r_1 = regs[i].first();
 569     VMReg r_2 = regs[i].second();


2220 }
2221 
2222 // this function returns the adjustment (in number of words) to a c2i adapter
2223 // activation for use during deoptimization
2224 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2225   assert(callee_locals >= callee_parameters,
2226           "test and remove; got more parms than locals");
2227   if (callee_locals < callee_parameters)
2228     return 0;                   // No adjustment for negative locals
2229   int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2230   // diff is counted in stack words
2231   return round_to(diff, 2);
2232 }
2233 
2234 
2235 //------------------------------generate_deopt_blob----------------------------
2236 void SharedRuntime::generate_deopt_blob() {
2237   // Allocate space for the code
2238   ResourceMark rm;
2239   // Setup code generation tools
2240   CodeBuffer buffer("deopt_blob", 2048, 1024);






2241   MacroAssembler* masm = new MacroAssembler(&buffer);
2242   int frame_size_in_words;
2243   OopMap* map = NULL;
2244   OopMapSet *oop_maps = new OopMapSet();
2245 
2246 #ifdef BUILTIN_SIM
2247   AArch64Simulator *simulator;
2248   if (NotifySimulator) {
2249     simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2250     simulator->notifyCompile(const_cast<char*>("SharedRuntime::deopt_blob"), __ pc());
2251   }
2252 #endif
2253 
2254   // -------------
2255   // This code enters when returning to a de-optimized nmethod.  A return
2256   // address has been pushed on the stack, and return values are in
2257   // registers.
2258   // If we are doing a normal deopt then we were called from the patched
2259   // nmethod from the point we returned to the nmethod. So the return
2260   // address on the stack is wrong by NativeCall::instruction_size


2277   // The current frame is compiled code and may contain many inlined
2278   // functions, each with their own JVM state.  We pop the current frame, then
2279   // push all the new frames.  Then we call the C routine unpack_frames() to
2280   // populate these frames.  Finally unpack_frames() returns us the new target
2281   // address.  Notice that callee-save registers are BLOWN here; they have
2282   // already been captured in the vframeArray at the time the return PC was
2283   // patched.
2284   address start = __ pc();
2285   Label cont;
2286 
2287   // Prolog for non exception case!
2288 
2289   // Save everything in sight.
2290   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2291 
2292   // Normal deoptimization.  Save exec mode for unpack_frames.
2293   __ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved
2294   __ b(cont);
2295 
2296   int reexecute_offset = __ pc() - start;






2297 
2298   // Reexecute case
2299   // return address is the pc that describes which bci to re-execute at
2300 
2301   // No need to update map as each call to save_live_registers will produce identical oopmap
2302   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2303 
2304   __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
2305   __ b(cont);
2306 






































2307   int exception_offset = __ pc() - start;
2308 
2309   // Prolog for exception case
2310 
2311   // all registers are dead at this entry point, except for r0 and
2312   // r3, which contain the exception oop and exception pc
2313   // respectively.  Set them in TLS and fall thru to the
2314   // unpack_with_exception_in_tls entry point.
2315 
2316   __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
2317   __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
2318 
2319   int exception_in_tls_offset = __ pc() - start;
2320 
2321   // new implementation because exception oop is now passed in JavaThread
2322 
2323   // Prolog for exception case
2324   // All registers must be preserved because they might be used by LinearScan
2325   // Exception oop and throwing PC are passed in JavaThread
2326   // tos: stack at point of call to method that threw the exception (i.e. only


2378   { Label L;
2379     __ ldr(rscratch1, Address(rthread,
2380                               JavaThread::last_Java_fp_offset()));
2381     __ cbz(rscratch1, L);
2382     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2383     __ bind(L);
2384   }
2385 #endif // ASSERT
2386   __ mov(c_rarg0, rthread);
2387   __ mov(c_rarg1, rcpool);
2388   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2389   __ blrt(rscratch1, 1, 0, 1);
2390   __ bind(retaddr);
2391 
2392   // Need to have an oopmap that tells fetch_unroll_info where to
2393   // find any register it might need.
2394   oop_maps->add_gc_map(__ pc() - start, map);
2395 
2396   __ reset_last_Java_frame(false, true);
2397 
2398   // Load UnrollBlock* into rdi






2399   __ mov(r5, r0);
2400 
2401   __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2402    Label noException;
2403   __ cmpw(rcpool, Deoptimization::Unpack_exception);   // Was exception pending?
2404   __ br(Assembler::NE, noException);
2405   __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
2406   // QQQ this is useless it was NULL above
2407   __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
2408   __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2409   __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2410 
2411   __ verify_oop(r0);
2412 
2413   // Overwrite the result registers with the exception results.
2414   __ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2415   // I think this is useless
2416   // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2417 
2418   __ bind(noException);


2530   // Clear fp AND pc
2531   __ reset_last_Java_frame(true, true);
2532 
2533   // Collect return values
2534   __ ldrd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
2535   __ ldr(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2536   // I think this is useless (throwing pc?)
2537   // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2538 
2539   // Pop self-frame.
2540   __ leave();                           // Epilog
2541 
2542   // Jump to interpreter
2543   __ ret(lr);
2544 
2545   // Make sure all code is generated
2546   masm->flush();
2547 
2548   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
2549   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2550 





2551 #ifdef BUILTIN_SIM
2552   if (NotifySimulator) {
2553     unsigned char *base = _deopt_blob->code_begin();
2554     simulator->notifyRelocate(start, base - start);
2555   }
2556 #endif
2557 }
2558 
2559 uint SharedRuntime::out_preserve_stack_slots() {
2560   return 0;
2561 }
2562 
2563 #ifdef COMPILER2
2564 //------------------------------generate_uncommon_trap_blob--------------------
2565 void SharedRuntime::generate_uncommon_trap_blob() {
2566   // Allocate space for the code
2567   ResourceMark rm;
2568   // Setup code generation tools
2569   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2570   MacroAssembler* masm = new MacroAssembler(&buffer);
2571 
2572 #ifdef BUILTIN_SIM
2573   AArch64Simulator *simulator;
2574   if (NotifySimulator) {
2575     simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2576     simulator->notifyCompile(const_cast<char*>("SharedRuntime:uncommon_trap_blob"), __ pc());
2577   }
2578 #endif
2579 
2580   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
2581 
2582   address start = __ pc();
2583 


2926 
2927   RegisterSaver::restore_live_registers(masm);
2928 
2929   // exception pending => remove activation and forward to exception handler
2930 
2931   __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
2932 
2933   __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
2934   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2935 
2936   // -------------
2937   // make sure all code is generated
2938   masm->flush();
2939 
2940   // return the  blob
2941   // frame_size_words or bytes??
2942   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
2943 }
2944 
2945 
2946 #ifdef COMPILER2
2947 // This is here instead of runtime_aarch64.cpp because it uses SimpleRuntimeFrame
2948 //
2949 //------------------------------generate_exception_blob---------------------------
2950 // creates exception blob at the end
2951 // Using the exception blob, this code is jumped to from a compiled method.
2952 // (see emit_exception_handler in the aarch64.ad file)
2953 //
2954 // Given an exception pc at a call we call into the runtime for the
2955 // handler in this method. This handler might merely restore state
2956 // (i.e. callee save registers) unwind the frame and jump to the
2957 // exception handler for the nmethod if there is no Java level handler
2958 // for the nmethod.
2959 //
2960 // This code is entered with a jmp.
2961 //
2962 // Arguments:
2963 //   r0: exception oop
2964 //   r3: exception pc
2965 //
2966 // Results:




--- new version (with the JVMCI changes) ---

  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.hpp"
  28 #include "asm/macroAssembler.inline.hpp"
  29 #include "code/debugInfoRec.hpp"
  30 #include "code/icBuffer.hpp"
  31 #include "code/vtableStubs.hpp"
  32 #include "interpreter/interpreter.hpp"
  33 #include "interpreter/interp_masm.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "prims/jvmtiRedefineClassesTrace.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/vframeArray.hpp"
  38 #include "vmreg_aarch64.inline.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_Runtime1.hpp"
  41 #endif
  42 #if defined(COMPILER2) || INCLUDE_JVMCI
  43 #include "adfiles/ad_aarch64.hpp"
  44 #include "opto/runtime.hpp"
  45 #endif
  46 #if INCLUDE_JVMCI
  47 #include "jvmci/jvmciJavaClasses.hpp"
  48 #endif
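The include section above switches from `#ifdef COMPILER2` to `#if defined(COMPILER2) || INCLUDE_JVMCI`: COMPILER2 is either defined or absent, while INCLUDE_JVMCI is expected to expand to 0 or 1 and is therefore tested with `#if`. A minimal standalone sketch of that pattern follows; the fallback macro values are assumptions for illustration only, not the definitions from the real HotSpot headers.

    // Standalone sketch of the conditional-compilation pattern used above.
    // The fallback definition is an assumption for this example only.
    #include <cstdio>

    #ifndef INCLUDE_JVMCI
    #define INCLUDE_JVMCI 1      // assumed: real builds define this to 0 or 1
    #endif

    #if defined(COMPILER2) || INCLUDE_JVMCI
    // In HotSpot, ad_aarch64.hpp and opto/runtime.hpp would be included here.
    static const char* build_mode = "C2 and/or JVMCI code is compiled in";
    #else
    static const char* build_mode = "neither C2 nor JVMCI is compiled in";
    #endif

    int main() {
      std::printf("%s\n", build_mode);
      return 0;
    }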
  49 
  50 #ifdef BUILTIN_SIM
  51 #include "../../../../../../simulator/simulator.hpp"
  52 #endif
  53 
  54 #define __ masm->
  55 
  56 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  57 
  58 class SimpleRuntimeFrame {
  59 
  60   public:
  61 
  62   // Most of the runtime stubs have this simple frame layout.
  63   // This class exists to make the layout shared in one place.
  64   // Offsets are for compiler stack slots, which are jints.
  65   enum layout {
  66     // The frame sender code expects that rbp will be in the "natural" place and
  67     // will override any oopMap setting for it. We must therefore force the layout
  68     // so that it agrees with the frame sender code.


  95   // During deoptimization only the result registers need to be restored,
  96   // all the other values have already been extracted.
  97   static void restore_result_registers(MacroAssembler* masm);
  98 
  99     // Capture info about frame layout
 100   enum layout {
 101                 fpu_state_off = 0,
 102                 fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
 103                 // The frame sender code expects that rfp will be in
 104                 // the "natural" place and will override any oopMap
 105                 // setting for it. We must therefore force the layout
 106                 // so that it agrees with the frame sender code.
 107                 r0_off = fpu_state_off+FPUStateSizeInWords,
 108                 rfp_off = r0_off + 30 * 2,
 109                 return_off = rfp_off + 2,      // slot for return address
 110                 reg_save_size = return_off + 2};
 111 
 112 };
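The layout enum above packs the register save area in 32-bit compiler slots: the FPU/SIMD state first, then thirty 64-bit general registers at two slots each starting at r0_off, then rfp and the return address, giving reg_save_size, which feeds the frame-size rounding in save_live_registers below. A small arithmetic sketch follows; FPU_STATE_SIZE_IN_WORDS is an assumed stand-in for FPUStateSizeInWords, whose real value lives elsewhere in the port.

    // Sketch of the slot arithmetic in the layout enum (values are jint slots).
    // FPU_STATE_SIZE_IN_WORDS is an assumed placeholder, not the HotSpot value.
    #include <cstdio>

    int main() {
      const int FPU_STATE_SIZE_IN_WORDS = 64;           // assumption for the example
      const int fpu_state_off = 0;
      const int r0_off        = fpu_state_off + FPU_STATE_SIZE_IN_WORDS;
      const int rfp_off       = r0_off + 30 * 2;        // 30 GP registers, 2 slots each
      const int return_off    = rfp_off + 2;            // rfp itself takes 2 slots
      const int reg_save_size = return_off + 2;         // plus the return address

      std::printf("r0_off=%d rfp_off=%d return_off=%d reg_save_size=%d slots\n",
                  r0_off, rfp_off, return_off, reg_save_size);
      return 0;
    }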
 113 
 114 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 115 #if defined(COMPILER2) || INCLUDE_JVMCI
 116   if (save_vectors) {
 117     // Save upper half of vector registers
 118     int vect_words = 32 * 8 / wordSize;
 119     additional_frame_words += vect_words;
 120   }
 121 #else
 122   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 123 #endif
 124 
 125   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
 126                                      reg_save_size*BytesPerInt, 16);
 127   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 128   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 129   // The caller will allocate additional_frame_words
 130   int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
 131   // CodeBlob frame size is in words.
 132   int frame_size_in_words = frame_size_in_bytes / wordSize;
 133   *total_frame_words = frame_size_in_words;
 134 
 135   // Save registers, fpu state, and flags.
 136 
 137   __ enter();
 138   __ push_CPU_state(save_vectors);
 139 
 140   // Set an oopmap for the call site.  This oopmap will map all
 141   // oop-registers and debug-info registers as callee-saved.  This
 142   // will allow deoptimization at this safepoint to find all possible


 152                                     // register slots are 8 bytes
 153                                     // wide, 32 floating-point
 154                                     // registers
 155       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
 156                                 r->as_VMReg());
 157     }
 158   }
 159 
 160   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
 161     FloatRegister r = as_FloatRegister(i);
 162     int sp_offset = save_vectors ? (4 * i) : (2 * i);
 163     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
 164                               r->as_VMReg());
 165   }
 166 
 167   return oop_map;
 168 }
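In the floating-point loop above, each register's sp_offset is 2*i when only the lower halves are saved (8 bytes, two jint slots per register) and 4*i when save_vectors is set (16 bytes, matching the full 128-bit registers). A hedged sketch of that offset computation, using plain ints rather than the HotSpot register types, follows.

    // Sketch: slot and byte offsets of saved FP/SIMD registers in both modes.
    // The 4-byte slot size mirrors compiler stack slots (jints); this models the
    // offset math only, not the RegisterSaver code itself.
    #include <cstdio>

    int main() {
      const int slot_size = 4;              // compiler stack slots are jints
      const bool save_vectors = true;       // assumed setting for the example
      for (int i = 0; i < 4; i++) {         // show the first few registers only
        int sp_offset_in_slots = save_vectors ? (4 * i) : (2 * i);
        std::printf("v%d saved at slot %d (byte offset %d)\n",
                    i, sp_offset_in_slots, sp_offset_in_slots * slot_size);
      }
      return 0;
    }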
 169 
 170 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 171 #ifndef COMPILER2
 172   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 173 #endif
 174   __ pop_CPU_state(restore_vectors);
 175   __ leave();
 176 }
 177 
 178 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 179 
 180   // Just restore result register. Only used by deoptimization. By
 181   // now any callee save register that needs to be restored to a c2
 182   // caller of the deoptee has been extracted into the vframeArray
 183   // and will be stuffed into the c2i adapter we create for later
 184   // restoration so only result registers need to be restored here.
 185 
 186   // Restore fp result register
 187   __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
 188   // Restore integer result register
 189   __ ldr(r0, Address(sp, r0_offset_in_bytes()));
 190 
 191   // Pop all of the register save area off the stack
 192   __ add(sp, sp, round_to(return_offset_in_bytes(), 16));


 533                   L_ok);
 534     const char* msg = "i2c adapter must return to an interpreter frame";
 535     __ block_comment(msg);
 536     __ stop(msg);
 537     __ bind(L_ok);
 538     __ block_comment("} verify_i2ce ");
 539 #endif
 540   }
 541 
 542   // Cut-out for having no stack args.
 543   int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 544   if (comp_args_on_stack) {
 545     __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
 546     __ andr(sp, rscratch1, -16);
 547   }
 548 
 549   // Will jump to the compiled code just as if compiled code was doing it.
 550   // Pre-load the register-jump target early, to schedule it better.
 551   __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
 552 
 553 #if INCLUDE_JVMCI
 554   if (EnableJVMCI) {
 555     // check if this call should be routed towards a specific entry point
 556     __ ldr(rscratch2, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
 557     Label no_alternative_target;
 558     __ cbz(rscratch2, no_alternative_target);
 559     __ mov(rscratch1, rscratch2);
 560     __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
 561     __ bind(no_alternative_target);
 562   }
 563 #endif // INCLUDE_JVMCI
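The JVMCI block above implements a "use once, then clear" redirection in the i2c adapter: if the thread's jvmci_alternate_call_target field is non-zero, the jump target loaded via Method::from_compiled_offset() is replaced with that value, and the field is zeroed so the override applies to a single call. A hedged C++ sketch of that control flow follows; the struct and function names are invented for illustration and are not HotSpot APIs.

    // Sketch of the "alternate call target" hand-off (illustrative names only).
    #include <cstdint>
    #include <cstdio>

    struct FakeThread {
      uintptr_t jvmci_alternate_call_target;   // 0 means "no override"
    };

    // Returns the entry point the adapter would jump to, clearing any override.
    uintptr_t pick_entry(FakeThread* thread, uintptr_t from_compiled_entry) {
      uintptr_t target = from_compiled_entry;
      if (thread->jvmci_alternate_call_target != 0) {
        target = thread->jvmci_alternate_call_target;   // route to the override
        thread->jvmci_alternate_call_target = 0;        // one-shot: clear it
      }
      return target;
    }

    int main() {
      FakeThread t{0x2000};
      std::printf("first call  -> %#lx\n", (unsigned long)pick_entry(&t, 0x1000));
      std::printf("second call -> %#lx\n", (unsigned long)pick_entry(&t, 0x1000));
      return 0;
    }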
 564 
 565   // Now generate the shuffle code.
 566   for (int i = 0; i < total_args_passed; i++) {
 567     if (sig_bt[i] == T_VOID) {
 568       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 569       continue;
 570     }
 571 
 572     // Pick up 0, 1 or 2 words from SP+offset.
 573 
 574     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 575             "scrambled load targets?");
 576     // Load in argument order going down.
 577     int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
 578     // Point to interpreter value (vs. tag)
 579     int next_off = ld_off - Interpreter::stackElementSize;
 580     //
 581     //
 582     //
 583     VMReg r_1 = regs[i].first();
 584     VMReg r_2 = regs[i].second();


2235 }
2236 
2237 // this function returns the adjustment (in number of words) to a c2i adapter
2238 // activation for use during deoptimization
2239 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2240   assert(callee_locals >= callee_parameters,
2241           "test and remove; got more parms than locals");
2242   if (callee_locals < callee_parameters)
2243     return 0;                   // No adjustment for negative locals
2244   int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2245   // diff is counted in stack words
2246   return round_to(diff, 2);
2247 }
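For a concrete reading of last_frame_adjust: with 3 parameters and 5 locals, the extra locals need callee_locals - callee_parameters stack elements, and round_to(diff, 2) keeps the adjustment an even number of words so alignment is preserved. A minimal sketch follows, with stackElementWords assumed to be 1 for the example.

    // Sketch of the adjustment computed above (assumes stackElementWords == 1).
    #include <cstdio>

    static int round_to_multiple(int x, int unit) {
      return ((x + unit - 1) / unit) * unit;
    }

    static int last_frame_adjust_model(int callee_parameters, int callee_locals) {
      const int stack_element_words = 1;     // assumption for this example
      if (callee_locals < callee_parameters) return 0;
      int diff = (callee_locals - callee_parameters) * stack_element_words;
      return round_to_multiple(diff, 2);     // keep the adjustment even
    }

    int main() {
      std::printf("adjust(3 parms, 5 locals) = %d words\n",
                  last_frame_adjust_model(3, 5));
      std::printf("adjust(2 parms, 5 locals) = %d words\n",
                  last_frame_adjust_model(2, 5));
      return 0;
    }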
2248 
2249 
2250 //------------------------------generate_deopt_blob----------------------------
2251 void SharedRuntime::generate_deopt_blob() {
2252   // Allocate space for the code
2253   ResourceMark rm;
2254   // Setup code generation tools
2255   int pad = 0;
2256 #if INCLUDE_JVMCI
2257   if (EnableJVMCI) {
2258     pad += 512; // Increase the buffer size when compiling for JVMCI
2259   }
2260 #endif
2261   CodeBuffer buffer("deopt_blob", 2048+pad, 1024);
2262   MacroAssembler* masm = new MacroAssembler(&buffer);
2263   int frame_size_in_words;
2264   OopMap* map = NULL;
2265   OopMapSet *oop_maps = new OopMapSet();
2266 
2267 #ifdef BUILTIN_SIM
2268   AArch64Simulator *simulator;
2269   if (NotifySimulator) {
2270     simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2271     simulator->notifyCompile(const_cast<char*>("SharedRuntime::deopt_blob"), __ pc());
2272   }
2273 #endif
2274 
2275   // -------------
2276   // This code enters when returning to a de-optimized nmethod.  A return
2277   // address has been pushed on the stack, and return values are in
2278   // registers.
2279   // If we are doing a normal deopt then we were called from the patched
2280   // nmethod from the point we returned to the nmethod. So the return
2281   // address on the stack is wrong by NativeCall::instruction_size


2298   // The current frame is compiled code and may contain many inlined
2299   // functions, each with their own JVM state.  We pop the current frame, then
2300   // push all the new frames.  Then we call the C routine unpack_frames() to
2301   // populate these frames.  Finally unpack_frames() returns us the new target
2302   // address.  Notice that callee-save registers are BLOWN here; they have
2303   // already been captured in the vframeArray at the time the return PC was
2304   // patched.
2305   address start = __ pc();
2306   Label cont;
2307 
2308   // Prolog for non exception case!
2309 
2310   // Save everything in sight.
2311   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2312 
2313   // Normal deoptimization.  Save exec mode for unpack_frames.
2314   __ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved
2315   __ b(cont);
2316 
2317   int reexecute_offset = __ pc() - start;
2318 #if INCLUDE_JVMCI && !defined(COMPILER1)
2319   if (EnableJVMCI && UseJVMCICompiler) {
2320     // JVMCI does not use this kind of deoptimization
2321     __ should_not_reach_here();
2322   }
2323 #endif
2324 
2325   // Reexecute case
2326   // return address is the pc that describes which bci to re-execute at
2327 
2328   // No need to update map as each call to save_live_registers will produce identical oopmap
2329   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2330 
2331   __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
2332   __ b(cont);
2333 
2334 #if INCLUDE_JVMCI
2335   Label after_fetch_unroll_info_call;
2336   int implicit_exception_uncommon_trap_offset = 0;
2337   int uncommon_trap_offset = 0;
2338 
2339   if (EnableJVMCI) {
2340     implicit_exception_uncommon_trap_offset = __ pc() - start;
2341 
2342     __ ldr(lr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2343     __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2344 
2345     uncommon_trap_offset = __ pc() - start;
2346 
2347     // Save everything in sight.
2348     RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2349     // fetch_unroll_info needs to call last_java_frame()
2350     Label retaddr;
2351     __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
2352 
2353     __ ldrw(c_rarg1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
2354     __ movw(rscratch1, -1);
2355     __ strw(rscratch1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
2356 
2357     __ movw(rcpool, (int32_t)Deoptimization::Unpack_reexecute);
2358     __ mov(c_rarg0, rthread);
2359     __ lea(rscratch1,
2360            RuntimeAddress(CAST_FROM_FN_PTR(address,
2361                                            Deoptimization::uncommon_trap)));
2362     __ blrt(rscratch1, 2, 0, MacroAssembler::ret_type_integral);
2363     __ bind(retaddr);
2364     oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
2365 
2366     __ reset_last_Java_frame(false, false);
2367 
2368     __ b(after_fetch_unroll_info_call);
2369   } // EnableJVMCI
2370 #endif // INCLUDE_JVMCI
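The JVMCI uncommon-trap entry above reads JavaThread::pending_deoptimization into the outgoing argument register, immediately stores -1 back into the field so the request is consumed only once, then calls Deoptimization::uncommon_trap with Unpack_reexecute and rejoins the normal path at after_fetch_unroll_info_call. A hedged sketch of that read-and-reset hand-off follows; the type name and the -1 sentinel meaning are illustrative assumptions, not the HotSpot declarations.

    // Sketch of the "consume a pending deoptimization request" pattern
    // (illustrative types; -1 is assumed to mean "no request pending").
    #include <cstdio>

    struct FakeThread {
      int pending_deoptimization;
    };

    // Reads the request, marks it consumed, and returns it to the caller,
    // mirroring the ldrw/strw pair in the generated code above.
    int consume_pending_deopt(FakeThread* thread) {
      int request = thread->pending_deoptimization;
      thread->pending_deoptimization = -1;   // reset so it is handled only once
      return request;
    }

    int main() {
      FakeThread t{42};
      std::printf("first consume : %d\n", consume_pending_deopt(&t));
      std::printf("second consume: %d\n", consume_pending_deopt(&t));
      return 0;
    }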
2371 
2372   int exception_offset = __ pc() - start;
2373 
2374   // Prolog for exception case
2375 
2376   // all registers are dead at this entry point, except for r0 and
2377   // r3, which contain the exception oop and exception pc
2378   // respectively.  Set them in TLS and fall thru to the
2379   // unpack_with_exception_in_tls entry point.
2380 
2381   __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
2382   __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
2383 
2384   int exception_in_tls_offset = __ pc() - start;
2385 
2386   // new implementation because exception oop is now passed in JavaThread
2387 
2388   // Prolog for exception case
2389   // All registers must be preserved because they might be used by LinearScan
2390   // Exception oop and throwing PC are passed in JavaThread
2391   // tos: stack at point of call to method that threw the exception (i.e. only


2443   { Label L;
2444     __ ldr(rscratch1, Address(rthread,
2445                               JavaThread::last_Java_fp_offset()));
2446     __ cbz(rscratch1, L);
2447     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2448     __ bind(L);
2449   }
2450 #endif // ASSERT
2451   __ mov(c_rarg0, rthread);
2452   __ mov(c_rarg1, rcpool);
2453   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2454   __ blrt(rscratch1, 1, 0, 1);
2455   __ bind(retaddr);
2456 
2457   // Need to have an oopmap that tells fetch_unroll_info where to
2458   // find any register it might need.
2459   oop_maps->add_gc_map(__ pc() - start, map);
2460 
2461   __ reset_last_Java_frame(false, true);
2462 
2463 #if INCLUDE_JVMCI
2464   if (EnableJVMCI) {
2465     __ bind(after_fetch_unroll_info_call);
2466   }
2467 #endif
2468 
2469   // Load UnrollBlock* into r5
2470   __ mov(r5, r0);
2471 
2472   __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2473    Label noException;
2474   __ cmpw(rcpool, Deoptimization::Unpack_exception);   // Was exception pending?
2475   __ br(Assembler::NE, noException);
2476   __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
2477   // QQQ this is useless it was NULL above
2478   __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
2479   __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2480   __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2481 
2482   __ verify_oop(r0);
2483 
2484   // Overwrite the result registers with the exception results.
2485   __ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2486   // I think this is useless
2487   // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2488 
2489   __ bind(noException);


2601   // Clear fp AND pc
2602   __ reset_last_Java_frame(true, true);
2603 
2604   // Collect return values
2605   __ ldrd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
2606   __ ldr(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2607   // I think this is useless (throwing pc?)
2608   // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2609 
2610   // Pop self-frame.
2611   __ leave();                           // Epilog
2612 
2613   // Jump to interpreter
2614   __ ret(lr);
2615 
2616   // Make sure all code is generated
2617   masm->flush();
2618 
2619   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
2620   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2621 #if INCLUDE_JVMCI
2622   if (EnableJVMCI) {
2623     _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
2624     _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
2625   }
2626 #endif
2627 #ifdef BUILTIN_SIM
2628   if (NotifySimulator) {
2629     unsigned char *base = _deopt_blob->code_begin();
2630     simulator->notifyRelocate(start, base - start);
2631   }
2632 #endif
2633 }
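All of the extra entry points registered on the blob above (exception_offset, reexecute_offset and, with JVMCI enabled, uncommon_trap_offset and implicit_exception_uncommon_trap_offset) are byte distances computed as __ pc() - start and later resolved against the blob's code_begin(). A small sketch of that offset bookkeeping follows, using a plain byte buffer in place of a CodeBuffer; none of the names are HotSpot API.

    // Sketch: entry points recorded as byte offsets from the start of a blob.
    // "emit" stands in for the assembler appending instructions.
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<unsigned char> blob;
      auto emit = [&](int n) { blob.insert(blob.end(), n, 0x00); };

      size_t start = blob.size();            // address start = __ pc();
      emit(32);                              // ... normal deopt prolog ...
      size_t reexecute_offset = blob.size() - start;
      emit(24);                              // ... reexecute prolog ...
      size_t exception_offset = blob.size() - start;
      emit(40);                              // ... exception prolog and body ...

      // Callers later jump to code_begin() + offset.
      std::printf("reexecute entry at +%zu bytes, exception entry at +%zu bytes\n",
                  reexecute_offset, exception_offset);
      return 0;
    }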
2634 
2635 uint SharedRuntime::out_preserve_stack_slots() {
2636   return 0;
2637 }
2638 
2639 #if defined(COMPILER2) || INCLUDE_JVMCI
2640 //------------------------------generate_uncommon_trap_blob--------------------
2641 void SharedRuntime::generate_uncommon_trap_blob() {
2642   // Allocate space for the code
2643   ResourceMark rm;
2644   // Setup code generation tools
2645   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2646   MacroAssembler* masm = new MacroAssembler(&buffer);
2647 
2648 #ifdef BUILTIN_SIM
2649   AArch64Simulator *simulator;
2650   if (NotifySimulator) {
2651     simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2652     simulator->notifyCompile(const_cast<char*>("SharedRuntime:uncommon_trap_blob"), __ pc());
2653   }
2654 #endif
2655 
2656   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
2657 
2658   address start = __ pc();
2659 


3002 
3003   RegisterSaver::restore_live_registers(masm);
3004 
3005   // exception pending => remove activation and forward to exception handler
3006 
3007   __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
3008 
3009   __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
3010   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3011 
3012   // -------------
3013   // make sure all code is generated
3014   masm->flush();
3015 
3016   // return the  blob
3017   // frame_size_words or bytes??
3018   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3019 }
3020 
3021 
3022 #if defined(COMPILER2) || INCLUDE_JVMCI
3023 // This is here instead of runtime_aarch64.cpp because it uses SimpleRuntimeFrame
3024 //
3025 //------------------------------generate_exception_blob---------------------------
3026 // creates exception blob at the end
3027 // Using the exception blob, this code is jumped to from a compiled method.
3028 // (see emit_exception_handler in the aarch64.ad file)
3029 //
3030 // Given an exception pc at a call we call into the runtime for the
3031 // handler in this method. This handler might merely restore state
3032 // (i.e. callee save registers) unwind the frame and jump to the
3033 // exception handler for the nmethod if there is no Java level handler
3034 // for the nmethod.
3035 //
3036 // This code is entered with a jmp.
3037 //
3038 // Arguments:
3039 //   r0: exception oop
3040 //   r3: exception pc
3041 //
3042 // Results:

