22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "code/debugInfoRec.hpp"
30 #include "code/icBuffer.hpp"
31 #include "code/vtableStubs.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "interpreter/interp_masm.hpp"
34 #include "oops/compiledICHolder.hpp"
35 #include "prims/jvmtiRedefineClassesTrace.hpp"
36 #include "runtime/sharedRuntime.hpp"
37 #include "runtime/vframeArray.hpp"
38 #include "vmreg_aarch64.inline.hpp"
39 #ifdef COMPILER1
40 #include "c1/c1_Runtime1.hpp"
41 #endif
42 #ifdef COMPILER2
43 #include "adfiles/ad_aarch64.hpp"
44 #include "opto/runtime.hpp"
45 #endif
46
47 #ifdef BUILTIN_SIM
48 #include "../../../../../../simulator/simulator.hpp"
49 #endif
50
51 #define __ masm->
52
53 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
54
55 class SimpleRuntimeFrame {
56
57 public:
58
59 // Most of the runtime stubs have this simple frame layout.
60 // This class exists to make the layout shared in one place.
61 // Offsets are for compiler stack slots, which are jints.
62 enum layout {
    // The frame sender code expects that rfp will be in the "natural" place and
64 // will override any oopMap setting for it. We must therefore force the layout
65 // so that it agrees with the frame sender code.
92 // During deoptimization only the result registers need to be restored,
93 // all the other values have already been extracted.
94 static void restore_result_registers(MacroAssembler* masm);
95
  // Capture info about frame layout.
  // Offsets below are in compiler stack slots (jints), so each 64-bit
  // register occupies 2 consecutive slots.
  enum layout {
                fpu_state_off = 0,
                fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
                // The frame sender code expects that rfp will be in
                // the "natural" place and will override any oopMap
                // setting for it. We must therefore force the layout
                // so that it agrees with the frame sender code.
                r0_off = fpu_state_off+FPUStateSizeInWords,
                // 30 integer registers at 2 slots each, saved below rfp
                // (presumably r0..r29 -- confirm against push_CPU_state).
                rfp_off = r0_off + 30 * 2,
                return_off = rfp_off + 2, // slot for return address
                reg_save_size = return_off + 2};
108
109 };
110
111 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
112 #ifdef COMPILER2
113 if (save_vectors) {
114 // Save upper half of vector registers
115 int vect_words = 32 * 8 / wordSize;
116 additional_frame_words += vect_words;
117 }
118 #else
119 assert(!save_vectors, "vectors are generated only by C2");
120 #endif
121
122 int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
123 reg_save_size*BytesPerInt, 16);
124 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
125 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
126 // The caller will allocate additional_frame_words
127 int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
128 // CodeBlob frame size is in words.
129 int frame_size_in_words = frame_size_in_bytes / wordSize;
130 *total_frame_words = frame_size_in_words;
131
132 // Save registers, fpu state, and flags.
133
134 __ enter();
135 __ push_CPU_state(save_vectors);
136
137 // Set an oopmap for the call site. This oopmap will map all
138 // oop-registers and debug-info registers as callee-saved. This
139 // will allow deoptimization at this safepoint to find all possible
149 // register slots are 8 bytes
150 // wide, 32 floating-point
151 // registers
152 oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
153 r->as_VMReg());
154 }
155 }
156
157 for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
158 FloatRegister r = as_FloatRegister(i);
159 int sp_offset = save_vectors ? (4 * i) : (2 * i);
160 oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
161 r->as_VMReg());
162 }
163
164 return oop_map;
165 }
166
// Undo save_live_registers(): pop the full CPU state pushed there and
// tear down the frame created by enter(). restore_vectors must match
// the save_vectors value used at save time so the pops line up.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
#ifndef COMPILER2
  // Only C2 emits vector code, so no other configuration should ever
  // ask for the vector halves back.
  assert(!restore_vectors, "vectors are generated only by C2");
#endif
  __ pop_CPU_state(restore_vectors);
  __ leave();
}
174
175 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
176
177 // Just restore result register. Only used by deoptimization. By
178 // now any callee save register that needs to be restored to a c2
179 // caller of the deoptee has been extracted into the vframeArray
180 // and will be stuffed into the c2i adapter we create for later
181 // restoration so only result registers need to be restored here.
182
183 // Restore fp result register
184 __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
185 // Restore integer result register
186 __ ldr(r0, Address(sp, r0_offset_in_bytes()));
187
  // Pop all of the register save area off the stack
189 __ add(sp, sp, round_to(return_offset_in_bytes(), 16));
530 L_ok);
531 const char* msg = "i2c adapter must return to an interpreter frame";
532 __ block_comment(msg);
533 __ stop(msg);
534 __ bind(L_ok);
535 __ block_comment("} verify_i2ce ");
536 #endif
537 }
538
539 // Cut-out for having no stack args.
540 int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
541 if (comp_args_on_stack) {
542 __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
543 __ andr(sp, rscratch1, -16);
544 }
545
546 // Will jump to the compiled code just as if compiled code was doing it.
547 // Pre-load the register-jump target early, to schedule it better.
548 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
549
550 // Now generate the shuffle code.
551 for (int i = 0; i < total_args_passed; i++) {
552 if (sig_bt[i] == T_VOID) {
553 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
554 continue;
555 }
556
557 // Pick up 0, 1 or 2 words from SP+offset.
558
559 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
560 "scrambled load targets?");
561 // Load in argument order going down.
562 int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
563 // Point to interpreter value (vs. tag)
564 int next_off = ld_off - Interpreter::stackElementSize;
565 //
566 //
567 //
568 VMReg r_1 = regs[i].first();
569 VMReg r_2 = regs[i].second();
2220 }
2221
2222 // this function returns the adjust size (in number of words) to a c2i adapter
2223 // activation for use during deoptimization
2224 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2225 assert(callee_locals >= callee_parameters,
2226 "test and remove; got more parms than locals");
2227 if (callee_locals < callee_parameters)
2228 return 0; // No adjustment for negative locals
2229 int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2230 // diff is counted in stack words
2231 return round_to(diff, 2);
2232 }
2233
2234
2235 //------------------------------generate_deopt_blob----------------------------
2236 void SharedRuntime::generate_deopt_blob() {
2237 // Allocate space for the code
2238 ResourceMark rm;
2239 // Setup code generation tools
2240 CodeBuffer buffer("deopt_blob", 2048, 1024);
2241 MacroAssembler* masm = new MacroAssembler(&buffer);
2242 int frame_size_in_words;
2243 OopMap* map = NULL;
2244 OopMapSet *oop_maps = new OopMapSet();
2245
2246 #ifdef BUILTIN_SIM
2247 AArch64Simulator *simulator;
2248 if (NotifySimulator) {
2249 simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2250 simulator->notifyCompile(const_cast<char*>("SharedRuntime::deopt_blob"), __ pc());
2251 }
2252 #endif
2253
2254 // -------------
2255 // This code enters when returning to a de-optimized nmethod. A return
  // address has been pushed on the stack, and return values are in
2257 // registers.
2258 // If we are doing a normal deopt then we were called from the patched
2259 // nmethod from the point we returned to the nmethod. So the return
2260 // address on the stack is wrong by NativeCall::instruction_size
2277 // The current frame is compiled code and may contain many inlined
2278 // functions, each with their own JVM state. We pop the current frame, then
2279 // push all the new frames. Then we call the C routine unpack_frames() to
2280 // populate these frames. Finally unpack_frames() returns us the new target
2281 // address. Notice that callee-save registers are BLOWN here; they have
2282 // already been captured in the vframeArray at the time the return PC was
2283 // patched.
2284 address start = __ pc();
2285 Label cont;
2286
2287 // Prolog for non exception case!
2288
2289 // Save everything in sight.
2290 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2291
2292 // Normal deoptimization. Save exec mode for unpack_frames.
2293 __ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved
2294 __ b(cont);
2295
2296 int reexecute_offset = __ pc() - start;
2297
2298 // Reexecute case
2299 // return address is the pc describes what bci to do re-execute at
2300
2301 // No need to update map as each call to save_live_registers will produce identical oopmap
2302 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2303
2304 __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
2305 __ b(cont);
2306
2307 int exception_offset = __ pc() - start;
2308
2309 // Prolog for exception case
2310
2311 // all registers are dead at this entry point, except for r0, and
2312 // r3 which contain the exception oop and exception pc
2313 // respectively. Set them in TLS and fall thru to the
2314 // unpack_with_exception_in_tls entry point.
2315
2316 __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
2317 __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
2318
2319 int exception_in_tls_offset = __ pc() - start;
2320
2321 // new implementation because exception oop is now passed in JavaThread
2322
2323 // Prolog for exception case
2324 // All registers must be preserved because they might be used by LinearScan
  // Exception oop and throwing PC are passed in JavaThread
2326 // tos: stack at point of call to method that threw the exception (i.e. only
2378 { Label L;
2379 __ ldr(rscratch1, Address(rthread,
2380 JavaThread::last_Java_fp_offset()));
2381 __ cbz(rscratch1, L);
2382 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2383 __ bind(L);
2384 }
2385 #endif // ASSERT
2386 __ mov(c_rarg0, rthread);
2387 __ mov(c_rarg1, rcpool);
2388 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2389 __ blrt(rscratch1, 1, 0, 1);
2390 __ bind(retaddr);
2391
2392 // Need to have an oopmap that tells fetch_unroll_info where to
2393 // find any register it might need.
2394 oop_maps->add_gc_map(__ pc() - start, map);
2395
2396 __ reset_last_Java_frame(false, true);
2397
  // Load UnrollBlock* into r5
2399 __ mov(r5, r0);
2400
2401 __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2402 Label noException;
2403 __ cmpw(rcpool, Deoptimization::Unpack_exception); // Was exception pending?
2404 __ br(Assembler::NE, noException);
2405 __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
2406 // QQQ this is useless it was NULL above
2407 __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
2408 __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2409 __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2410
2411 __ verify_oop(r0);
2412
2413 // Overwrite the result registers with the exception results.
2414 __ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2415 // I think this is useless
2416 // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2417
2418 __ bind(noException);
2530 // Clear fp AND pc
2531 __ reset_last_Java_frame(true, true);
2532
2533 // Collect return values
2534 __ ldrd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
2535 __ ldr(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2536 // I think this is useless (throwing pc?)
2537 // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2538
2539 // Pop self-frame.
2540 __ leave(); // Epilog
2541
2542 // Jump to interpreter
2543 __ ret(lr);
2544
2545 // Make sure all code is generated
2546 masm->flush();
2547
2548 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
2549 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2550
2551 #ifdef BUILTIN_SIM
2552 if (NotifySimulator) {
2553 unsigned char *base = _deopt_blob->code_begin();
2554 simulator->notifyRelocate(start, base - start);
2555 }
2556 #endif
2557 }
2558
// Number of outgoing-argument stack slots a callee must preserve for
// its caller; this port reserves none.
uint SharedRuntime::out_preserve_stack_slots() {
  return 0;
}
2562
2563 #ifdef COMPILER2
2564 //------------------------------generate_uncommon_trap_blob--------------------
2565 void SharedRuntime::generate_uncommon_trap_blob() {
2566 // Allocate space for the code
2567 ResourceMark rm;
2568 // Setup code generation tools
2569 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2570 MacroAssembler* masm = new MacroAssembler(&buffer);
2571
2572 #ifdef BUILTIN_SIM
2573 AArch64Simulator *simulator;
2574 if (NotifySimulator) {
2575 simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2576 simulator->notifyCompile(const_cast<char*>("SharedRuntime:uncommon_trap_blob"), __ pc());
2577 }
2578 #endif
2579
2580 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
2581
2582 address start = __ pc();
2583
2926
2927 RegisterSaver::restore_live_registers(masm);
2928
2929 // exception pending => remove activation and forward to exception handler
2930
2931 __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
2932
2933 __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
2934 __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2935
2936 // -------------
2937 // make sure all code is generated
2938 masm->flush();
2939
2940 // return the blob
2941 // frame_size_words or bytes??
2942 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
2943 }
2944
2945
2946 #ifdef COMPILER2
// This is here instead of runtime_aarch64.cpp because it uses SimpleRuntimeFrame
2948 //
2949 //------------------------------generate_exception_blob---------------------------
2950 // creates exception blob at the end
2951 // Using exception blob, this code is jumped from a compiled method.
// (see emit_exception_handler in aarch64.ad file)
2953 //
2954 // Given an exception pc at a call we call into the runtime for the
2955 // handler in this method. This handler might merely restore state
2956 // (i.e. callee save registers) unwind the frame and jump to the
2957 // exception handler for the nmethod if there is no Java level handler
2958 // for the nmethod.
2959 //
2960 // This code is entered with a jmp.
2961 //
2962 // Arguments:
2963 // r0: exception oop
2964 // r3: exception pc
2965 //
2966 // Results:
|
22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "code/debugInfoRec.hpp"
30 #include "code/icBuffer.hpp"
31 #include "code/vtableStubs.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "interpreter/interp_masm.hpp"
34 #include "oops/compiledICHolder.hpp"
35 #include "prims/jvmtiRedefineClassesTrace.hpp"
36 #include "runtime/sharedRuntime.hpp"
37 #include "runtime/vframeArray.hpp"
38 #include "vmreg_aarch64.inline.hpp"
39 #ifdef COMPILER1
40 #include "c1/c1_Runtime1.hpp"
41 #endif
42 #if defined(COMPILER2) || INCLUDE_JVMCI
43 #include "adfiles/ad_aarch64.hpp"
44 #include "opto/runtime.hpp"
45 #endif
46 #if INCLUDE_JVMCI
47 #include "jvmci/jvmciJavaClasses.hpp"
48 #endif
49
50 #ifdef BUILTIN_SIM
51 #include "../../../../../../simulator/simulator.hpp"
52 #endif
53
54 #define __ masm->
55
56 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
57
58 class SimpleRuntimeFrame {
59
60 public:
61
62 // Most of the runtime stubs have this simple frame layout.
63 // This class exists to make the layout shared in one place.
64 // Offsets are for compiler stack slots, which are jints.
65 enum layout {
    // The frame sender code expects that rfp will be in the "natural" place and
67 // will override any oopMap setting for it. We must therefore force the layout
68 // so that it agrees with the frame sender code.
95 // During deoptimization only the result registers need to be restored,
96 // all the other values have already been extracted.
97 static void restore_result_registers(MacroAssembler* masm);
98
  // Capture info about frame layout.
  // Offsets below are in compiler stack slots (jints), so each 64-bit
  // register occupies 2 consecutive slots.
  enum layout {
                fpu_state_off = 0,
                fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
                // The frame sender code expects that rfp will be in
                // the "natural" place and will override any oopMap
                // setting for it. We must therefore force the layout
                // so that it agrees with the frame sender code.
                r0_off = fpu_state_off+FPUStateSizeInWords,
                // 30 integer registers at 2 slots each, saved below rfp
                // (presumably r0..r29 -- confirm against push_CPU_state).
                rfp_off = r0_off + 30 * 2,
                return_off = rfp_off + 2, // slot for return address
                reg_save_size = return_off + 2};
111
112 };
113
114 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
115 #if defined(COMPILER2) || INCLUDE_JVMCI
116 if (save_vectors) {
117 // Save upper half of vector registers
118 int vect_words = 32 * 8 / wordSize;
119 additional_frame_words += vect_words;
120 }
121 #else
122 assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
123 #endif
124
125 int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
126 reg_save_size*BytesPerInt, 16);
127 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
128 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
129 // The caller will allocate additional_frame_words
130 int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
131 // CodeBlob frame size is in words.
132 int frame_size_in_words = frame_size_in_bytes / wordSize;
133 *total_frame_words = frame_size_in_words;
134
135 // Save registers, fpu state, and flags.
136
137 __ enter();
138 __ push_CPU_state(save_vectors);
139
140 // Set an oopmap for the call site. This oopmap will map all
141 // oop-registers and debug-info registers as callee-saved. This
142 // will allow deoptimization at this safepoint to find all possible
152 // register slots are 8 bytes
153 // wide, 32 floating-point
154 // registers
155 oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
156 r->as_VMReg());
157 }
158 }
159
160 for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
161 FloatRegister r = as_FloatRegister(i);
162 int sp_offset = save_vectors ? (4 * i) : (2 * i);
163 oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
164 r->as_VMReg());
165 }
166
167 return oop_map;
168 }
169
170 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
171 #ifndef COMPILER2
172 assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
173 #endif
174 __ pop_CPU_state(restore_vectors);
175 __ leave();
176 }
177
178 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
179
180 // Just restore result register. Only used by deoptimization. By
181 // now any callee save register that needs to be restored to a c2
182 // caller of the deoptee has been extracted into the vframeArray
183 // and will be stuffed into the c2i adapter we create for later
184 // restoration so only result registers need to be restored here.
185
186 // Restore fp result register
187 __ ldrd(v0, Address(sp, v0_offset_in_bytes()));
188 // Restore integer result register
189 __ ldr(r0, Address(sp, r0_offset_in_bytes()));
190
  // Pop all of the register save area off the stack
192 __ add(sp, sp, round_to(return_offset_in_bytes(), 16));
533 L_ok);
534 const char* msg = "i2c adapter must return to an interpreter frame";
535 __ block_comment(msg);
536 __ stop(msg);
537 __ bind(L_ok);
538 __ block_comment("} verify_i2ce ");
539 #endif
540 }
541
542 // Cut-out for having no stack args.
543 int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
544 if (comp_args_on_stack) {
545 __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
546 __ andr(sp, rscratch1, -16);
547 }
548
549 // Will jump to the compiled code just as if compiled code was doing it.
550 // Pre-load the register-jump target early, to schedule it better.
551 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
552
553 #if INCLUDE_JVMCI
554 if (EnableJVMCI) {
555 // check if this call should be routed towards a specific entry point
556 __ ldr(rscratch2, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
557 Label no_alternative_target;
558 __ cbz(rscratch2, no_alternative_target);
559 __ mov(rscratch1, rscratch2);
560 __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
561 __ bind(no_alternative_target);
562 }
563 #endif // INCLUDE_JVMCI
564
565 // Now generate the shuffle code.
566 for (int i = 0; i < total_args_passed; i++) {
567 if (sig_bt[i] == T_VOID) {
568 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
569 continue;
570 }
571
572 // Pick up 0, 1 or 2 words from SP+offset.
573
574 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
575 "scrambled load targets?");
576 // Load in argument order going down.
577 int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
578 // Point to interpreter value (vs. tag)
579 int next_off = ld_off - Interpreter::stackElementSize;
580 //
581 //
582 //
583 VMReg r_1 = regs[i].first();
584 VMReg r_2 = regs[i].second();
2235 }
2236
2237 // this function returns the adjust size (in number of words) to a c2i adapter
2238 // activation for use during deoptimization
2239 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2240 assert(callee_locals >= callee_parameters,
2241 "test and remove; got more parms than locals");
2242 if (callee_locals < callee_parameters)
2243 return 0; // No adjustment for negative locals
2244 int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2245 // diff is counted in stack words
2246 return round_to(diff, 2);
2247 }
2248
2249
2250 //------------------------------generate_deopt_blob----------------------------
2251 void SharedRuntime::generate_deopt_blob() {
2252 // Allocate space for the code
2253 ResourceMark rm;
2254 // Setup code generation tools
2255 int pad = 0;
2256 #if INCLUDE_JVMCI
2257 if (EnableJVMCI) {
2258 pad += 512; // Increase the buffer size when compiling for JVMCI
2259 }
2260 #endif
2261 CodeBuffer buffer("deopt_blob", 2048+pad, 1024);
2262 MacroAssembler* masm = new MacroAssembler(&buffer);
2263 int frame_size_in_words;
2264 OopMap* map = NULL;
2265 OopMapSet *oop_maps = new OopMapSet();
2266
2267 #ifdef BUILTIN_SIM
2268 AArch64Simulator *simulator;
2269 if (NotifySimulator) {
2270 simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2271 simulator->notifyCompile(const_cast<char*>("SharedRuntime::deopt_blob"), __ pc());
2272 }
2273 #endif
2274
2275 // -------------
2276 // This code enters when returning to a de-optimized nmethod. A return
  // address has been pushed on the stack, and return values are in
2278 // registers.
2279 // If we are doing a normal deopt then we were called from the patched
2280 // nmethod from the point we returned to the nmethod. So the return
2281 // address on the stack is wrong by NativeCall::instruction_size
2298 // The current frame is compiled code and may contain many inlined
2299 // functions, each with their own JVM state. We pop the current frame, then
2300 // push all the new frames. Then we call the C routine unpack_frames() to
2301 // populate these frames. Finally unpack_frames() returns us the new target
2302 // address. Notice that callee-save registers are BLOWN here; they have
2303 // already been captured in the vframeArray at the time the return PC was
2304 // patched.
2305 address start = __ pc();
2306 Label cont;
2307
2308 // Prolog for non exception case!
2309
2310 // Save everything in sight.
2311 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2312
2313 // Normal deoptimization. Save exec mode for unpack_frames.
2314 __ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved
2315 __ b(cont);
2316
2317 int reexecute_offset = __ pc() - start;
2318 #if defined(INCLUDE_JVMCI) && !defined(COMPILER1)
2319 if (EnableJVMCI && UseJVMCICompiler) {
2320 // JVMCI does not use this kind of deoptimization
2321 __ should_not_reach_here();
2322 }
2323 #endif
2324
2325 // Reexecute case
2326 // return address is the pc describes what bci to do re-execute at
2327
2328 // No need to update map as each call to save_live_registers will produce identical oopmap
2329 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2330
2331 __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
2332 __ b(cont);
2333
2334 #if INCLUDE_JVMCI
2335 Label after_fetch_unroll_info_call;
2336 int implicit_exception_uncommon_trap_offset = 0;
2337 int uncommon_trap_offset = 0;
2338
2339 if (EnableJVMCI) {
2340 implicit_exception_uncommon_trap_offset = __ pc() - start;
2341
2342 __ ldr(lr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2343 __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2344
2345 uncommon_trap_offset = __ pc() - start;
2346
2347 // Save everything in sight.
2348 RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2349 // fetch_unroll_info needs to call last_java_frame()
2350 Label retaddr;
2351 __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
2352
2353 __ ldrw(c_rarg1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
2354 __ movw(rscratch1, -1);
2355 __ strw(rscratch1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
2356
2357 __ movw(rcpool, (int32_t)Deoptimization::Unpack_reexecute);
2358 __ mov(c_rarg0, rthread);
2359 __ lea(rscratch1,
2360 RuntimeAddress(CAST_FROM_FN_PTR(address,
2361 Deoptimization::uncommon_trap)));
2362 __ blrt(rscratch1, 2, 0, MacroAssembler::ret_type_integral);
2363 __ bind(retaddr);
2364 oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
2365
2366 __ reset_last_Java_frame(false, false);
2367
2368 __ b(after_fetch_unroll_info_call);
2369 } // EnableJVMCI
2370 #endif // INCLUDE_JVMCI
2371
2372 int exception_offset = __ pc() - start;
2373
2374 // Prolog for exception case
2375
2376 // all registers are dead at this entry point, except for r0, and
2377 // r3 which contain the exception oop and exception pc
2378 // respectively. Set them in TLS and fall thru to the
2379 // unpack_with_exception_in_tls entry point.
2380
2381 __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
2382 __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
2383
2384 int exception_in_tls_offset = __ pc() - start;
2385
2386 // new implementation because exception oop is now passed in JavaThread
2387
2388 // Prolog for exception case
2389 // All registers must be preserved because they might be used by LinearScan
  // Exception oop and throwing PC are passed in JavaThread
2391 // tos: stack at point of call to method that threw the exception (i.e. only
2443 { Label L;
2444 __ ldr(rscratch1, Address(rthread,
2445 JavaThread::last_Java_fp_offset()));
2446 __ cbz(rscratch1, L);
2447 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2448 __ bind(L);
2449 }
2450 #endif // ASSERT
2451 __ mov(c_rarg0, rthread);
2452 __ mov(c_rarg1, rcpool);
2453 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2454 __ blrt(rscratch1, 1, 0, 1);
2455 __ bind(retaddr);
2456
2457 // Need to have an oopmap that tells fetch_unroll_info where to
2458 // find any register it might need.
2459 oop_maps->add_gc_map(__ pc() - start, map);
2460
2461 __ reset_last_Java_frame(false, true);
2462
2463 #if INCLUDE_JVMCI
2464 if (EnableJVMCI) {
2465 __ bind(after_fetch_unroll_info_call);
2466 }
2467 #endif
2468
2469 // Load UnrollBlock* into r5
2470 __ mov(r5, r0);
2471
2472 __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2473 Label noException;
2474 __ cmpw(rcpool, Deoptimization::Unpack_exception); // Was exception pending?
2475 __ br(Assembler::NE, noException);
2476 __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
2477 // QQQ this is useless it was NULL above
2478 __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
2479 __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2480 __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2481
2482 __ verify_oop(r0);
2483
2484 // Overwrite the result registers with the exception results.
2485 __ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2486 // I think this is useless
2487 // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2488
2489 __ bind(noException);
2601 // Clear fp AND pc
2602 __ reset_last_Java_frame(true, true);
2603
2604 // Collect return values
2605 __ ldrd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
2606 __ ldr(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2607 // I think this is useless (throwing pc?)
2608 // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2609
2610 // Pop self-frame.
2611 __ leave(); // Epilog
2612
2613 // Jump to interpreter
2614 __ ret(lr);
2615
2616 // Make sure all code is generated
2617 masm->flush();
2618
2619 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
2620 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2621 #if INCLUDE_JVMCI
2622 if (EnableJVMCI) {
2623 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
2624 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
2625 }
2626 #endif
2627 #ifdef BUILTIN_SIM
2628 if (NotifySimulator) {
2629 unsigned char *base = _deopt_blob->code_begin();
2630 simulator->notifyRelocate(start, base - start);
2631 }
2632 #endif
2633 }
2634
// Number of outgoing-argument stack slots a callee must preserve for
// its caller; this port reserves none.
uint SharedRuntime::out_preserve_stack_slots() {
  return 0;
}
2638
2639 #if defined(COMPILER2) || INCLUDE_JVMCI
2640 //------------------------------generate_uncommon_trap_blob--------------------
2641 void SharedRuntime::generate_uncommon_trap_blob() {
2642 // Allocate space for the code
2643 ResourceMark rm;
2644 // Setup code generation tools
2645 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2646 MacroAssembler* masm = new MacroAssembler(&buffer);
2647
2648 #ifdef BUILTIN_SIM
2649 AArch64Simulator *simulator;
2650 if (NotifySimulator) {
2651 simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
2652 simulator->notifyCompile(const_cast<char*>("SharedRuntime:uncommon_trap_blob"), __ pc());
2653 }
2654 #endif
2655
2656 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
2657
2658 address start = __ pc();
2659
3002
3003 RegisterSaver::restore_live_registers(masm);
3004
3005 // exception pending => remove activation and forward to exception handler
3006
3007 __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
3008
3009 __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
3010 __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3011
3012 // -------------
3013 // make sure all code is generated
3014 masm->flush();
3015
3016 // return the blob
3017 // frame_size_words or bytes??
3018 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3019 }
3020
3021
3022 #if defined(COMPILER2) || INCLUDE_JVMCI
// This is here instead of runtime_aarch64.cpp because it uses SimpleRuntimeFrame
3024 //
3025 //------------------------------generate_exception_blob---------------------------
3026 // creates exception blob at the end
3027 // Using exception blob, this code is jumped from a compiled method.
// (see emit_exception_handler in aarch64.ad file)
3029 //
3030 // Given an exception pc at a call we call into the runtime for the
3031 // handler in this method. This handler might merely restore state
3032 // (i.e. callee save registers) unwind the frame and jump to the
3033 // exception handler for the nmethod if there is no Java level handler
3034 // for the nmethod.
3035 //
3036 // This code is entered with a jmp.
3037 //
3038 // Arguments:
3039 // r0: exception oop
3040 // r3: exception pc
3041 //
3042 // Results:
|