26 #ifndef _WINDOWS
27 #include "alloca.h"
28 #endif
29 #include "asm/macroAssembler.hpp"
30 #include "asm/macroAssembler.inline.hpp"
31 #include "code/debugInfoRec.hpp"
32 #include "code/icBuffer.hpp"
33 #include "code/vtableStubs.hpp"
34 #include "interpreter/interpreter.hpp"
35 #include "oops/compiledICHolder.hpp"
36 #include "prims/jvmtiRedefineClassesTrace.hpp"
37 #include "runtime/sharedRuntime.hpp"
38 #include "runtime/vframeArray.hpp"
39 #include "vmreg_x86.inline.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_Runtime1.hpp"
42 #endif
43 #ifdef COMPILER2
44 #include "opto/runtime.hpp"
45 #endif
46
47 #define __ masm->
48
49 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
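// StackAlignmentInBytes is 16 on x86_64 and a compiler stack slot holds a jint
// (4 bytes), so this works out to 4 slots per alignment unit.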
50
51 class SimpleRuntimeFrame {
52
53 public:
54
55 // Most of the runtime stubs have this simple frame layout.
56 // This class exists to make the layout shared in one place.
57 // Offsets are for compiler stack slots, which are jints.
58 enum layout {
59 // The frame sender code expects that rbp will be in the "natural" place and
60 // will override any oopMap setting for it. We must therefore force the layout
61 // so that it agrees with the frame sender code.
62 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
63 rbp_off2,
64 return_off, return_off2,
65 framesize
88 DEF_XMM_OFFS(12),
89 DEF_XMM_OFFS(13),
90 DEF_XMM_OFFS(14),
91 DEF_XMM_OFFS(15),
92 DEF_XMM_OFFS(16),
93 DEF_XMM_OFFS(17),
94 DEF_XMM_OFFS(18),
95 DEF_XMM_OFFS(19),
96 DEF_XMM_OFFS(20),
97 DEF_XMM_OFFS(21),
98 DEF_XMM_OFFS(22),
99 DEF_XMM_OFFS(23),
100 DEF_XMM_OFFS(24),
101 DEF_XMM_OFFS(25),
102 DEF_XMM_OFFS(26),
103 DEF_XMM_OFFS(27),
104 DEF_XMM_OFFS(28),
105 DEF_XMM_OFFS(29),
106 DEF_XMM_OFFS(30),
107 DEF_XMM_OFFS(31),
108 fpu_state_end = fpu_state_off + ((FPUStateSizeInWords - 1)*wordSize / BytesPerInt),
109 fpu_stateH_end,
110 r15_off, r15H_off,
111 r14_off, r14H_off,
112 r13_off, r13H_off,
113 r12_off, r12H_off,
114 r11_off, r11H_off,
115 r10_off, r10H_off,
116 r9_off, r9H_off,
117 r8_off, r8H_off,
118 rdi_off, rdiH_off,
119 rsi_off, rsiH_off,
120 ignore_off, ignoreH_off, // extra copy of rbp
121 rsp_off, rspH_off,
122 rbx_off, rbxH_off,
123 rdx_off, rdxH_off,
124 rcx_off, rcxH_off,
125 rax_off, raxH_off,
126 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
127 align_off, alignH_off,
128 flags_off, flagsH_off,
138 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
139 static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
140
141 // Offsets into the register save area
142 // Used by deoptimization when it is managing result register
143 // values on its own
144
145 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; }
146 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; }
147 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; }
148 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; }
149 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
150
151 // During deoptimization only the result registers need to be restored,
152 // all the other values have already been extracted.
153 static void restore_result_registers(MacroAssembler* masm);
154 };
155
156 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
157 int vect_words = 0;
158 int num_xmm_regs = 16;
159 if (UseAVX > 2) {
160 num_xmm_regs = 32;
161 }
162 #ifdef COMPILER2
163 if (save_vectors) {
164 assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
165 assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
166 // Save upper half of YMM registers
167 vect_words = 16 * num_xmm_regs / wordSize;
168 additional_frame_words += vect_words;
169 if (UseAVX > 2) {
170 // Save upper half of ZMM registers as well
171 additional_frame_words += vect_words;
172 }
173 }
174 #else
175 assert(!save_vectors, "vectors are generated only by C2");
176 #endif
177
178 // Always make the frame size 16-byte aligned
179 int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
180 reg_save_size*BytesPerInt, num_xmm_regs);
181 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
182 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
183 // The caller will allocate additional_frame_words
184 int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
185 // CodeBlob frame size is in words.
186 int frame_size_in_words = frame_size_in_bytes / wordSize;
187 *total_frame_words = frame_size_in_words;
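// Note on units: on LP64 a word is 8 bytes and a compiler stack slot is a jint
// (4 bytes), so frame_size_in_slots is always exactly 2 * frame_size_in_words.
// additional_frame_slots is the caller-allocated portion expressed in slots and
// is folded into the oopmap offsets via STACK_OFFSET below.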
188
189 // Save registers, fpu state, and flags.
190 // We assume caller has already pushed the return address onto the
191 // stack, so rsp is 8-byte aligned here.
 192 // We push rbp twice in this sequence because we want the real rbp
 193 // to be under the return address like a normal enter.
194
195 __ enter(); // rsp becomes 16-byte aligned here
265 __ vextractf64x4h(Address(rsp, 896), xmm28);
266 __ vextractf64x4h(Address(rsp, 928), xmm29);
267 __ vextractf64x4h(Address(rsp, 960), xmm30);
268 __ vextractf64x4h(Address(rsp, 992), xmm31);
269 }
270 }
271 if (frame::arg_reg_save_area_bytes != 0) {
272 // Allocate argument register save area
273 __ subptr(rsp, frame::arg_reg_save_area_bytes);
274 }
275
276 // Set an oopmap for the call site. This oopmap will map all
277 // oop-registers and debug-info registers as callee-saved. This
278 // will allow deoptimization at this safepoint to find all possible
279 // debug-info recordings, as well as let GC find all oops.
280
281 OopMapSet *oop_maps = new OopMapSet();
282 OopMap* map = new OopMap(frame_size_in_slots, 0);
283
284 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
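// STACK_OFFSET translates a slot index within the register save area (the
// *_off enum values above) into a VMReg stack location, shifted by
// additional_frame_slots so the offsets stay correct when the caller has
// allocated extra words above the save area.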
285
286 map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
287 map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
288 map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
289 map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
290 // rbp location is known implicitly by the frame sender code, needs no oopmap
 291 // and the location where rbp was saved is ignored
292 map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
293 map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
294 map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg());
295 map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg());
296 map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
297 map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
298 map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
299 map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
300 map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
301 map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
302 map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg());
303 map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg());
304 map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg());
317 map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
318 if (UseAVX > 2) {
319 map->set_callee_saved(STACK_OFFSET(xmm16_off), xmm16->as_VMReg());
320 map->set_callee_saved(STACK_OFFSET(xmm17_off), xmm17->as_VMReg());
321 map->set_callee_saved(STACK_OFFSET(xmm18_off), xmm18->as_VMReg());
322 map->set_callee_saved(STACK_OFFSET(xmm19_off), xmm19->as_VMReg());
323 map->set_callee_saved(STACK_OFFSET(xmm20_off), xmm20->as_VMReg());
324 map->set_callee_saved(STACK_OFFSET(xmm21_off), xmm21->as_VMReg());
325 map->set_callee_saved(STACK_OFFSET(xmm22_off), xmm22->as_VMReg());
326 map->set_callee_saved(STACK_OFFSET(xmm23_off), xmm23->as_VMReg());
327 map->set_callee_saved(STACK_OFFSET(xmm24_off), xmm24->as_VMReg());
328 map->set_callee_saved(STACK_OFFSET(xmm25_off), xmm25->as_VMReg());
329 map->set_callee_saved(STACK_OFFSET(xmm26_off), xmm26->as_VMReg());
330 map->set_callee_saved(STACK_OFFSET(xmm27_off), xmm27->as_VMReg());
331 map->set_callee_saved(STACK_OFFSET(xmm28_off), xmm28->as_VMReg());
332 map->set_callee_saved(STACK_OFFSET(xmm29_off), xmm29->as_VMReg());
333 map->set_callee_saved(STACK_OFFSET(xmm30_off), xmm30->as_VMReg());
334 map->set_callee_saved(STACK_OFFSET(xmm31_off), xmm31->as_VMReg());
335 }
336
337 // %%% These should all be a waste but we'll keep things as they were for now
338 if (true) {
339 map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
340 map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
341 map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
342 map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
343 // rbp location is known implicitly by the frame sender code, needs no oopmap
344 map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
345 map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
346 map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next());
347 map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next());
348 map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
349 map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
350 map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
351 map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
352 map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
353 map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
354 map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next());
355 map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next());
356 map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next());
378 map->set_callee_saved(STACK_OFFSET(xmm23H_off), xmm23->as_VMReg()->next());
379 map->set_callee_saved(STACK_OFFSET(xmm24H_off), xmm24->as_VMReg()->next());
380 map->set_callee_saved(STACK_OFFSET(xmm25H_off), xmm25->as_VMReg()->next());
381 map->set_callee_saved(STACK_OFFSET(xmm26H_off), xmm26->as_VMReg()->next());
382 map->set_callee_saved(STACK_OFFSET(xmm27H_off), xmm27->as_VMReg()->next());
383 map->set_callee_saved(STACK_OFFSET(xmm28H_off), xmm28->as_VMReg()->next());
384 map->set_callee_saved(STACK_OFFSET(xmm29H_off), xmm29->as_VMReg()->next());
385 map->set_callee_saved(STACK_OFFSET(xmm30H_off), xmm30->as_VMReg()->next());
386 map->set_callee_saved(STACK_OFFSET(xmm31H_off), xmm31->as_VMReg()->next());
387 }
388 }
389
390 return map;
391 }
392
393 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
394 if (frame::arg_reg_save_area_bytes != 0) {
395 // Pop arg register save area
396 __ addptr(rsp, frame::arg_reg_save_area_bytes);
397 }
398 #ifdef COMPILER2
399 if (restore_vectors) {
 400 // Restore upper half of YMM registers (0..15)
401 assert(UseAVX > 0, "512bit vectors are supported only with AVX");
402 assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
403 __ vinsertf128h(xmm0, Address(rsp, 0));
404 __ vinsertf128h(xmm1, Address(rsp, 16));
405 __ vinsertf128h(xmm2, Address(rsp, 32));
406 __ vinsertf128h(xmm3, Address(rsp, 48));
407 __ vinsertf128h(xmm4, Address(rsp, 64));
408 __ vinsertf128h(xmm5, Address(rsp, 80));
409 __ vinsertf128h(xmm6, Address(rsp, 96));
410 __ vinsertf128h(xmm7, Address(rsp,112));
411 __ vinsertf128h(xmm8, Address(rsp,128));
412 __ vinsertf128h(xmm9, Address(rsp,144));
413 __ vinsertf128h(xmm10, Address(rsp,160));
414 __ vinsertf128h(xmm11, Address(rsp,176));
415 __ vinsertf128h(xmm12, Address(rsp,192));
416 __ vinsertf128h(xmm13, Address(rsp,208));
417 __ vinsertf128h(xmm14, Address(rsp,224));
418 __ vinsertf128h(xmm15, Address(rsp,240));
456 __ vinsertf64x4h(xmm16, Address(rsp, 512));
457 __ vinsertf64x4h(xmm17, Address(rsp, 544));
458 __ vinsertf64x4h(xmm18, Address(rsp, 576));
459 __ vinsertf64x4h(xmm19, Address(rsp, 608));
460 __ vinsertf64x4h(xmm20, Address(rsp, 640));
461 __ vinsertf64x4h(xmm21, Address(rsp, 672));
462 __ vinsertf64x4h(xmm22, Address(rsp, 704));
463 __ vinsertf64x4h(xmm23, Address(rsp, 736));
464 __ vinsertf64x4h(xmm24, Address(rsp, 768));
465 __ vinsertf64x4h(xmm25, Address(rsp, 800));
466 __ vinsertf64x4h(xmm26, Address(rsp, 832));
467 __ vinsertf64x4h(xmm27, Address(rsp, 864));
468 __ vinsertf64x4h(xmm28, Address(rsp, 896));
469 __ vinsertf64x4h(xmm29, Address(rsp, 928));
470 __ vinsertf64x4h(xmm30, Address(rsp, 960));
471 __ vinsertf64x4h(xmm31, Address(rsp, 992));
472 __ addptr(rsp, 1024);
473 }
474 }
475 #else
476 assert(!restore_vectors, "vectors are generated only by C2");
477 #endif
478 // Recover CPU state
479 __ pop_CPU_state();
480 // Get the rbp described implicitly by the calling convention (no oopMap)
481 __ pop(rbp);
482 }
483
484 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
485
486 // Just restore result register. Only used by deoptimization. By
487 // now any callee save register that needs to be restored to a c2
488 // caller of the deoptee has been extracted into the vframeArray
489 // and will be stuffed into the c2i adapter we create for later
490 // restoration so only result registers need to be restored here.
491
492 // Restore fp result register
493 __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
494 // Restore integer result register
495 __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
496 __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));
791 }
792
793 // Schedule the branch target address early.
794 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
795 __ jmp(rcx);
796 }
797
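// Verifies that pc_reg lies strictly between code_start and code_end, jumping
// to L_ok if it does; otherwise control falls through at L_fail so the caller
// can emit its own failure handling (typically a stop) right after the call.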
798 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
799 address code_start, address code_end,
800 Label& L_ok) {
801 Label L_fail;
802 __ lea(temp_reg, ExternalAddress(code_start));
803 __ cmpptr(pc_reg, temp_reg);
804 __ jcc(Assembler::belowEqual, L_fail);
805 __ lea(temp_reg, ExternalAddress(code_end));
806 __ cmpptr(pc_reg, temp_reg);
807 __ jcc(Assembler::below, L_ok);
808 __ bind(L_fail);
809 }
810
811 static void gen_i2c_adapter(MacroAssembler *masm,
812 int total_args_passed,
813 int comp_args_on_stack,
814 const BasicType *sig_bt,
815 const VMRegPair *regs) {
816
817 // Note: r13 contains the senderSP on entry. We must preserve it since
 818 // we may do an i2c -> c2i transition if we lose a race where compiled
819 // code goes non-entrant while we get args ready.
820 // In addition we use r13 to locate all the interpreter args as
 821 // we must align the stack to 16 bytes on an i2c entry, else we
 822 // lose the alignment expected by all compiled code and the register
 823 // save code can segv when fxsave instructions find an improperly
 824 // aligned stack pointer.
825
826 // Adapters can be frameless because they do not require the caller
827 // to perform additional cleanup work, such as correcting the stack pointer.
828 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
829 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
830 // even if a callee has modified the stack pointer.
831 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
888 comp_words_on_stack = round_to(comp_words_on_stack, 2);
889 __ subptr(rsp, comp_words_on_stack * wordSize);
890 }
891
892
893 // Ensure compiled code always sees stack at proper alignment
894 __ andptr(rsp, -16);
895
 896 // Push the return address and misalign the stack so that the youngest frame
 897 // always sees it where a call instruction would have placed it.
898 __ push(rax);
899
900 // Put saved SP in another register
901 const Register saved_sp = rax;
902 __ movptr(saved_sp, r11);
903
904 // Will jump to the compiled code just as if compiled code was doing it.
905 // Pre-load the register-jump target early, to schedule it better.
906 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
907
908 // Now generate the shuffle code. Pick up all register args and move the
909 // rest through the floating point stack top.
910 for (int i = 0; i < total_args_passed; i++) {
911 if (sig_bt[i] == T_VOID) {
912 // Longs and doubles are passed in native word order, but misaligned
913 // in the 32-bit build.
914 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
915 continue;
916 }
917
918 // Pick up 0, 1 or 2 words from SP+offset.
919
920 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
921 "scrambled load targets?");
922 // Load in argument order going down.
923 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
924 // Point to interpreter value (vs. tag)
925 int next_off = ld_off - Interpreter::stackElementSize;
926 //
927 //
2821 return nm;
2822
2823 }
2824
 2825 // This function returns the adjustment (in number of words) to apply to a c2i
 2826 // adapter activation for use during deoptimization.
2827 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
2828 return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2829 }
2830
2831
2832 uint SharedRuntime::out_preserve_stack_slots() {
2833 return 0;
2834 }
2835
2836 //------------------------------generate_deopt_blob----------------------------
2837 void SharedRuntime::generate_deopt_blob() {
2838 // Allocate space for the code
2839 ResourceMark rm;
2840 // Setup code generation tools
2841 CodeBuffer buffer("deopt_blob", 2048, 1024);
2842 MacroAssembler* masm = new MacroAssembler(&buffer);
2843 int frame_size_in_words;
2844 OopMap* map = NULL;
2845 OopMapSet *oop_maps = new OopMapSet();
2846
2847 // -------------
2848 // This code enters when returning to a de-optimized nmethod. A return
 2849 // address has been pushed on the stack, and return values are in
2850 // registers.
2851 // If we are doing a normal deopt then we were called from the patched
2852 // nmethod from the point we returned to the nmethod. So the return
2853 // address on the stack is wrong by NativeCall::instruction_size
2854 // We will adjust the value so it looks like we have the original return
2855 // address on the stack (like when we eagerly deoptimized).
2856 // In the case of an exception pending when deoptimizing, we enter
2857 // with a return address on the stack that points after the call we patched
2858 // into the exception handler. We have the following register state from,
2859 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
2860 // rax: exception oop
2861 // rbx: exception handler
2870 // The current frame is compiled code and may contain many inlined
2871 // functions, each with their own JVM state. We pop the current frame, then
2872 // push all the new frames. Then we call the C routine unpack_frames() to
2873 // populate these frames. Finally unpack_frames() returns us the new target
2874 // address. Notice that callee-save registers are BLOWN here; they have
2875 // already been captured in the vframeArray at the time the return PC was
2876 // patched.
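// The blob generated below has several entry points, each recorded as an
// offset from 'start': the normal deoptimization prolog (offset 0), the
// reexecute entry (reexecute_offset), the exception entry (exception_offset)
// and the entry used once the exception oop and pc are already in TLS
// (exception_in_tls_offset).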
2877 address start = __ pc();
2878 Label cont;
2879
 2880 // Prolog for non-exception case!
2881
2882 // Save everything in sight.
2883 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2884
2885 // Normal deoptimization. Save exec mode for unpack_frames.
2886 __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
2887 __ jmp(cont);
2888
2889 int reexecute_offset = __ pc() - start;
2890
2891 // Reexecute case
 2892 // the return address is the pc that describes what bci to re-execute at
2893
2894 // No need to update map as each call to save_live_registers will produce identical oopmap
2895 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2896
2897 __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
2898 __ jmp(cont);
2899
2900 int exception_offset = __ pc() - start;
2901
2902 // Prolog for exception case
2903
2904 // all registers are dead at this entry point, except for rax, and
2905 // rdx which contain the exception oop and exception pc
2906 // respectively. Set them in TLS and fall thru to the
2907 // unpack_with_exception_in_tls entry point.
2908
2909 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
2910 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
2911
2912 int exception_in_tls_offset = __ pc() - start;
2913
2914 // new implementation because exception oop is now passed in JavaThread
2915
2916 // Prolog for exception case
2917 // All registers must be preserved because they might be used by LinearScan
 2918 // Exception oop and throwing PC are passed in JavaThread
2919 // tos: stack at point of call to method that threw the exception (i.e. only
2965 __ set_last_Java_frame(noreg, noreg, NULL);
2966 #ifdef ASSERT
2967 { Label L;
2968 __ cmpptr(Address(r15_thread,
2969 JavaThread::last_Java_fp_offset()),
2970 (int32_t)0);
2971 __ jcc(Assembler::equal, L);
2972 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2973 __ bind(L);
2974 }
2975 #endif // ASSERT
2976 __ mov(c_rarg0, r15_thread);
2977 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2978
2979 // Need to have an oopmap that tells fetch_unroll_info where to
2980 // find any register it might need.
2981 oop_maps->add_gc_map(__ pc() - start, map);
2982
2983 __ reset_last_Java_frame(false, false);
2984
2985 // Load UnrollBlock* into rdi
2986 __ mov(rdi, rax);
2987
2988 Label noException;
2989 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending?
2990 __ jcc(Assembler::notEqual, noException);
2991 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
2992 // QQQ this is useless it was NULL above
2993 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2994 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
2995 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
2996
2997 __ verify_oop(rax);
2998
2999 // Overwrite the result registers with the exception results.
3000 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
3001 // I think this is useless
3002 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);
3003
3004 __ bind(noException);
3139 // Clear fp AND pc
3140 __ reset_last_Java_frame(true, true);
3141
3142 // Collect return values
3143 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
3144 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
3145 // I think this is useless (throwing pc?)
3146 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));
3147
3148 // Pop self-frame.
3149 __ leave(); // Epilog
3150
3151 // Jump to interpreter
3152 __ ret(0);
3153
3154 // Make sure all code is generated
3155 masm->flush();
3156
3157 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
3158 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3159 }
3160
3161 #ifdef COMPILER2
3162 //------------------------------generate_uncommon_trap_blob--------------------
3163 void SharedRuntime::generate_uncommon_trap_blob() {
3164 // Allocate space for the code
3165 ResourceMark rm;
3166 // Setup code generation tools
3167 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3168 MacroAssembler* masm = new MacroAssembler(&buffer);
3169
3170 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
3171
3172 address start = __ pc();
3173
3174 if (UseRTMLocking) {
3175 // Abort RTM transaction before possible nmethod deoptimization.
3176 __ xabort(0);
3177 }
3178
|
26 #ifndef _WINDOWS
27 #include "alloca.h"
28 #endif
29 #include "asm/macroAssembler.hpp"
30 #include "asm/macroAssembler.inline.hpp"
31 #include "code/debugInfoRec.hpp"
32 #include "code/icBuffer.hpp"
33 #include "code/vtableStubs.hpp"
34 #include "interpreter/interpreter.hpp"
35 #include "oops/compiledICHolder.hpp"
36 #include "prims/jvmtiRedefineClassesTrace.hpp"
37 #include "runtime/sharedRuntime.hpp"
38 #include "runtime/vframeArray.hpp"
39 #include "vmreg_x86.inline.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_Runtime1.hpp"
42 #endif
43 #ifdef COMPILER2
44 #include "opto/runtime.hpp"
45 #endif
46 #if INCLUDE_JVMCI
47 #include "jvmci/jvmciJavaClasses.hpp"
48 #endif
49
50 #define __ masm->
51
52 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
53
54 class SimpleRuntimeFrame {
55
56 public:
57
58 // Most of the runtime stubs have this simple frame layout.
59 // This class exists to make the layout shared in one place.
60 // Offsets are for compiler stack slots, which are jints.
61 enum layout {
62 // The frame sender code expects that rbp will be in the "natural" place and
63 // will override any oopMap setting for it. We must therefore force the layout
64 // so that it agrees with the frame sender code.
65 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
66 rbp_off2,
67 return_off, return_off2,
68 framesize
91 DEF_XMM_OFFS(12),
92 DEF_XMM_OFFS(13),
93 DEF_XMM_OFFS(14),
94 DEF_XMM_OFFS(15),
95 DEF_XMM_OFFS(16),
96 DEF_XMM_OFFS(17),
97 DEF_XMM_OFFS(18),
98 DEF_XMM_OFFS(19),
99 DEF_XMM_OFFS(20),
100 DEF_XMM_OFFS(21),
101 DEF_XMM_OFFS(22),
102 DEF_XMM_OFFS(23),
103 DEF_XMM_OFFS(24),
104 DEF_XMM_OFFS(25),
105 DEF_XMM_OFFS(26),
106 DEF_XMM_OFFS(27),
107 DEF_XMM_OFFS(28),
108 DEF_XMM_OFFS(29),
109 DEF_XMM_OFFS(30),
110 DEF_XMM_OFFS(31),
111 fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
112 fpu_stateH_end,
113 r15_off, r15H_off,
114 r14_off, r14H_off,
115 r13_off, r13H_off,
116 r12_off, r12H_off,
117 r11_off, r11H_off,
118 r10_off, r10H_off,
119 r9_off, r9H_off,
120 r8_off, r8H_off,
121 rdi_off, rdiH_off,
122 rsi_off, rsiH_off,
123 ignore_off, ignoreH_off, // extra copy of rbp
124 rsp_off, rspH_off,
125 rbx_off, rbxH_off,
126 rdx_off, rdxH_off,
127 rcx_off, rcxH_off,
128 rax_off, raxH_off,
129 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
130 align_off, alignH_off,
131 flags_off, flagsH_off,
141 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
142 static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
143
144 // Offsets into the register save area
145 // Used by deoptimization when it is managing result register
146 // values on its own
147
148 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; }
149 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; }
150 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; }
151 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; }
152 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
153
154 // During deoptimization only the result registers need to be restored,
155 // all the other values have already been extracted.
156 static void restore_result_registers(MacroAssembler* masm);
157 };
158
159 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
160 int vect_words = 0;
161 int ymmhi_offset = -1;
162 int num_xmm_regs = 16;
163 if (UseAVX > 2) {
164 num_xmm_regs = 32;
165 }
166 #if defined(COMPILER2) || INCLUDE_JVMCI
167 if (save_vectors) {
168 assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
169 assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
170 // Save upper half of YMM registers
171 vect_words = 16 * num_xmm_regs / wordSize;
172 ymmhi_offset = additional_frame_words;
173 additional_frame_words += vect_words;
174 if (UseAVX > 2) {
175 // Save upper half of ZMM registers as well
176 additional_frame_words += vect_words;
177 }
178 }
179 #else
180 assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
181 #endif
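// When save_vectors is true, ymmhi_offset (set above) marks where the YMM
// upper-half save area begins; YMMHI_STACK_OFFSET below combines it with a
// per-register byte offset to describe those saved upper halves in the oopmap.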
182
183 // Always make the frame size 16-byte aligned
184 int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
185 reg_save_size*BytesPerInt, num_xmm_regs);
186 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
187 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
188 // The caller will allocate additional_frame_words
189 int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
190 // CodeBlob frame size is in words.
191 int frame_size_in_words = frame_size_in_bytes / wordSize;
192 *total_frame_words = frame_size_in_words;

193
194 // Save registers, fpu state, and flags.
195 // We assume caller has already pushed the return address onto the
196 // stack, so rsp is 8-byte aligned here.
 197 // We push rbp twice in this sequence because we want the real rbp
 198 // to be under the return address like a normal enter.
199
200 __ enter(); // rsp becomes 16-byte aligned here
270 __ vextractf64x4h(Address(rsp, 896), xmm28);
271 __ vextractf64x4h(Address(rsp, 928), xmm29);
272 __ vextractf64x4h(Address(rsp, 960), xmm30);
273 __ vextractf64x4h(Address(rsp, 992), xmm31);
274 }
275 }
276 if (frame::arg_reg_save_area_bytes != 0) {
277 // Allocate argument register save area
278 __ subptr(rsp, frame::arg_reg_save_area_bytes);
279 }
280
281 // Set an oopmap for the call site. This oopmap will map all
282 // oop-registers and debug-info registers as callee-saved. This
283 // will allow deoptimization at this safepoint to find all possible
284 // debug-info recordings, as well as let GC find all oops.
285
286 OopMapSet *oop_maps = new OopMapSet();
287 OopMap* map = new OopMap(frame_size_in_slots, 0);
288
289 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
290 #define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)
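// STACK_OFFSET maps a save-area slot index (the *_off enum values) to a VMReg
// stack location, shifted by the caller-allocated additional_frame_slots.
// YMMHI_STACK_OFFSET does the same for the YMM upper-half save area, converting
// a byte offset within that area into slots and adding ymmhi_offset.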
291
292 map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
293 map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
294 map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
295 map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
296 // rbp location is known implicitly by the frame sender code, needs no oopmap
 297 // and the location where rbp was saved is ignored
298 map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
299 map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
300 map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg());
301 map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg());
302 map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
303 map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
304 map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
305 map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
306 map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
307 map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
308 map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg());
309 map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg());
310 map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg());
323 map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
324 if (UseAVX > 2) {
325 map->set_callee_saved(STACK_OFFSET(xmm16_off), xmm16->as_VMReg());
326 map->set_callee_saved(STACK_OFFSET(xmm17_off), xmm17->as_VMReg());
327 map->set_callee_saved(STACK_OFFSET(xmm18_off), xmm18->as_VMReg());
328 map->set_callee_saved(STACK_OFFSET(xmm19_off), xmm19->as_VMReg());
329 map->set_callee_saved(STACK_OFFSET(xmm20_off), xmm20->as_VMReg());
330 map->set_callee_saved(STACK_OFFSET(xmm21_off), xmm21->as_VMReg());
331 map->set_callee_saved(STACK_OFFSET(xmm22_off), xmm22->as_VMReg());
332 map->set_callee_saved(STACK_OFFSET(xmm23_off), xmm23->as_VMReg());
333 map->set_callee_saved(STACK_OFFSET(xmm24_off), xmm24->as_VMReg());
334 map->set_callee_saved(STACK_OFFSET(xmm25_off), xmm25->as_VMReg());
335 map->set_callee_saved(STACK_OFFSET(xmm26_off), xmm26->as_VMReg());
336 map->set_callee_saved(STACK_OFFSET(xmm27_off), xmm27->as_VMReg());
337 map->set_callee_saved(STACK_OFFSET(xmm28_off), xmm28->as_VMReg());
338 map->set_callee_saved(STACK_OFFSET(xmm29_off), xmm29->as_VMReg());
339 map->set_callee_saved(STACK_OFFSET(xmm30_off), xmm30->as_VMReg());
340 map->set_callee_saved(STACK_OFFSET(xmm31_off), xmm31->as_VMReg());
341 }
342
343 #if defined(COMPILER2) || INCLUDE_JVMCI
344 if (save_vectors) {
345 assert(ymmhi_offset != -1, "save area must exist");
346 map->set_callee_saved(YMMHI_STACK_OFFSET( 0), xmm0->as_VMReg()->next()->next()->next()->next());
347 map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next()->next()->next()->next());
348 map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next()->next()->next()->next());
349 map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next()->next()->next()->next());
350 map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next()->next()->next()->next());
351 map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next()->next()->next()->next());
352 map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next()->next()->next()->next());
353 map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next()->next()->next()->next());
354 map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next()->next()->next()->next());
355 map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next()->next()->next()->next());
356 map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next()->next()->next()->next());
357 map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next()->next()->next()->next());
358 map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next()->next()->next()->next());
359 map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next()->next()->next()->next());
360 map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next()->next()->next()->next());
361 map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next()->next()->next()->next());
362 }
363 #endif
364
365 // %%% These should all be a waste but we'll keep things as they were for now
366 if (true) {
367 map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
368 map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
369 map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
370 map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
371 // rbp location is known implicitly by the frame sender code, needs no oopmap
372 map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
373 map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
374 map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next());
375 map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next());
376 map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
377 map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
378 map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
379 map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
380 map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
381 map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
382 map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next());
383 map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next());
384 map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next());
406 map->set_callee_saved(STACK_OFFSET(xmm23H_off), xmm23->as_VMReg()->next());
407 map->set_callee_saved(STACK_OFFSET(xmm24H_off), xmm24->as_VMReg()->next());
408 map->set_callee_saved(STACK_OFFSET(xmm25H_off), xmm25->as_VMReg()->next());
409 map->set_callee_saved(STACK_OFFSET(xmm26H_off), xmm26->as_VMReg()->next());
410 map->set_callee_saved(STACK_OFFSET(xmm27H_off), xmm27->as_VMReg()->next());
411 map->set_callee_saved(STACK_OFFSET(xmm28H_off), xmm28->as_VMReg()->next());
412 map->set_callee_saved(STACK_OFFSET(xmm29H_off), xmm29->as_VMReg()->next());
413 map->set_callee_saved(STACK_OFFSET(xmm30H_off), xmm30->as_VMReg()->next());
414 map->set_callee_saved(STACK_OFFSET(xmm31H_off), xmm31->as_VMReg()->next());
415 }
416 }
417
418 return map;
419 }
420
421 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
422 if (frame::arg_reg_save_area_bytes != 0) {
423 // Pop arg register save area
424 __ addptr(rsp, frame::arg_reg_save_area_bytes);
425 }
426 #if defined(COMPILER2) || INCLUDE_JVMCI
427 if (restore_vectors) {
 428 // Restore upper half of YMM registers (0..15)
429 assert(UseAVX > 0, "512bit vectors are supported only with AVX");
430 assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
431 __ vinsertf128h(xmm0, Address(rsp, 0));
432 __ vinsertf128h(xmm1, Address(rsp, 16));
433 __ vinsertf128h(xmm2, Address(rsp, 32));
434 __ vinsertf128h(xmm3, Address(rsp, 48));
435 __ vinsertf128h(xmm4, Address(rsp, 64));
436 __ vinsertf128h(xmm5, Address(rsp, 80));
437 __ vinsertf128h(xmm6, Address(rsp, 96));
438 __ vinsertf128h(xmm7, Address(rsp,112));
439 __ vinsertf128h(xmm8, Address(rsp,128));
440 __ vinsertf128h(xmm9, Address(rsp,144));
441 __ vinsertf128h(xmm10, Address(rsp,160));
442 __ vinsertf128h(xmm11, Address(rsp,176));
443 __ vinsertf128h(xmm12, Address(rsp,192));
444 __ vinsertf128h(xmm13, Address(rsp,208));
445 __ vinsertf128h(xmm14, Address(rsp,224));
446 __ vinsertf128h(xmm15, Address(rsp,240));
484 __ vinsertf64x4h(xmm16, Address(rsp, 512));
485 __ vinsertf64x4h(xmm17, Address(rsp, 544));
486 __ vinsertf64x4h(xmm18, Address(rsp, 576));
487 __ vinsertf64x4h(xmm19, Address(rsp, 608));
488 __ vinsertf64x4h(xmm20, Address(rsp, 640));
489 __ vinsertf64x4h(xmm21, Address(rsp, 672));
490 __ vinsertf64x4h(xmm22, Address(rsp, 704));
491 __ vinsertf64x4h(xmm23, Address(rsp, 736));
492 __ vinsertf64x4h(xmm24, Address(rsp, 768));
493 __ vinsertf64x4h(xmm25, Address(rsp, 800));
494 __ vinsertf64x4h(xmm26, Address(rsp, 832));
495 __ vinsertf64x4h(xmm27, Address(rsp, 864));
496 __ vinsertf64x4h(xmm28, Address(rsp, 896));
497 __ vinsertf64x4h(xmm29, Address(rsp, 928));
498 __ vinsertf64x4h(xmm30, Address(rsp, 960));
499 __ vinsertf64x4h(xmm31, Address(rsp, 992));
500 __ addptr(rsp, 1024);
501 }
502 }
503 #else
504 assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
505 #endif
506 // Recover CPU state
507 __ pop_CPU_state();
508 // Get the rbp described implicitly by the calling convention (no oopMap)
509 __ pop(rbp);
510 }
511
512 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
513
514 // Just restore result register. Only used by deoptimization. By
515 // now any callee save register that needs to be restored to a c2
516 // caller of the deoptee has been extracted into the vframeArray
517 // and will be stuffed into the c2i adapter we create for later
518 // restoration so only result registers need to be restored here.
519
520 // Restore fp result register
521 __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
522 // Restore integer result register
523 __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
524 __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));
819 }
820
821 // Schedule the branch target address early.
822 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
823 __ jmp(rcx);
824 }
825
826 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
827 address code_start, address code_end,
828 Label& L_ok) {
829 Label L_fail;
830 __ lea(temp_reg, ExternalAddress(code_start));
831 __ cmpptr(pc_reg, temp_reg);
832 __ jcc(Assembler::belowEqual, L_fail);
833 __ lea(temp_reg, ExternalAddress(code_end));
834 __ cmpptr(pc_reg, temp_reg);
835 __ jcc(Assembler::below, L_ok);
836 __ bind(L_fail);
837 }
838
839 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
840 int total_args_passed,
841 int comp_args_on_stack,
842 const BasicType *sig_bt,
843 const VMRegPair *regs) {
844
845 // Note: r13 contains the senderSP on entry. We must preserve it since
 846 // we may do an i2c -> c2i transition if we lose a race where compiled
847 // code goes non-entrant while we get args ready.
848 // In addition we use r13 to locate all the interpreter args as
 849 // we must align the stack to 16 bytes on an i2c entry, else we
 850 // lose the alignment expected by all compiled code and the register
 851 // save code can segv when fxsave instructions find an improperly
 852 // aligned stack pointer.
853
854 // Adapters can be frameless because they do not require the caller
855 // to perform additional cleanup work, such as correcting the stack pointer.
856 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
857 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
858 // even if a callee has modified the stack pointer.
859 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
916 comp_words_on_stack = round_to(comp_words_on_stack, 2);
917 __ subptr(rsp, comp_words_on_stack * wordSize);
918 }
919
920
921 // Ensure compiled code always sees stack at proper alignment
922 __ andptr(rsp, -16);
923
 924 // Push the return address and misalign the stack so that the youngest frame
 925 // always sees it where a call instruction would have placed it.
926 __ push(rax);
927
928 // Put saved SP in another register
929 const Register saved_sp = rax;
930 __ movptr(saved_sp, r11);
931
932 // Will jump to the compiled code just as if compiled code was doing it.
933 // Pre-load the register-jump target early, to schedule it better.
934 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
935
936 #if INCLUDE_JVMCI
937 if (EnableJVMCI) {
938 // check if this call should be routed towards a specific entry point
939 __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
940 Label no_alternative_target;
941 __ jcc(Assembler::equal, no_alternative_target);
942 __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
943 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
944 __ bind(no_alternative_target);
945 }
946 #endif
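// If JVMCI has installed an alternate call target on the thread, the block
// above redirects the register-jump target (r11) to it and clears the field,
// so the jump lands on that entry instead of the one loaded from
// Method::from_compiled_offset().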
947
948 // Now generate the shuffle code. Pick up all register args and move the
949 // rest through the floating point stack top.
950 for (int i = 0; i < total_args_passed; i++) {
951 if (sig_bt[i] == T_VOID) {
952 // Longs and doubles are passed in native word order, but misaligned
953 // in the 32-bit build.
954 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
955 continue;
956 }
957
958 // Pick up 0, 1 or 2 words from SP+offset.
959
960 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
961 "scrambled load targets?");
962 // Load in argument order going down.
963 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
964 // Point to interpreter value (vs. tag)
965 int next_off = ld_off - Interpreter::stackElementSize;
966 //
967 //
2861 return nm;
2862
2863 }
2864
 2865 // This function returns the adjustment (in number of words) to apply to a c2i
 2866 // adapter activation for use during deoptimization.
2867 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
2868 return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2869 }
2870
2871
2872 uint SharedRuntime::out_preserve_stack_slots() {
2873 return 0;
2874 }
2875
2876 //------------------------------generate_deopt_blob----------------------------
2877 void SharedRuntime::generate_deopt_blob() {
2878 // Allocate space for the code
2879 ResourceMark rm;
2880 // Setup code generation tools
2881 int pad = 0;
2882 #if INCLUDE_JVMCI
2883 if (EnableJVMCI) {
2884 pad += 512; // Increase the buffer size when compiling for JVMCI
2885 }
2886 #endif
2887 CodeBuffer buffer("deopt_blob", 2048+pad, 1024);
2888 MacroAssembler* masm = new MacroAssembler(&buffer);
2889 int frame_size_in_words;
2890 OopMap* map = NULL;
2891 OopMapSet *oop_maps = new OopMapSet();
2892
2893 // -------------
2894 // This code enters when returning to a de-optimized nmethod. A return
 2895 // address has been pushed on the stack, and return values are in
2896 // registers.
2897 // If we are doing a normal deopt then we were called from the patched
2898 // nmethod from the point we returned to the nmethod. So the return
2899 // address on the stack is wrong by NativeCall::instruction_size
2900 // We will adjust the value so it looks like we have the original return
2901 // address on the stack (like when we eagerly deoptimized).
2902 // In the case of an exception pending when deoptimizing, we enter
2903 // with a return address on the stack that points after the call we patched
2904 // into the exception handler. We have the following register state from,
2905 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
2906 // rax: exception oop
2907 // rbx: exception handler
2916 // The current frame is compiled code and may contain many inlined
2917 // functions, each with their own JVM state. We pop the current frame, then
2918 // push all the new frames. Then we call the C routine unpack_frames() to
2919 // populate these frames. Finally unpack_frames() returns us the new target
2920 // address. Notice that callee-save registers are BLOWN here; they have
2921 // already been captured in the vframeArray at the time the return PC was
2922 // patched.
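// As described above, the blob gets multiple entry points recorded as offsets
// from 'start': the normal deopt prolog, reexecute_offset, exception_offset,
// exception_in_tls_offset and, when JVMCI is enabled, the uncommon trap
// entries (uncommon_trap_offset, implicit_exception_uncommon_trap_offset).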
2923 address start = __ pc();
2924 Label cont;
2925
 2926 // Prolog for non-exception case!
2927
2928 // Save everything in sight.
2929 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2930
2931 // Normal deoptimization. Save exec mode for unpack_frames.
2932 __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
2933 __ jmp(cont);
2934
2935 int reexecute_offset = __ pc() - start;
2936 #if INCLUDE_JVMCI && !defined(COMPILER1)
2937 if (EnableJVMCI && UseJVMCICompiler) {
2938 // JVMCI does not use this kind of deoptimization
2939 __ should_not_reach_here();
2940 }
2941 #endif
2942
2943 // Reexecute case
 2944 // the return address is the pc that describes what bci to re-execute at
2945
2946 // No need to update map as each call to save_live_registers will produce identical oopmap
2947 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2948
2949 __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
2950 __ jmp(cont);
2951
2952 #if INCLUDE_JVMCI
2953 Label after_fetch_unroll_info_call;
2954 int implicit_exception_uncommon_trap_offset = 0;
2955 int uncommon_trap_offset = 0;
2956
2957 if (EnableJVMCI) {
2958 implicit_exception_uncommon_trap_offset = __ pc() - start;
2959
2960 __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2961 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), (int32_t)NULL_WORD);
2962
2963 uncommon_trap_offset = __ pc() - start;
2964
2965 // Save everything in sight.
2966 RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2967 // fetch_unroll_info needs to call last_java_frame()
2968 __ set_last_Java_frame(noreg, noreg, NULL);
2969
2970 __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())));
2971 __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1);
2972
2973 __ movl(r14, (int32_t)Deoptimization::Unpack_reexecute);
2974 __ mov(c_rarg0, r15_thread);
2975 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
2976 oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
2977
2978 __ reset_last_Java_frame(false, false);
2979
2980 __ jmp(after_fetch_unroll_info_call);
2981 } // EnableJVMCI
2982 #endif // INCLUDE_JVMCI
2983
2984 int exception_offset = __ pc() - start;
2985
2986 // Prolog for exception case
2987
2988 // all registers are dead at this entry point, except for rax, and
2989 // rdx which contain the exception oop and exception pc
2990 // respectively. Set them in TLS and fall thru to the
2991 // unpack_with_exception_in_tls entry point.
2992
2993 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
2994 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
2995
2996 int exception_in_tls_offset = __ pc() - start;
2997
2998 // new implementation because exception oop is now passed in JavaThread
2999
3000 // Prolog for exception case
3001 // All registers must be preserved because they might be used by LinearScan
 3002 // Exception oop and throwing PC are passed in JavaThread
3003 // tos: stack at point of call to method that threw the exception (i.e. only
3049 __ set_last_Java_frame(noreg, noreg, NULL);
3050 #ifdef ASSERT
3051 { Label L;
3052 __ cmpptr(Address(r15_thread,
3053 JavaThread::last_Java_fp_offset()),
3054 (int32_t)0);
3055 __ jcc(Assembler::equal, L);
3056 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
3057 __ bind(L);
3058 }
3059 #endif // ASSERT
3060 __ mov(c_rarg0, r15_thread);
3061 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
3062
3063 // Need to have an oopmap that tells fetch_unroll_info where to
3064 // find any register it might need.
3065 oop_maps->add_gc_map(__ pc() - start, map);
3066
3067 __ reset_last_Java_frame(false, false);
3068
3069 #if INCLUDE_JVMCI
3070 if (EnableJVMCI) {
3071 __ bind(after_fetch_unroll_info_call);
3072 }
3073 #endif
3074
3075 // Load UnrollBlock* into rdi
3076 __ mov(rdi, rax);
3077
3078 Label noException;
3079 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending?
3080 __ jcc(Assembler::notEqual, noException);
3081 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
3082 // QQQ this is useless it was NULL above
3083 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
3084 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
3085 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
3086
3087 __ verify_oop(rax);
3088
3089 // Overwrite the result registers with the exception results.
3090 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
3091 // I think this is useless
3092 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);
3093
3094 __ bind(noException);
3229 // Clear fp AND pc
3230 __ reset_last_Java_frame(true, true);
3231
3232 // Collect return values
3233 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
3234 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
3235 // I think this is useless (throwing pc?)
3236 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));
3237
3238 // Pop self-frame.
3239 __ leave(); // Epilog
3240
3241 // Jump to interpreter
3242 __ ret(0);
3243
3244 // Make sure all code is generated
3245 masm->flush();
3246
3247 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
3248 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3249 #if INCLUDE_JVMCI
3250 if (EnableJVMCI) {
3251 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
3252 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
3253 }
3254 #endif
3255 }
3256
3257 #ifdef COMPILER2
3258 //------------------------------generate_uncommon_trap_blob--------------------
3259 void SharedRuntime::generate_uncommon_trap_blob() {
3260 // Allocate space for the code
3261 ResourceMark rm;
3262 // Setup code generation tools
3263 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3264 MacroAssembler* masm = new MacroAssembler(&buffer);
3265
3266 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
3267
3268 address start = __ pc();
3269
3270 if (UseRTMLocking) {
3271 // Abort RTM transaction before possible nmethod deoptimization.
3272 __ xabort(0);
3273 }
3274