#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "oops/compiledICHolder.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "vmreg_x86.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#if INCLUDE_JVMCI
#include "jvmci/jvmciJavaClasses.hpp"
#endif
#include "vm_version_x86.hpp"

#define __ masm->
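// Convenience shorthand: every "__ foo(...)" below expands to "masm->foo(...)",
// i.e. it emits code through the current MacroAssembler.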

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;

class SimpleRuntimeFrame {

 public:

  // Most of the runtime stubs have this simple frame layout.
  // This class exists to make the layout shared in one place.
  // Offsets are for compiler stack slots, which are jints.
  enum layout {
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
    rbp_off2,
    return_off, return_off2,
    framesize

// ... (elided: rest of SimpleRuntimeFrame; the members below belong to class RegisterSaver) ...

  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
  static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }

  // During deoptimization only the result registers need to be restored;
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
  int off = 0;
  int num_xmm_regs = XMMRegisterImpl::number_of_registers;
  if (UseAVX < 3) {
    num_xmm_regs = num_xmm_regs/2;
  }
#if defined(COMPILER2) || INCLUDE_JVMCI
  if (save_vectors) {
    assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
    assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
  }
#else
  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
#endif

  // Always make the frame size 16-byte aligned; both vector and non-vector stacks are always allocated.
  int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs);
  // OopMap frame size is in compiler stack slots (jints), not bytes or words.
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save registers, fpu state, and flags.
  // We assume the caller has already pushed the return address onto the
  // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
  // to be under the return address, like a normal enter.

  __ enter(); // rsp becomes 16-byte aligned here

// ... (elided) ...

        __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
      }
      // Save full ZMM registers (16..num_xmm_regs)
      base_addr = XSAVE_AREA_UPPERBANK;
      off = 0;
      int vector_len = Assembler::AVX_512bit;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
      }
    }
  } else {
    if (VM_Version::supports_evex()) {
      // Save upper bank of ZMM registers (16..31) for double/float usage
      int base_addr = XSAVE_AREA_UPPERBANK;
      off = 0;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
      }
    }
  }
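  // Zero the upper bits of all YMM/ZMM registers: leaving dirty upper AVX
  // state around legacy SSE code (such as runtime code we may call into)
  // incurs an AVX-SSE transition penalty on Intel hardware.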
  __ vzeroupper();
  if (frame::arg_reg_save_area_bytes != 0) {
    // Allocate argument register save area
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }

  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = new OopMap(frame_size_in_slots, 0);

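// STACK_OFFSET converts a jint-sized slot index in this frame into the VMReg
// stack location that OopMap entries expect.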
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x))

  map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
  // rbp location is known implicitly by the frame sender code, needs no oopmap

// ... (elided) ...

        off += delta;
      }
    }
  }

  return map;
}

void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
  int num_xmm_regs = XMMRegisterImpl::number_of_registers;
  if (UseAVX < 3) {
    num_xmm_regs = num_xmm_regs/2;
  }
  if (frame::arg_reg_save_area_bytes != 0) {
    // Pop arg register save area
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

#if defined(COMPILER2) || INCLUDE_JVMCI
  if (restore_vectors) {
    assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
    assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
  }
#else
  assert(!restore_vectors, "vectors are generated only by C2");
#endif

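  // As on the save path, clear dirty upper AVX state before any SSE-only code runs.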
  __ vzeroupper();

  // On EVEX-enabled targets everything is handled in pop fpu state
  if (restore_vectors) {
    // Restore upper half of YMM registers (0..15)
    int base_addr = XSAVE_AREA_YMM_BEGIN;
    for (int n = 0; n < 16; n++) {
      __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
    }
    if (VM_Version::supports_evex()) {
      // Restore upper half of ZMM registers (0..15)
      base_addr = XSAVE_AREA_ZMM_BEGIN;
      for (int n = 0; n < 16; n++) {
        __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
      }
      // Restore full ZMM registers (16..num_xmm_regs)
      base_addr = XSAVE_AREA_UPPERBANK;
      int vector_len = Assembler::AVX_512bit;
      int off = 0;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
      }

// ... (elided) ...

  return round_to(stk_args, 2);
}

// Patch the caller's callsite with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, L);

  // Save the current stack pointer
  __ mov(r13, rsp);
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee.
  // rax isn't live, so capture the return address while we easily can.
  __ movptr(rax, Address(rsp, 0));

  // align stack so push_CPU_state doesn't fault
  __ andptr(rsp, -(StackAlignmentInBytes));
  __ push_CPU_state();
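  // Full CPU state is saved; zero the upper AVX state before the runtime call below.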
  __ vzeroupper();
  // VM needs caller's callsite
  // VM needs target method
  // This needs to be a long call since we will relocate this adapter to
  // the codeBuffer and it may not reach

  // Allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }
  __ mov(c_rarg0, rbx);
  __ mov(c_rarg1, rax);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));

  // De-allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

  __ vzeroupper();
  __ pop_CPU_state();
  // restore sp
  __ mov(rsp, r13);
  __ bind(L);
}


static void gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {
  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all. We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one). Check for a
  // compiled target. If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

// ... (elided) ...

  save_or_restore_arguments(masm, stack_slots, total_in_args,
                            arg_save_area, map, in_regs, in_sig_bt);

  address the_pc = __ pc();
  oop_maps->add_gc_map( __ offset(), map);
  __ set_last_Java_frame(rsp, noreg, the_pc);

  __ block_comment("block_for_jni_critical");
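  // If a GC is in progress, block here until it completes before entering the
  // critical native, whose array arguments must stay pinned while it runs.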
  __ movptr(c_rarg0, r15_thread);
  __ mov(r12, rsp); // remember sp
  __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
  __ andptr(rsp, -16); // align stack as required by ABI
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
  __ mov(rsp, r12); // restore sp
  __ reinit_heapbase();

  __ reset_last_Java_frame(false);

  save_or_restore_arguments(masm, stack_slots, total_in_args,
                            arg_save_area, NULL, in_regs, in_sig_bt);

  __ bind(cont);
#ifdef ASSERT
  if (StressCriticalJNINatives) {
    // Stress register saving
    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
    save_or_restore_arguments(masm, stack_slots, total_in_args,
                              arg_save_area, map, in_regs, in_sig_bt);
    // Destroy argument registers
    for (int i = 0; i < total_in_args - 1; i++) {
      if (in_regs[i].first()->is_Register()) {
        const Register reg = in_regs[i].first()->as_Register();
        __ xorptr(reg, reg);
      } else if (in_regs[i].first()->is_XMMRegister()) {
        __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
      } else if (in_regs[i].first()->is_FloatRegister()) {
        ShouldNotReachHere();
      } else if (in_regs[i].first()->is_stack()) {
        // Nothing to do
      } else {
        ShouldNotReachHere();

// ... (elided) ...

  // check for safepoint operation in progress and/or pending suspend requests
  {
    Label Continue;

    __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
             SafepointSynchronize::_not_synchronized);

    Label L;
    __ jcc(Assembler::notEqual, L);
    __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
    __ jcc(Assembler::equal, Continue);
    __ bind(L);

    // Don't use call_VM: it would see the possible pending exception and
    // forward it, never returning here, which would prevent us from clearing
    // _last_native_pc down below. call_VM_leaf won't work either, since it
    // checks that rsi/rdi are preserved and correspond to the bcp/locals
    // pointers. So we do the runtime call by hand.
    //
    __ vzeroupper();
    save_native_result(masm, ret_type, stack_slots);
    __ mov(c_rarg0, r15_thread);
    __ mov(r12, rsp); // remember sp
    __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
    __ andptr(rsp, -16); // align stack as required by ABI
    if (!is_critical_native) {
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
    } else {
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
    }
    __ mov(rsp, r12); // restore sp
    __ reinit_heapbase();
    // Restore any method result value
    restore_native_result(masm, ret_type, stack_slots);

    if (is_critical_native) {
      // The call above performed the transition to thread_in_Java so
      // skip the transition logic below.
      __ jmpb(after_transition);
    }

// ... (elided) ...

  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
  restore_args(masm, total_c_args, c_arg, out_regs);

#ifdef ASSERT
  { Label L;
    __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
    __ jcc(Assembler::equal, L);
    __ stop("no pending exception allowed on exit from monitorenter");
    __ bind(L);
  }
#endif
  __ jmp(lock_done);

  // END Slow path lock

  // BEGIN Slow path unlock
  __ bind(slow_path_unlock);

  // If we haven't already saved the native result, we must save it now, as
  // the xmm registers are still exposed.
  __ vzeroupper();
  if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
    save_native_result(masm, ret_type, stack_slots);
  }

  __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));

  __ mov(c_rarg0, obj_reg);
  __ mov(c_rarg2, r15_thread);
  __ mov(r12, rsp); // remember sp
  __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
  __ andptr(rsp, -16); // align stack as required by ABI

  // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
  // NOTE that obj_reg == rbx currently
  __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
  __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);

  // args are (oop obj, BasicLock* lock, JavaThread* thread)
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
  __ mov(rsp, r12); // restore sp

// ... (elided) ...
    __ jcc(Assembler::equal, L);
    __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
    __ bind(L);
  }
#endif /* ASSERT */

  __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx);

  if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
    restore_native_result(masm, ret_type, stack_slots);
  }
  __ jmp(unlock_done);

  // END Slow path unlock

  } // synchronized

  // SLOW PATH Reguard the stack if needed

  __ bind(reguard);
  __ vzeroupper();
  save_native_result(masm, ret_type, stack_slots);
  __ mov(r12, rsp); // remember sp
  __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
  __ andptr(rsp, -16); // align stack as required by ABI
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
  __ mov(rsp, r12); // restore sp
  __ reinit_heapbase();
  restore_native_result(masm, ret_type, stack_slots);
  // and continue
  __ jmp(reguard_done);


  __ flush();

  nmethod *nm = nmethod::new_native_nmethod(method,
                                            compile_id,
                                            masm->code(),
                                            vep_offset,
                                            frame_complete,