#ifdef USE_PRAGMA_IDENT_SRC
#pragma ident "@(#)sharedRuntime_x86_32.cpp 1.56 07/09/17 09:26:01 JVM"
#endif
/*
 * Copyright 2003-2007 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *

  // During deoptimization only the result registers need to be restored;
  // all the other values have already been extracted.

  static void restore_result_registers(MacroAssembler* masm);

};

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
                                           int* total_frame_words, bool verify_fpu) {

  int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize;
  int frame_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_words;

  assert(FPUStateSizeInWords == 27, "update stack layout");

  // Save registers, fpu state, and flags.
  // We assume the caller has already pushed the return address slot on the stack.
  // We push rbp twice in this sequence because we want the real rbp
  // to be under the return address like a normal enter, and we want to use pushad.
  // We push by hand instead of using push instructions.
  __ enter();
  __ pushad();
  __ pushfd();
  __ subl(rsp,FPU_regs_live*sizeof(jdouble)); // Make room for the FPU register area
  __ push_FPU_state();          // Save FPU state & init

  if (verify_fpu) {
    // Some stubs may have non-standard FPU control word settings so
    // only check and reset the value when it is required to be the
    // standard value. The safepoint blob in particular can be used
    // in methods which are using the 24-bit control word for
    // optimized float math.

#ifdef ASSERT
    // Make sure the control word has the expected value
    Label ok;
    __ cmpw(Address(rsp, 0), StubRoutines::fpu_cntrl_wrd_std());
    __ jccb(Assembler::equal, ok);
    __ stop("corrupted control word detected");
    __ bind(ok);
#endif

    // Reset the control word to guard against exceptions being unmasked
    // since fstp_d can cause FPU stack underflow exceptions. Write it
  if( UseSSE == 1 ) {
    __ movflt(xmm0,Address(rsp,xmm0_off*wordSize));
    __ movflt(xmm1,Address(rsp,xmm1_off*wordSize));
    __ movflt(xmm2,Address(rsp,xmm2_off*wordSize));
    __ movflt(xmm3,Address(rsp,xmm3_off*wordSize));
    __ movflt(xmm4,Address(rsp,xmm4_off*wordSize));
    __ movflt(xmm5,Address(rsp,xmm5_off*wordSize));
    __ movflt(xmm6,Address(rsp,xmm6_off*wordSize));
    __ movflt(xmm7,Address(rsp,xmm7_off*wordSize));
  } else if( UseSSE >= 2 ) {
    __ movdbl(xmm0,Address(rsp,xmm0_off*wordSize));
    __ movdbl(xmm1,Address(rsp,xmm1_off*wordSize));
    __ movdbl(xmm2,Address(rsp,xmm2_off*wordSize));
    __ movdbl(xmm3,Address(rsp,xmm3_off*wordSize));
    __ movdbl(xmm4,Address(rsp,xmm4_off*wordSize));
    __ movdbl(xmm5,Address(rsp,xmm5_off*wordSize));
    __ movdbl(xmm6,Address(rsp,xmm6_off*wordSize));
    __ movdbl(xmm7,Address(rsp,xmm7_off*wordSize));
  }
  __ pop_FPU_state();
  __ addl(rsp,FPU_regs_live*sizeof(jdouble)); // Pop FPU registers

  __ popfd();
  __ popad();
  // Get the rbp described implicitly by the frame sender code (no oopMap)
  __ popl(rbp);

}

void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore the result registers. Only used by deoptimization. By
  // now any callee-save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration, so only result registers need to be restored here.

  __ frstor(Address(rsp, 0));   // Restore fpu state

  // Recover XMM & FPU state
  if( UseSSE == 1 ) {
    __ movflt(xmm0, Address(rsp, xmm0_off*wordSize));
  } else if( UseSSE >= 2 ) {
    __ movdbl(xmm0, Address(rsp, xmm0_off*wordSize));
  }
  __ movl(rax, Address(rsp, rax_off*wordSize));
  __ movl(rdx, Address(rsp, rdx_off*wordSize));
  // Pop all of the register save area off the stack except the return address
  __ addl(rsp, return_off * wordSize);
}

// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
static int reg2offset_in(VMReg r) {
  // Account for the saved rbp and return address
  // This should really be in_preserve_stack_slots
  return (r->reg2stack() + 2) * VMRegImpl::stack_slot_size;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}
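
// Worked example (illustrative, not part of the original code): on this
// 32-bit port VMRegImpl::stack_slot_size is 4 and out_preserve_stack_slots()
// returns 0 (see below), so a value in stack slot 3 lands at
//   reg2offset_in(slot 3)  == (3 + 2) * 4 == byte offset 20 from rbp
//                             (past the saved rbp and return address), while
//   reg2offset_out(slot 3) == (3 + 0) * 4 == byte offset 12 from rsp.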

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than SharedInfo::stack0 are registers, those above
        regs[i].set2(VMRegImpl::stack2reg(dstack));
        dstack += 2;
      }
      break;
    case T_VOID: regs[i].set_bad(); break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  // The return value can be an odd number of VMRegImpl stack slots; make it a multiple of 2.
  return round_to(stack, 2);
}

// Patch the caller's callsite with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  __ verify_oop(rbx);
  __ cmpl(Address(rbx, in_bytes(methodOopDesc::code_offset())), NULL_WORD);
  __ jcc(Assembler::equal, L);
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee.
  // rax isn't live so capture the return address while we easily can.
  __ movl(rax, Address(rsp, 0));
  __ pushad();
  __ pushfd();

  if (UseSSE == 1) {
    __ subl(rsp, 2*wordSize);
    __ movflt(Address(rsp, 0), xmm0);
    __ movflt(Address(rsp, wordSize), xmm1);
  }
  if (UseSSE >= 2) {
    __ subl(rsp, 4*wordSize);
    __ movdbl(Address(rsp, 0), xmm0);
    __ movdbl(Address(rsp, 2*wordSize), xmm1);
  }
#ifdef COMPILER2
  // C2 may leave the stack dirty if not in SSE2+ mode
  if (UseSSE >= 2) {
    __ verify_FPU(0, "c2i transition should have clean FPU stack");
  } else {
    __ empty_FPU_stack();
  }
#endif /* COMPILER2 */

  // VM needs caller's callsite
  __ pushl(rax);
  // VM needs target method
  __ pushl(rbx);
  __ verify_oop(rbx);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
  __ addl(rsp, 2*wordSize);

  if (UseSSE == 1) {
    __ movflt(xmm0, Address(rsp, 0));
    __ movflt(xmm1, Address(rsp, wordSize));
    __ addl(rsp, 2*wordSize);
  }
  if (UseSSE >= 2) {
    __ movdbl(xmm0, Address(rsp, 0));
    __ movdbl(xmm1, Address(rsp, 2*wordSize));
    __ addl(rsp, 4*wordSize);
  }

  __ popfd();
  __ popad();
  __ bind(L);
}


// Helper function to put tags on the interpreter stack.
static void tag_stack(MacroAssembler *masm, const BasicType sig, int st_off) {
  if (TaggedStackInterpreter) {
    int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0);
    if (sig == T_OBJECT || sig == T_ARRAY) {
      __ movl(Address(rsp, tag_offset), frame::TagReference);
    } else if (sig == T_LONG || sig == T_DOUBLE) {
      int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1);
      __ movl(Address(rsp, next_tag_offset), frame::TagValue);
      __ movl(Address(rsp, tag_offset), frame::TagValue);
    } else {
      __ movl(Address(rsp, tag_offset), frame::TagValue);
    }
  }
}
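
// Illustrative sketch (assumption: with TaggedStackInterpreter every value
// slot is paired with a tag slot): tagging a T_LONG at st_off writes one tag
// per 32-bit half, i.e.
//   [rsp + st_off + expr_tag_offset_in_bytes(0)] <- frame::TagValue
//   [rsp + st_off + expr_tag_offset_in_bytes(1)] <- frame::TagValue
// whereas a T_OBJECT writes a single frame::TagReference so the slot can be
// recognized as holding an oop.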

// With tagged stacks, double and long values are not contiguous.
static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
  int next_off = st_off - Interpreter::stackElementSize();
  if (TaggedStackInterpreter) {
    __ movdbl(Address(rsp, next_off), r);
    // Move the top half up and put the tag in the middle.
    __ movl(rdi, Address(rsp, next_off+wordSize));
    __ movl(Address(rsp, st_off), rdi);
    tag_stack(masm, T_DOUBLE, next_off);
  } else {
    __ movdbl(Address(rsp, next_off), r);
  }
}

static void gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed,
  // compiled target. If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

#ifdef COMPILER2
  // C2 may leave the stack dirty if not in SSE2+ mode
  if (UseSSE >= 2) {
    __ verify_FPU(0, "c2i transition should have clean FPU stack");
  } else {
    __ empty_FPU_stack();
  }
#endif /* COMPILER2 */

  // Since all args are passed on the stack,
  // total_args_passed * Interpreter::stackElementSize() is the space we need.
  int extraspace = total_args_passed * Interpreter::stackElementSize();

  // Get return address
  __ popl(rax);

  // set senderSP value
  __ movl(rsi, rsp);

  __ subl(rsp, extraspace);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // st_off points to lowest address on stack.
    int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize();
    // Say 4 args:
    // i   st_off
    // 0   12 T_LONG
    // 1    8 T_VOID
    // 2    4 T_OBJECT
    // 3    0 T_BOOL
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }

    if (r_1->is_stack()) {
      // memory to memory copy (rdi serves as the scratch register)
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;

      if (!r_2->is_valid()) {
        __ movl(rdi, Address(rsp, ld_off));
        __ movl(Address(rsp, st_off), rdi);
        tag_stack(masm, sig_bt[i], st_off);
      } else {

        // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW
        // st_off == MSW, st_off-wordSize == LSW

        int next_off = st_off - Interpreter::stackElementSize();
        __ movl(rdi, Address(rsp, ld_off));
        __ movl(Address(rsp, next_off), rdi);
        __ movl(rdi, Address(rsp, ld_off + wordSize));
        __ movl(Address(rsp, st_off), rdi);
        tag_stack(masm, sig_bt[i], next_off);
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        __ movl(Address(rsp, st_off), r);
        tag_stack(masm, sig_bt[i], st_off);
      } else {
        // long/double in gpr
        ShouldNotReachHere();
      }
    } else {
      assert(r_1->is_XMMRegister(), "");
      if (!r_2->is_valid()) {
        __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
        tag_stack(masm, sig_bt[i], st_off);
      } else {
        assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type");
        move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
      }
    }
  }

  // Schedule the branch target address early.
  __ movl(rcx, Address(rbx, in_bytes(methodOopDesc::interpreter_entry_offset())));
  // And repush the original return address
  __ pushl(rax);
  __ jmp(rcx);
}


// For tagged stacks, double and long values aren't contiguous on the stack,
// so get them contiguous for the xmm load.
static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
  int next_val_off = ld_off - Interpreter::stackElementSize();
  if (TaggedStackInterpreter) {
    // use the tag slot temporarily for the MSW
    __ movl(rsi, Address(saved_sp, ld_off));
    __ movl(Address(saved_sp, next_val_off+wordSize), rsi);
    __ movdbl(r, Address(saved_sp, next_val_off));
    // restore the tag
    __ movl(Address(saved_sp, next_val_off+wordSize), frame::TagValue);
  } else {
    __ movdbl(r, Address(saved_sp, next_val_off));
  }
}
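
// Sketch of the trick above (illustrative): with tags enabled the two 32-bit
// halves of the value are separated by a tag slot, so the MSW is first copied
// down into that tag slot (making the 8 bytes at next_val_off contiguous),
// the double is loaded with a single movdbl, and the borrowed slot is then
// rewritten with frame::TagValue to keep the frame well-formed.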

static void gen_i2c_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {
  // We're being called from the interpreter but need to find the
  // compiled return entry point. The return address on the stack
  // should point at it and we just need to pull the old value out.
  // Load up the pointer to the compiled return entry point and
  // rewrite our return pc. The code is arranged like so:
  //
  //   .word Interpreter::return_sentinel
  //   .word address_of_compiled_return_point
  //   return_entry_point: blah_blah_blah
  //
  // So we can find the appropriate return point by loading up the word
  // just prior to the current return address we have on the stack.
  //
  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.


  // Note: rsi contains the senderSP on entry. We must preserve it since
  // we may do an i2c -> c2i transition if we lose a race where compiled
  // code goes non-entrant while we get args ready.

  // Pick up the return address
  __ movl(rax, Address(rsp, 0));

  // If UseSSE >= 2 then no cleanup is needed on the return to the
  // interpreter so skip fixing up the return entry point unless
  // VerifyFPU is enabled.
  if (UseSSE < 2 || VerifyFPU) {
    Label skip, chk_int;
    // If we were called from the call stub we need to do slightly different
    // cleanup than if the interpreter returned to the call stub.

    ExternalAddress stub_return_address(StubRoutines::_call_stub_return_address);
    __ cmp32(rax, stub_return_address.addr());
    __ jcc(Assembler::notEqual, chk_int);
    assert(StubRoutines::i486::get_call_stub_compiled_return() != NULL, "must be set");
    __ lea(rax, ExternalAddress(StubRoutines::i486::get_call_stub_compiled_return()));
    __ jmp(skip);

    // It must be the interpreter since we never get here via a c2i (unlike Azul)

    __ bind(chk_int);
#ifdef ASSERT
    {
      Label ok;
      __ cmpl(Address(rax, -8), Interpreter::return_sentinel);
      __ jcc(Assembler::equal, ok);
      __ int3();
      __ bind(ok);
    }
#endif // ASSERT
    __ movl(rax, Address(rax, -4));
    __ bind(skip);
  }

  // rax now contains the compiled return entry point, which will do any
  // cleanup needed for the return from compiled to interpreted.

  // Must preserve original SP for loading incoming arguments because
  // we need to align the outgoing SP for compiled code.
  __ movl(rdi, rsp);

  // Cut-out for having no stack args. Since up to 2 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.
    // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
    // Convert 4-byte stack slots to words.
    comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    __ subl(rsp, comp_words_on_stack * wordSize);
  }
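
  // Worked example (illustrative, wordSize == 4): for comp_args_on_stack == 5
  // slots, 5 * 4 = 20 bytes; round_to(20, 4) >> 2 = 5 words; round_to(5, 2)
  // = 6 words, so rsp is dropped by 24 bytes, keeping the outgoing area an
  // even number of words ahead of the final alignment step below.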

  // Align the outgoing SP
  __ andl(rsp, -(StackAlignmentInBytes));

  // push the return address on the stack (note that pushing, rather
  // than storing it, yields the correct frame alignment for the callee)
  __ pushl(rax);

  // Put saved SP in another register
  const Register saved_sp = rax;
  __ movl(saved_sp, rdi);


  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ movl(rdi, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - i)*Interpreter::stackElementSize() + Interpreter::value_offset_in_bytes();
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize();
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to account for return address)
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;

      // We can use rsi as a temp here because compiled code doesn't need rsi as an input,
      // and if we end up going thru a c2i because of a miss a reasonable value of rsi
      // will be generated.
      if (!r_2->is_valid()) {
        // __ fld_s(Address(saved_sp, ld_off));
        // __ fstp_s(Address(rsp, st_off));
        __ movl(rsi, Address(saved_sp, ld_off));
        __ movl(Address(rsp, st_off), rsi);
      } else {
        // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
        // are addressed with negative offsets, so the LSW is at the LOW address.

        // ld_off is MSW so get LSW
        // st_off is LSW (i.e. reg.first())
        // __ fld_d(Address(saved_sp, next_off));
        // __ fstp_d(Address(rsp, st_off));
        __ movl(rsi, Address(saved_sp, next_off));
        __ movl(Address(rsp, st_off), rsi);
        __ movl(rsi, Address(saved_sp, ld_off));
        __ movl(Address(rsp, st_off + wordSize), rsi);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      assert(r != rax, "must be different");
      if (r_2->is_valid()) {
        assert(r_2->as_Register() != rax, "need another temporary register");
        // Remember r_1 is low address (and LSB on x86)
        // So r_2 gets loaded from high address regardless of the platform
        __ movl(r_2->as_Register(), Address(saved_sp, ld_off));
        __ movl(r, Address(saved_sp, next_off));
      } else {
        __ movl(r, Address(saved_sp, ld_off));
      }
    } else {
      assert(r_1->is_XMMRegister(), "");
      if (!r_2->is_valid()) {
        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
      } else {
        move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off);
      }
    }
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.

  __ get_thread(rax);
  __ movl(Address(rax, JavaThread::callee_target_offset()), rbx);

  // Move the methodOop to rax in case we end up in a c2i adapter.
  // The c2i adapters expect the methodOop in rax (c2) because c2's
  // resolve stubs return the result (the method) in rax.
  // I'd love to fix this.
  __ movl(rax, rbx);

  __ jmp(rdi);
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs) {
  address i2c_entry = __ pc();

  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know rbx holds the methodOop during calls
  // to the interpreter. The args start out packed in the compiled layout. They
  // need to be unpacked into the interpreter layout. This will almost always
  // require some stack space. We grow the current (compiled) stack, then repack
  // the args. We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not EBP, get sick).

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;

  Register holder   = rax;
  Register receiver = rcx;
  Register temp     = rbx;

  {

    Label missed;

    __ verify_oop(holder);
    __ movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
    __ verify_oop(temp);

    __ cmpl(temp, Address(holder, compiledICHolderOopDesc::holder_klass_offset()));
    __ movl(rbx, Address(holder, compiledICHolderOopDesc::holder_method_offset()));
    __ jcc(Assembler::notEqual, missed);
    // The method might have been compiled since the call site was patched to
    // interpreted; if that is the case, treat it as a miss so we can get
    // the call site corrected.
    __ cmpl(Address(rbx, in_bytes(methodOopDesc::code_offset())), NULL_WORD);
    __ jcc(Assembler::equal, skip_fixup);

    __ bind(missed);
    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  }

  address c2i_entry = __ pc();

  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return new AdapterHandlerEntry(i2c_entry, c2i_entry, c2i_unverified_entry);
}

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        int total_args_passed) {
  // We return the amount of VMRegImpl stack slots we need to reserve for all
  // the arguments NOT counting out_preserve_stack_slots.

      assert(sig_bt[i+1] == T_VOID, "missing Half" );
      regs[i].set2(VMRegImpl::stack2reg(stack));
      stack += 2;
      break;
    case T_VOID: regs[i].set_bad(); break;
    default:
      ShouldNotReachHere();
      break;
    }
  }
  return stack;
}

// A simple move of an integer-like type
static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      // __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
      // __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
      __ movl(rax, Address(rbp, reg2offset_in(src.first())));
      __ movl(Address(rsp, reg2offset_out(dst.first())), rax);
    } else {
      // stack to reg
      __ movl(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ movl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
  } else {
    __ movl(dst.first()->as_Register(), src.first()->as_Register());
  }
}

// An oop arg. Must pass a handle, not the oop itself.
static void object_move(MacroAssembler* masm,
                        OopMap* map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int* receiver_offset) {

  // Because of the calling conventions we know that src can be a
  // register or a stack location. dst can only be a stack location.

  assert(dst.first()->is_stack(), "must be stack");
  // must pass a handle. First figure out the location we use as a handle

  if (src.first()->is_stack()) {
    // Oop is already on the stack as an argument
    Register rHandle = rax;
    Label nil;
    __ xorl(rHandle, rHandle);
    __ cmpl(Address(rbp, reg2offset_in(src.first())), NULL_WORD);
    __ jcc(Assembler::equal, nil);
    __ leal(rHandle, Address(rbp, reg2offset_in(src.first())));
    __ bind(nil);
    __ movl(Address(rsp, reg2offset_out(dst.first())), rHandle);

    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }
  } else {
    // Oop is in a register; we must store it to the space we reserved
    // on the stack for oop_handles.
    const Register rOop = src.first()->as_Register();
    const Register rHandle = rax;
    int oop_slot = (rOop == rcx ? 0 : 1) * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot*VMRegImpl::stack_slot_size;
    Label skip;
    __ movl(Address(rsp, offset), rOop);
    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    __ xorl(rHandle, rHandle);
    __ cmpl(rOop, NULL_WORD);
    __ jcc(Assembler::equal, skip);
    __ leal(rHandle, Address(rsp, offset));
    __ bind(skip);
    // Store the handle parameter
    __ movl(Address(rsp, reg2offset_out(dst.first())), rHandle);
    if (is_receiver) {
      *receiver_offset = offset;
    }
  }
}
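
// Handle semantics sketch (illustrative): native code receives handles, i.e.
// pointers to stack slots that contain the oop, never raw oops. A NULL oop
// must become a NULL handle, hence the xorl/cmpl/jcc pattern above:
//   oop == NULL  ->  rHandle == NULL
//   oop != NULL  ->  rHandle == address of the slot holding the oop
// and the slot itself is recorded in the OopMap so the GC can update it.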

// A float arg may have to do float reg to int reg conversion
static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");

  // Because of the calling convention we know that src is either a stack location
  // or an xmm register. dst can only be a stack location.

  assert(dst.first()->is_stack() && ( src.first()->is_stack() || src.first()->is_XMMRegister()), "bad parameters");

  if (src.first()->is_stack()) {
    __ movl(rax, Address(rbp, reg2offset_in(src.first())));
    __ movl(Address(rsp, reg2offset_out(dst.first())), rax);
  } else {
    // reg to stack
    __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
  }
}

// A long move
static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {

  // The only legal possibility for a long_move VMRegPair is
  // two stack slots (possibly unaligned), as neither the Java
  // nor the C calling convention will use registers for longs.

  if (src.first()->is_stack() && dst.first()->is_stack()) {
    assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
    __ movl(rax, Address(rbp, reg2offset_in(src.first())));
    __ movl(rbx, Address(rbp, reg2offset_in(src.second())));
    __ movl(Address(rsp, reg2offset_out(dst.first())), rax);
    __ movl(Address(rsp, reg2offset_out(dst.second())), rbx);
  } else {
    ShouldNotReachHere();
  }
}

// A double move
static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {

  // Because of the calling convention we know that src is either
  //   1: a single physical register (xmm registers only), or
  //   2: two stack slots (possibly unaligned).
  // dst can only be a pair of stack slots.

  assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || src.first()->is_stack()), "bad args");

  if (src.first()->is_stack()) {
    // source is all stack
    __ movl(rax, Address(rbp, reg2offset_in(src.first())));
    __ movl(rbx, Address(rbp, reg2offset_in(src.second())));
    __ movl(Address(rsp, reg2offset_out(dst.first())), rax);
    __ movl(Address(rsp, reg2offset_out(dst.second())), rbx);
  } else {
    // reg to stack
    // No worries about stack alignment
    __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
  }
}


void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below the
  // frame pointer, which by this time is free to use.
  switch (ret_type) {
  case T_FLOAT:
    __ fstp_s(Address(rbp, -wordSize));
    break;
  case T_DOUBLE:
    __ fstp_d(Address(rbp, -2*wordSize));
    break;
  case T_VOID:  break;
  case T_LONG:
    __ movl(Address(rbp, -wordSize), rax);
    __ movl(Address(rbp, -2*wordSize), rdx);
    break;
  default: {
    __ movl(Address(rbp, -wordSize), rax);
    }
  }
}
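
// Scratch-area sketch (illustrative, wordSize == 4): results are parked in
// the two words directly below the saved rbp,
//   [rbp - 4]  rax / float (fstp_s) / long low word
//   [rbp - 8]  rdx (long high word) / double (fstp_d, together with rbp - 4)
// and restore_native_result below reads the same slots back.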

void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below the
  // frame pointer, which by this time is free to use.
  switch (ret_type) {
  case T_FLOAT:
    __ fld_s(Address(rbp, -wordSize));
    break;
  case T_DOUBLE:
    __ fld_d(Address(rbp, -2*wordSize));
    break;
  case T_LONG:
    __ movl(rax, Address(rbp, -wordSize));
    __ movl(rdx, Address(rbp, -2*wordSize));
    break;
  case T_VOID:  break;
  default: {
    __ movl(rax, Address(rbp, -wordSize));
    }
  }
}

// ---------------------------------------------------------------------------
// Generate a native wrapper for a given method. The method takes arguments
// in the Java compiled code convention, marshals them to the native
// convention (handlizes oops, etc), transitions to native, makes the call,
// returns to java state (possibly blocking), unhandlizes any result and
// returns.
nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
                                                methodHandle method,
                                                int total_in_args,
                                                int comp_args_on_stack,
                                                BasicType *in_sig_bt,
                                                VMRegPair *in_regs,
                                                BasicType ret_type) {

  // An OopMap for lock (and class if static)
  OopMapSet *oop_maps = new OopMapSet();
  stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);

  int stack_size = stack_slots * VMRegImpl::stack_slot_size;

  intptr_t start = (intptr_t)__ pc();

  // First thing make an ic check to see if we should even be here

  // We are free to use all registers as temps without saving them and
  // restoring them except rbp. rbp is the only callee-save register
  // as far as the interpreter and the compiler(s) are concerned.


  const Register ic_reg = rax;
  const Register receiver = rcx;
  Label hit;
  Label exception_pending;


  __ verify_oop(receiver);
  __ cmpl(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
  __ jcc(Assembler::equal, hit);

  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

  // The verified entry point must be aligned for code patching, and the
  // first 5 bytes must be in the same cache line. If we align at 8 then
  // we can be sure the 5 bytes are in the same line.
  __ align(8);

  __ bind(hit);

  int vep_offset = ((intptr_t)__ pc()) - start;

#ifdef COMPILER1
  if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
    // Object.hashCode can pull the hashCode from the header word
    // instead of doing a full VM transition once it's been computed.
    // Since hashCode is usually polymorphic at call sites we can't do
    // this optimization at the call site without a lot of work.
    Label slowCase;
    Register receiver = rcx;
    Register result = rax;
    __ movl(result, Address(receiver, oopDesc::mark_offset_in_bytes()));

    // check if locked
    __ testl(result, markOopDesc::unlocked_value);
    __ jcc(Assembler::zero, slowCase);

    if (UseBiasedLocking) {
      // Check if biased and fall through to runtime if so
      __ testl(result, markOopDesc::biased_lock_bit_in_place);
      __ jcc(Assembler::notZero, slowCase);
    }

    // get hash
    __ andl(result, markOopDesc::hash_mask_in_place);
    // test if hashCode exists
    __ jcc(Assembler::zero, slowCase);
    __ shrl(result, markOopDesc::hash_shift);
    __ ret(0);
    __ bind(slowCase);
  }
#endif // COMPILER1
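
// Mark-word sketch for the fast path above (illustrative; the exact bit
// layout is defined by markOopDesc): if the hash h is stored shifted left by
// hash_shift, with hash_mask_in_place == hash_mask << hash_shift, then
//   andl leaves (h << hash_shift) in result (zero means "no hash yet"), and
//   shrl brings it down to h, which is returned without a VM transition.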

  // The instruction at the verified entry point must be 5 bytes or longer
  // because it can be patched on the fly by make_non_entrant. The stack bang
  // instruction fits that requirement.

  // Generate stack overflow check

  if (UseStackBanging) {
    __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
  } else {
    // need a 5 byte instruction to allow MT safe patching to non-entrant
    __ fat_nop();
  }

  // Generate a new frame for the wrapper.
  __ enter();
  // -2 because the return address is already present and so is the saved rbp
  __ subl(rsp, stack_size - 2*wordSize);

  // Frame is now completed as far as size and linkage.

  int frame_complete = ((intptr_t)__ pc()) - start;

  // Calculate the difference between rsp and rbp. We need to know it
  // after the native call because on windows Java Natives will pop
  // the arguments and it is painful to do rsp relative addressing
  // in a platform independent way. So after the call we switch to
  // rbp relative addressing.

  int fp_adjustment = stack_size - 2*wordSize;

#ifdef COMPILER2
  // C2 may leave the stack dirty if not in SSE2+ mode
  if (UseSSE >= 2) {
    __ verify_FPU(0, "c2i transition should have clean FPU stack");
  } else {
    __ empty_FPU_stack();
  }

    case T_LONG :
      long_move(masm, in_regs[i], out_regs[c_arg]);
      break;

    case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
      // fall through (debug builds stop at the assert above)

    default:
      simple_move32(masm, in_regs[i], out_regs[c_arg]);
    }
  }

  // Pre-load a static method's oop into rsi. Used both by locking code and
  // the normal JNI call code.
  if (method->is_static()) {

    // load oop into a register
    __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));

    // Now handlize the static class mirror; it's known not-null.
    __ movl(Address(rsp, klass_offset), oop_handle_reg);
    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));

    // Now get the handle
    __ leal(oop_handle_reg, Address(rsp, klass_offset));
    // store the klass handle as second argument
    __ movl(Address(rsp, wordSize), oop_handle_reg);
  }

  // Change state to native (we save the return address in the thread, since it might not
  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
  // points into the right code segment. It does not have to be the correct return pc.
  // We use the same pc/oopMap repeatedly when we call out.

  intptr_t the_pc = (intptr_t) __ pc();
  oop_maps->add_gc_map(the_pc - start, map);

  __ set_last_Java_frame(thread, rsp, noreg, (address)the_pc);


  // We have all of the arguments set up at this point. We must not touch any
  // register argument from here on (if we were to save/restore them, the
  // oopMap would not describe them).

  {
    SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
    __ movoop(rax, JNIHandles::make_local(method()));
    __ call_VM_leaf(
         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
         thread, rax);
  }


  // These are register definitions we need for locking/unlocking
  const Register swap_reg = rax;  // Must use rax for the cmpxchg instruction
  const Register obj_reg  = rcx;  // Will contain the oop
  const Register lock_reg = rdx;  // Address of compiler lock object (BasicLock)

  Label slow_path_lock;
  Label lock_done;

  // Lock a synchronized method
  if (method->is_synchronized()) {


    const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();

    // Get the handle (the 2nd argument)
    __ movl(oop_handle_reg, Address(rsp, wordSize));

    // Get address of the box

    __ leal(lock_reg, Address(rbp, lock_slot_rbp_offset));

    // Load the oop from the handle
    __ movl(obj_reg, Address(oop_handle_reg, 0));

    if (UseBiasedLocking) {
      // Note that oop_handle_reg is trashed during this call
      __ biased_locking_enter(lock_reg, obj_reg, swap_reg, oop_handle_reg, false, lock_done, &slow_path_lock);
    }

    // Load immediate 1 into swap_reg %rax
    __ movl(swap_reg, 1);

    // Load (object->mark() | 1) into swap_reg %rax
    __ orl(swap_reg, Address(obj_reg, 0));

    // Save (object->mark() | 1) into BasicLock's displaced header
    __ movl(Address(lock_reg, mark_word_offset), swap_reg);

    if (os::is_MP()) {
      __ lock();
    }

    // src -> dest iff dest == rax else rax <- dest
    // *obj_reg = lock_reg iff *obj_reg == rax else rax = *(obj_reg)
    __ cmpxchg(lock_reg, Address(obj_reg, 0));
    __ jcc(Assembler::equal, lock_done);

    // Test if the oopMark is an obvious stack pointer, i.e.,
    //  1) (mark & 3) == 0, and
    //  2) rsp <= mark < mark + os::pagesize()
    // These 3 tests can be done by evaluating the following
    // expression: ((mark - rsp) & (3 - os::vm_page_size())),
    // assuming both the stack pointer and pagesize have their
    // least significant 2 bits clear.
    // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
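
    // Worked example (illustrative, page size 4096): 3 - 4096 == -4093 ==
    // 0xfffff003, a mask keeping the two low bits and everything at or above
    // the page bit. A recursively-locked mark is an address on our own stack,
    // so (mark - rsp) is small and 4-aligned, e.g. 0x40 & 0xfffff003 == 0;
    // any other mark leaves high or low bits set and takes the slow path.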

    __ subl(swap_reg, rsp);
    __ andl(swap_reg, 3 - os::vm_page_size());

    // Save the test result; for the recursive case, the result is zero
    __ movl(Address(lock_reg, mark_word_offset), swap_reg);
    __ jcc(Assembler::notEqual, slow_path_lock);
    // Slow path will re-enter here
    __ bind(lock_done);

    if (UseBiasedLocking) {
      // Re-fetch oop_handle_reg as we trashed it above
      __ movl(oop_handle_reg, Address(rsp, wordSize));
    }
  }


  // Finally just about ready to make the JNI call


  // get JNIEnv*, which is the first argument to the native method

  __ leal(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
  __ movl(Address(rsp, 0), rdx);

  // Now set thread in native
  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native);

  __ call(RuntimeAddress(method->native_function()));

  // WARNING - on Windows Java Natives use the pascal calling convention and pop the
  // arguments off of the stack. We could just re-adjust the stack pointer here
  // and continue to do SP relative addressing but we instead switch to FP
  // relative addressing.

  // Unpack native results.
  switch (ret_type) {
  case T_BOOLEAN: __ c2bool(rax);            break;
  case T_CHAR   : __ andl(rax, 0xFFFF);      break;
  case T_BYTE   : __ sign_extend_byte (rax); break;
  case T_SHORT  : __ sign_extend_short(rax); break;
  case T_INT    : /* nothing to do */        break;
  case T_DOUBLE :
  case T_FLOAT  :
    // Result is in st0; we'll save as needed
    break;
  case T_ARRAY:                 // Really a handle
  case T_OBJECT:                // Really a handle
    break;                      // can't de-handlize until after safepoint check
  case T_VOID: break;
  case T_LONG: break;
  default       : ShouldNotReachHere();
  }

  // Switch thread to "native transition" state before reading the synchronization state.
  // This additional state is necessary because reading and testing the synchronization
  // state is not atomic w.r.t. GC, as this scenario demonstrates:
  //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  //     VM thread changes sync state to synchronizing and suspends threads for GC.
  //     Thread A is resumed to finish this native method, but doesn't block here since it
  //     didn't see any synchronization in progress, and escapes.
  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);

  if (os::is_MP()) {
    if (UseMembar) {
      __ membar(); // Force this write out before the read below
    } else {
      // Write the serialization page so the VM thread can do a pseudo remote membar.
      // We use the current thread pointer to calculate a thread specific
      // offset to write to within the page. This minimizes bus traffic
      // due to cache line collision.
      __ serialize_memory(thread, rcx);
    }
  }

  if (AlwaysRestoreFPU) {
    // Make sure the control word is correct.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  // check for safepoint operation in progress and/or pending suspend requests
  { Label Continue;

    __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
             SafepointSynchronize::_not_synchronized);

    Label L;
    __ jcc(Assembler::notEqual, L);
    __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
    __ jcc(Assembler::equal, Continue);
    __ bind(L);

    // Don't use call_VM as it will see a possible pending exception and forward it
    // and never return here, preventing us from clearing _last_native_pc down below.
    // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
    // preserved and correspond to the bcp/locals pointers. So we do a runtime call
    // by hand.
    //
    save_native_result(masm, ret_type, stack_slots);
    __ pushl(thread);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
                                            JavaThread::check_special_condition_for_native_trans)));
    __ increment(rsp, wordSize);
    // Restore any method result value
    restore_native_result(masm, ret_type, stack_slots);

    __ bind(Continue);
  }

  // change thread state
  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java);

  Label reguard;
  Label reguard_done;
  __ cmpl(Address(thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled);
  __ jcc(Assembler::equal, reguard);

  // slow path reguard re-enters here
  __ bind(reguard_done);

  // Handle possible exception (will unlock if necessary)

  // native result if any is live

  // Unlock
  Label slow_path_unlock;
  Label unlock_done;
  if (method->is_synchronized()) {

    Label done;

    // Get locked oop from the handle we passed to jni
    __ movl(obj_reg, Address(oop_handle_reg, 0));

    if (UseBiasedLocking) {
      __ biased_locking_exit(obj_reg, rbx, done);
    }

    // Simple recursive lock?

    __ cmpl(Address(rbp, lock_slot_rbp_offset), NULL_WORD);
    __ jcc(Assembler::equal, done);

    // Must save rax if it is live now because cmpxchg must use it
    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
      save_native_result(masm, ret_type, stack_slots);
    }

    // get old displaced header
    __ movl(rbx, Address(rbp, lock_slot_rbp_offset));

    // get address of the stack lock
    __ leal(rax, Address(rbp, lock_slot_rbp_offset));

    // Atomic swap old header if oop still contains the stack lock
    if (os::is_MP()) {
      __ lock();
    }

    // src -> dest iff dest == rax else rax <- dest
    // *obj_reg = rbx iff *obj_reg == rax else rax = *(obj_reg)
    __ cmpxchg(rbx, Address(obj_reg, 0));
    __ jcc(Assembler::notEqual, slow_path_unlock);

    // slow path re-enters here
    __ bind(unlock_done);
    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
      restore_native_result(masm, ret_type, stack_slots);
    }

    __ bind(done);

  }

  {
    SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
    // Tell dtrace about this method exit
    save_native_result(masm, ret_type, stack_slots);
    __ movoop(rax, JNIHandles::make_local(method()));
    __ call_VM_leaf(
         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
         thread, rax);
    restore_native_result(masm, ret_type, stack_slots);
  }

  // We can finally stop using that last_Java_frame we setup ages ago

  __ reset_last_Java_frame(thread, false, true);

  // Unpack oop result
  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
    Label L;
    __ cmpl(rax, NULL_WORD);
    __ jcc(Assembler::equal, L);
    __ movl(rax, Address(rax, 0));
    __ bind(L);
    __ verify_oop(rax);
  }

  // reset handle block
  __ movl(rcx, Address(thread, JavaThread::active_handles_offset()));

  __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), 0);

  // Any exception pending?
  __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
  __ jcc(Assembler::notEqual, exception_pending);


  // no exception, we're almost done

  // check that only the result value is on the FPU stack
  __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");

  // Fix up floating point results so that the result looks like a return from a compiled method
  if (ret_type == T_FLOAT) {
    if (UseSSE >= 1) {
      // Pop st0 and store as float and reload into xmm register
      __ fstp_s(Address(rbp, -4));
      __ movflt(xmm0, Address(rbp, -4));
    }
  } else if (ret_type == T_DOUBLE) {
    if (UseSSE >= 2) {
      // Pop st0 and store as double and reload into xmm register
      __ fstp_d(Address(rbp, -8));
      __ movdbl(xmm0, Address(rbp, -8));
    }
  }
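
  // Design note (sketch): the C ABI returns floats/doubles in st0, but
  // compiled Java code expects them in xmm0 when UseSSE >= 1 (>= 2 for
  // double). There is no direct st -> xmm move on IA-32, so the value
  // bounces through the scratch slot below rbp (fstp then movflt/movdbl).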

  // Return

  __ leave();
  __ ret(0);

  // Unexpected paths are out of line and go here

  // Slow path locking & unlocking
  if (method->is_synchronized()) {

    // BEGIN Slow path lock

    __ bind(slow_path_lock);

    // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
    // args are (oop obj, BasicLock* lock, JavaThread* thread)
    __ pushl(thread);
    __ pushl(lock_reg);
    __ pushl(obj_reg);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C)));
    __ addl(rsp, 3*wordSize);

#ifdef ASSERT
    { Label L;
      __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
      __ jcc(Assembler::equal, L);
      __ stop("no pending exception allowed on exit from monitorenter");
      __ bind(L);
    }
#endif
    __ jmp(lock_done);

    // END Slow path lock

    // BEGIN Slow path unlock
    __ bind(slow_path_unlock);

    // Slow path unlock

    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
      save_native_result(masm, ret_type, stack_slots);
    }
    // Save pending exception around call to VM (which contains an EXCEPTION_MARK)

    __ pushl(Address(thread, in_bytes(Thread::pending_exception_offset())));
    __ movl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);


    // would be nice if this were a single "push effective address" instruction
    // +wordSize because of the push above
    __ leal(rax, Address(rbp, lock_slot_rbp_offset));
    __ pushl(rax);

    __ pushl(obj_reg);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
    __ addl(rsp, 2*wordSize);
#ifdef ASSERT
    {
      Label L;
      __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
      __ jcc(Assembler::equal, L);
      __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
      __ bind(L);
    }
#endif /* ASSERT */

    __ popl(Address(thread, in_bytes(Thread::pending_exception_offset())));

    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
      restore_native_result(masm, ret_type, stack_slots);
    }
    __ jmp(unlock_done);
    // END Slow path unlock

  }

  // SLOW PATH Reguard the stack if needed

  __ bind(reguard);
  save_native_result(masm, ret_type, stack_slots);
  {
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
  }
  restore_native_result(masm, ret_type, stack_slots);
  __ jmp(reguard_done);


  // pop our frame
  __ leave();
  // and forward the exception
  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

  __ flush();

  nmethod *nm = nmethod::new_native_nmethod(method,
                                            masm->code(),
                                            vep_offset,
                                            frame_complete,
                                            stack_slots / VMRegImpl::slots_per_word,
                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
                                            oop_maps);
  return nm;

}

// This function returns the adjustment size (in number of words) to a c2i adapter
// activation for use during deoptimization.
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  return (callee_locals - callee_parameters) * Interpreter::stackElementWords();
}
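
// Worked example (illustrative): a callee with 2 parameters and 5 locals
// needs (5 - 2) * Interpreter::stackElementWords() extra words, since the
// interpreter frame must hold the 3 non-parameter locals that the compiled
// caller's outgoing-argument area does not provide.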


uint SharedRuntime::out_preserve_stack_slots() {
  return 0;
}


//------------------------------generate_deopt_blob----------------------------
void SharedRuntime::generate_deopt_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  CodeBuffer buffer("deopt_blob", 1024, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);
  int frame_size_in_words;
  // At this point we need to de-opt. We save the argument return
  // registers. We call the first C routine, fetch_unroll_info(). This
  // routine captures the return values and returns a structure which
  // describes the current frame size and the sizes of all replacement frames.
  // The current frame is compiled code and may contain many inlined
  // functions, each with their own JVM state. We pop the current frame, then
  // push all the new frames. Then we call the C routine unpack_frames() to
  // populate these frames. Finally unpack_frames() returns us the new target
  // address. Notice that callee-save registers are BLOWN here; they have
  // already been captured in the vframeArray at the time the return PC was
  // patched.
  address start = __ pc();
  Label cont;

  // Prolog for the non-exception case!

  // Save everything in sight.

  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
  // Normal deoptimization
  __ pushl(Deoptimization::Unpack_deopt);
  __ jmp(cont);

  int reexecute_offset = __ pc() - start;

  // Reexecute case
  // The return address is the pc that describes which bci to re-execute at

  // No need to update map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);

  __ pushl(Deoptimization::Unpack_reexecute);
  __ jmp(cont);

  int exception_offset = __ pc() - start;

  // Prolog for exception case

  // All registers are dead at this entry point, except for rax and
  // rdx which contain the exception oop and exception pc
  // respectively. Set them in TLS and fall thru to the
  // unpack_with_exception_in_tls entry point.

  __ get_thread(rdi);
  __ movl(Address(rdi, JavaThread::exception_pc_offset()), rdx);
  __ movl(Address(rdi, JavaThread::exception_oop_offset()), rax);

  int exception_in_tls_offset = __ pc() - start;

  // new implementation because exception oop is now passed in JavaThread

  // Prolog for exception case
  // All registers must be preserved because they might be used by LinearScan
  // Exception oop and throwing PC are passed in JavaThread
  // tos: stack at point of call to method that threw the exception (i.e. only
  // args are on the stack, no return address)

  // make room on stack for the return address
  // It will be patched later with the throwing pc. The correct value is not
  // available now because loading it from memory would destroy registers.
  __ pushl(0);

  // Save everything in sight.

  // No need to update map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);

  // Now it is safe to overwrite any register

  // store the correct deoptimization type
  __ pushl(Deoptimization::Unpack_exception);

  // load throwing pc from JavaThread and patch it as the return address
  // of the current frame. Then clear the field in JavaThread
  __ get_thread(rdi);
  __ movl(rdx, Address(rdi, JavaThread::exception_pc_offset()));
  __ movl(Address(rbp, wordSize), rdx);
  __ movl(Address(rdi, JavaThread::exception_pc_offset()), NULL_WORD);

#ifdef ASSERT
  // verify that there is really an exception oop in JavaThread
  __ movl(rax, Address(rdi, JavaThread::exception_oop_offset()));
  __ verify_oop(rax);

  // verify that there is no pending exception
  Label no_pending_exception;
  __ movl(rax, Address(rdi, Thread::pending_exception_offset()));
  __ testl(rax, rax);
  __ jcc(Assembler::zero, no_pending_exception);
  __ stop("must not have pending exception here");
  __ bind(no_pending_exception);
#endif

  __ bind(cont);

  // Compiled code leaves the floating point stack dirty, empty it.
  __ empty_FPU_stack();


  // Call C code. Need thread and this frame, but NOT official VM entry
  // crud. We cannot block on this call, no GC can happen.
  __ get_thread(rcx);
  __ pushl(rcx);
  // fetch_unroll_info needs to call last_java_frame()
  __ set_last_Java_frame(rcx, noreg, noreg, NULL);

  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));

  // Need to have an oopmap that tells fetch_unroll_info where to
  // find any register it might need.

  oop_maps->add_gc_map( __ pc()-start, map);

  // Discard arg to fetch_unroll_info
  __ popl(rcx);

  __ get_thread(rcx);
  __ reset_last_Java_frame(rcx, false, false);

  // Load UnrollBlock into EDI
  __ movl(rdi, rax);

  // Move the unpack kind to a safe place in the UnrollBlock because
  // we are very short of registers

  Address unpack_kind(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
  // retrieve the deopt kind from where we left it.
  __ popl(rax);
  __ movl(unpack_kind, rax);    // save the unpack_kind value
2060
2061 Label noException;
2062 __ cmpl(rax, Deoptimization::Unpack_exception); // Was exception pending?
2063 __ jcc(Assembler::notEqual, noException);
2064 __ movl(rax, Address(rcx, JavaThread::exception_oop_offset()));
2065 __ movl(rdx, Address(rcx, JavaThread::exception_pc_offset()));
2066 __ movl(Address(rcx, JavaThread::exception_oop_offset()), NULL_WORD);
2067 __ movl(Address(rcx, JavaThread::exception_pc_offset()), NULL_WORD);
2068
2069 __ verify_oop(rax);
2070
2071 // Overwrite the result registers with the exception results.
2072 __ movl(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax);
2073 __ movl(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx);
2074
2075 __ bind(noException);
2076
2077 // Stack is back to only having register save data on the stack.
2078 // Now restore the result registers. Everything else is either dead or captured
2079 // in the vframeArray.
2080
2081 RegisterSaver::restore_result_registers(masm);
2082
2083 // All of the register save area has been popped off the stack. Only the
2084 // return address remains.
2085
2086 // Pop all the frames we must move/replace.
2087 //
2088 // Frame picture (youngest to oldest)
2089 // 1: self-frame (no frame link)
2090 // 2: deopting frame (no frame link)
2091 // 3: caller of deopting frame (could be compiled/interpreted).
2092 //
2093 // Note: by leaving the return address of self-frame on the stack
2094 // and using the size of frame 2 to adjust the stack,
2095 // when we are done the return address to frame 3 will still be on the stack.
2096
2097 // Pop deoptimized frame
2098 __ addl(rsp,Address(rdi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
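// A worked example of the adjustment above (illustrative only, sizes
// assumed): with wordSize == 4 and a deoptimized frame occupying 0x28
// bytes, the addl amounts to
//   rsp = rsp + 0x28;
// discarding frame 2 in one step; no frame link is followed since the
// deopting frame kept none.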
2099
2100 // sp should be pointing at the return address to the caller (3)
2101
2102 // Stack bang to make sure there's enough room for these interpreter frames.
2103 if (UseStackBanging) {
2104 __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2105 __ bang_stack_size(rbx, rcx);
2106 }
2107
2108 // Load array of frame pcs into ECX
2109 __ movl(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2110
2111 __ popl(rsi); // trash the old pc
2112
2113 // Load array of frame sizes into ESI
2114 __ movl(rsi,Address(rdi,Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2115
2116 Address counter(rdi, Deoptimization::UnrollBlock::counter_temp_offset_in_bytes());
2117
2118 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2119 __ movl(counter, rbx);
2120
2121 // Pick up the initial fp we should save
2122 __ movl(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));
2123
2124 // Now adjust the caller's stack to make up for the extra locals
2125 // but record the original sp so that we can save it in the skeletal interpreter
2126 // frame and the stack walking of interpreter_sender will get the unextended sp
2127 // value and not the "real" sp value.
2128
2129 Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset_in_bytes());
2130 __ movl(sp_temp, rsp);
2131 __ subl(rsp, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
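// Sketch of the bookkeeping above (illustrative, value assumed): if the
// caller must grow by caller_adjustment == 8 bytes to hold extra locals,
// then after the subl
//   sp_temp holds the original (unextended) sp recorded for frame walking,
//   rsp     is the original sp - 8, the "real" extended sp.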
2132
2133 // Push interpreter frames in a loop
2134 Label loop;
2135 __ bind(loop);
2136 __ movl(rbx, Address(rsi, 0)); // Load frame size
2137 #ifdef CC_INTERP
2138 __ subl(rbx, 4*wordSize); // we'll push pc and ebp by hand
2139 #ifdef ASSERT
2140 __ pushl(0xDEADDEAD); // Make a recognizable pattern
2141 __ pushl(0xDEADDEAD);
2142 #else /* ASSERT */
2143 __ subl(rsp, 2*wordSize); // skip the "static long no_param"
2144 #endif /* ASSERT */
2145 #else /* CC_INTERP */
2146 __ subl(rbx, 2*wordSize); // we'll push pc and rbp, by hand
2147 #endif /* CC_INTERP */
2148 __ pushl(Address(rcx, 0)); // save return address
2149 __ enter(); // save old & set new rbp,
2150 __ subl(rsp, rbx); // Prolog!
2151 __ movl(rbx, sp_temp); // sender's sp
2152 #ifdef CC_INTERP
2153 __ movl(Address(rbp,
2154 -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
2155 rbx); // Make it walkable
2156 #else /* CC_INTERP */
2157 // This value is corrected by layout_activation_impl
2158 __ movl(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD );
2159 __ movl(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
2160 #endif /* CC_INTERP */
2161 __ movl(sp_temp, rsp); // pass to next frame
2162 __ addl(rsi, 4); // Bump array pointer (sizes)
2163 __ addl(rcx, 4); // Bump array pointer (pcs)
2164 __ decrement(counter); // decrement counter
2165 __ jcc(Assembler::notZero, loop);
2166 __ pushl(Address(rcx, 0)); // save final return address
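// Net effect of one loop iteration in the non-CC_INTERP case (sketch):
// for a frame of frame_sizes[k] bytes with return pc frame_pcs[k], the
// loop pushes
//   [ frame_pcs[k] ][ saved rbp ][ frame_sizes[k] - 2*wordSize of slots ]
// yielding a skeletal interpreter frame whose fields are corrected later
// by layout_activation_impl.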
2167
2168 // Re-push self-frame
2169 __ enter(); // save old & set new rbp,
2170
2171 // Return address and rbp are in place
2172 // We'll push additional args later. Just allocate a full sized
2173 // register save area
2174 __ subl(rsp, (frame_size_in_words-additional_words - 2) * wordSize);
2175
2176 // Restore frame locals after moving the frame
2177 __ movl(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax);
2178 __ movl(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx);
2179 __ fstp_d(Address(rsp, RegisterSaver::fpResultOffset()*wordSize)); // Pop float stack and store in local
2180 if( UseSSE>=2 ) __ movdbl(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0);
2181 if( UseSSE==1 ) __ movflt(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0);
2182
2183 // Set up the args to unpack_frames
2184
2185 __ pushl(unpack_kind); // get the unpack_kind value
2186 __ get_thread(rcx);
2187 __ pushl(rcx);
2188
2189 // set last_Java_sp, last_Java_fp
2190 __ set_last_Java_frame(rcx, noreg, rbp, NULL);
2191
2192 // Call C code. Need thread but NOT official VM entry
2193 // crud. We cannot block on this call, no GC can happen. Call should
2194 // restore return values to their stack-slots with the new SP.
2195 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
2196 // Set an oopmap for the call site
2197 oop_maps->add_gc_map( __ pc()-start, new OopMap( frame_size_in_words, 0 ));
2198
2199 // rax contains the return result type
2200 __ pushl(rax);
2201
2202 __ get_thread(rcx);
2203 __ reset_last_Java_frame(rcx, false, false);
2204
2205 // Collect return values
2206 __ movl(rax,Address(rsp, (RegisterSaver::raxOffset() + additional_words + 1)*wordSize));
2207 __ movl(rdx,Address(rsp, (RegisterSaver::rdxOffset() + additional_words + 1)*wordSize));
2208
2209 // Clear floating point stack before returning to interpreter
2210 __ empty_FPU_stack();
2211
2212 // Check if we should push the float or double return value.
2213 Label results_done, yes_double_value;
2214 __ cmpl(Address(rsp, 0), T_DOUBLE);
2215 __ jcc (Assembler::zero, yes_double_value);
2216 __ cmpl(Address(rsp, 0), T_FLOAT);
2217 __ jcc (Assembler::notZero, results_done);
2218
2219 // return float value as expected by interpreter
2220 if( UseSSE>=1 ) __ movflt(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize));
2221 else __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize));
2222 __ jmp(results_done);
2223
2224 // return double value as expected by interpreter
2225 __ bind(yes_double_value);
2226 if( UseSSE>=2 ) __ movdbl(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize));
2227 else __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize));
2248 void SharedRuntime::generate_uncommon_trap_blob() {
2249 // allocate space for the code
2250 ResourceMark rm;
2251 // setup code generation tools
2252 CodeBuffer buffer("uncommon_trap_blob", 512, 512);
2253 MacroAssembler* masm = new MacroAssembler(&buffer);
2254
2255 enum frame_layout {
2256 arg0_off, // thread sp + 0 // Arg location for
2257 arg1_off, // unloaded_class_index sp + 1 // calling C
2258 // The frame sender code expects that rbp will be in the "natural" place and
2259 // will override any oopMap setting for it. We must therefore force the layout
2260 // so that it agrees with the frame sender code.
2261 rbp_off, // callee saved register sp + 2
2262 return_off, // slot for return address sp + 3
2263 framesize
2264 };
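// Resulting self-frame layout (illustrative, assuming wordSize == 4):
//   sp + 0   arg0_off    thread                (first C argument)
//   sp + 4   arg1_off    unloaded_class_index  (second C argument)
//   sp + 8   rbp_off     callee-saved rbp
//   sp + 12  return_off  return address
// so framesize comes out to 4 words.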
2265
2266 address start = __ pc();
2267 // Push self-frame.
2268 __ subl(rsp, return_off*wordSize); // Prolog!
2269
2270 // rbp is an implicitly saved callee-saved register (i.e. the calling
2271 // convention will save/restore it in the prolog/epilog). Other than that
2272 // there are no callee-saved registers now that adapter frames are gone.
2273 __ movl(Address(rsp, rbp_off*wordSize),rbp);
2274
2275 // Clear the floating point exception stack
2276 __ empty_FPU_stack();
2277
2278 // set last_Java_sp
2279 __ get_thread(rdx);
2280 __ set_last_Java_frame(rdx, noreg, noreg, NULL);
2281
2282 // Call C code. Need thread but NOT official VM entry
2283 // crud. We cannot block on this call, no GC can happen. Call should
2284 // capture callee-saved registers as well as return values.
2285 __ movl(Address(rsp, arg0_off*wordSize),rdx);
2286 // argument already in ECX
2287 __ movl(Address(rsp, arg1_off*wordSize),rcx);
2288 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
2289
2290 // Set an oopmap for the call site
2291 OopMapSet *oop_maps = new OopMapSet();
2292 OopMap* map = new OopMap( framesize, 0 );
2293 // No oopMap for rbp, it is known implicitly
2294
2295 oop_maps->add_gc_map( __ pc()-start, map);
2296
2297 __ get_thread(rcx);
2298
2299 __ reset_last_Java_frame(rcx, false, false);
2300
2301 // Load UnrollBlock into EDI
2302 __ movl(rdi, rax);
2303
2304 // Pop all the frames we must move/replace.
2305 //
2306 // Frame picture (youngest to oldest)
2307 // 1: self-frame (no frame link)
2308 // 2: deopting frame (no frame link)
2309 // 3: caller of deopting frame (could be compiled/interpreted).
2310
2311 // Pop self-frame. We have no frame, and must rely only on EAX and ESP.
2312 __ addl(rsp,(framesize-1)*wordSize); // Epilog!
2313
2314 // Pop deoptimized frame
2315 __ addl(rsp,Address(rdi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2316
2317 // sp should be pointing at the return address to the caller (3)
2318
2319 // Stack bang to make sure there's enough room for these interpreter frames.
2320 if (UseStackBanging) {
2321 __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2322 __ bang_stack_size(rbx, rcx);
2323 }
2324
2325
2326 // Load array of frame pcs into ECX
2327 __ movl(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2328
2329 __ popl(rsi); // trash the pc
2330
2331 // Load array of frame sizes into ESI
2332 __ movl(rsi,Address(rdi,Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2333
2334 Address counter(rdi, Deoptimization::UnrollBlock::counter_temp_offset_in_bytes());
2335
2336 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2337 __ movl(counter, rbx);
2338
2339 // Pick up the initial fp we should save
2340 __ movl(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));
2341
2342 // Now adjust the caller's stack to make up for the extra locals
2343 // but record the original sp so that we can save it in the skeletal interpreter
2344 // frame and the stack walking of interpreter_sender will get the unextended sp
2345 // value and not the "real" sp value.
2346
2347 Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset_in_bytes());
2348 __ movl(sp_temp, rsp);
2349 __ subl(rsp, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2350
2351 // Push interpreter frames in a loop
2352 Label loop;
2353 __ bind(loop);
2354 __ movl(rbx, Address(rsi, 0)); // Load frame size
2355 #ifdef CC_INTERP
2356 __ subl(rbx, 4*wordSize); // we'll push pc and ebp by hand
2357 #ifdef ASSERT
2358 __ pushl(0xDEADDEAD); // Make a recognizable pattern
2359 __ pushl(0xDEADDEAD); // (parm to RecursiveInterpreter...)
2360 #else /* ASSERT */
2361 __ subl(rsp, 2*wordSize); // skip the "static long no_param"
2362 #endif /* ASSERT */
2363 #else /* CC_INTERP */
2364 __ subl(rbx, 2*wordSize); // we'll push pc and rbp, by hand
2365 #endif /* CC_INTERP */
2366 __ pushl(Address(rcx, 0)); // save return address
2367 __ enter(); // save old & set new rbp,
2368 __ subl(rsp, rbx); // Prolog!
2369 __ movl(rbx, sp_temp); // sender's sp
2370 #ifdef CC_INTERP
2371 __ movl(Address(rbp,
2372 -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
2373 rbx); // Make it walkable
2374 #else /* CC_INTERP */
2375 // This value is corrected by layout_activation_impl
2376 __ movl(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD );
2377 __ movl(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
2378 #endif /* CC_INTERP */
2379 __ movl(sp_temp, rsp); // pass to next frame
2380 __ addl(rsi, 4); // Bump array pointer (sizes)
2381 __ addl(rcx, 4); // Bump array pointer (pcs)
2382 __ decrement(counter); // decrement counter
2383 __ jcc(Assembler::notZero, loop);
2384 __ pushl(Address(rcx, 0)); // save final return address
2385
2386 // Re-push self-frame
2387 __ enter(); // save old & set new rbp,
2388 __ subl(rsp, (framesize-2) * wordSize); // Prolog!
2389
2390
2391 // set last_Java_sp, last_Java_fp
2392 __ get_thread(rdi);
2393 __ set_last_Java_frame(rdi, noreg, rbp, NULL);
2394
2395 // Call C code. Need thread but NOT official VM entry
2396 // crud. We cannot block on this call, no GC can happen. Call should
2397 // restore return values to their stack-slots with the new SP.
2398 __ movl(Address(rsp,arg0_off*wordSize),rdi);
2399 __ movl(Address(rsp,arg1_off*wordSize), Deoptimization::Unpack_uncommon_trap);
2400 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
2401 // Set an oopmap for the call site
2402 oop_maps->add_gc_map( __ pc()-start, new OopMap( framesize, 0 ) );
2403
2404 __ get_thread(rdi);
2405 __ reset_last_Java_frame(rdi, true, false);
2406
2407 // Pop self-frame.
2408 __ leave(); // Epilog!
2409
2410 // Jump to interpreter
2411 __ ret(0);
2412
2413 // -------------
2414 // make sure all code is generated
2415 masm->flush();
2416
2417 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize);
2418 }
2435 ResourceMark rm;
2436 OopMapSet *oop_maps = new OopMapSet();
2437 OopMap* map;
2438
2439 // allocate space for the code
2440 // setup code generation tools
2441 CodeBuffer buffer("handler_blob", 1024, 512);
2442 MacroAssembler* masm = new MacroAssembler(&buffer);
2443
2444 const Register java_thread = rdi; // callee-saved for VC++
2445 address start = __ pc();
2446 address call_pc = NULL;
2447
2448 // If cause_return is true we are at a poll_return and there is
2449 // the return address on the stack to the caller of the nmethod
2450 // that is at the safepoint. We can leave this return address on the
2451 // stack and effectively complete the return and safepoint in the caller.
2452 // Otherwise we push space for a return address that the safepoint
2453 // handler will install later to make the stack walking sensible.
2454 if( !cause_return )
2455 __ pushl(rbx); // Make room for return address (or push it again)
2456
2457 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
2458
2459 // The following is basically a call_VM. However, we need the precise
2460 // address of the call in order to generate an oopmap. Hence, we do all the
2461 // work ourselves.
2462
2463 // Push thread argument and setup last_Java_sp
2464 __ get_thread(java_thread);
2465 __ pushl(java_thread);
2466 __ set_last_Java_frame(java_thread, noreg, noreg, NULL);
2467
2468 // if this was not a poll_return then we need to correct the return address now.
2469 if( !cause_return ) {
2470 __ movl(rax, Address(java_thread, JavaThread::saved_exception_pc_offset()));
2471 __ movl(Address(rbp, wordSize), rax);
2472 }
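// Net effect (sketch): at a poll_return the caller's own return address
// already tops the frame, so nothing is patched; for a poll inside a
// method the placeholder word pushed earlier is overwritten here with
// saved_exception_pc, so the frame looks like an ordinary call into this
// blob and stack walking stays sensible.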
2473
2474 // do the call
2475 __ call(RuntimeAddress(call_ptr));
2476
2477 // Set an oopmap for the call site. This oopmap will map all
2478 // oop-registers and debug-info registers as callee-saved. This
2479 // will allow deoptimization at this safepoint to find all possible
2480 // debug-info recordings, as well as let GC find all oops.
2481
2482 oop_maps->add_gc_map( __ pc() - start, map);
2483
2484 // Discard arg
2485 __ popl(rcx);
2486
2487 Label noException;
2488
2489 // Clear last_Java_sp again
2490 __ get_thread(java_thread);
2491 __ reset_last_Java_frame(java_thread, false, false);
2492
2493 __ cmpl(Address(java_thread, Thread::pending_exception_offset()), NULL_WORD);
2494 __ jcc(Assembler::equal, noException);
2495
2496 // Exception pending
2497
2498 RegisterSaver::restore_live_registers(masm);
2499
2500 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2501
2502 __ bind(noException);
2503
2504 // Normal exit: restore registers and return
2505 RegisterSaver::restore_live_registers(masm);
2506
2507 __ ret(0);
2508
2509 // make sure all code is generated
2510 masm->flush();
2511
2512 // Fill-out other meta info
2513 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
2530 CodeBuffer buffer(name, 1000, 512);
2531 MacroAssembler* masm = new MacroAssembler(&buffer);
2532
2533 int frame_size_words;
2534 enum frame_layout {
2535 thread_off,
2536 extra_words };
2537
2538 OopMapSet *oop_maps = new OopMapSet();
2539 OopMap* map = NULL;
2540
2541 int start = __ offset();
2542
2543 map = RegisterSaver::save_live_registers(masm, extra_words, &frame_size_words);
2544
2545 int frame_complete = __ offset();
2546
2547 const Register thread = rdi;
2548 __ get_thread(rdi);
2549
2550 __ pushl(thread);
2551 __ set_last_Java_frame(thread, noreg, rbp, NULL);
2552
2553 __ call(RuntimeAddress(destination));
2554
2555
2556 // Set an oopmap for the call site.
2557 // We need this not only for callee-saved registers, but also for volatile
2558 // registers that the compiler might be keeping live across a safepoint.
2559
2560 oop_maps->add_gc_map( __ offset() - start, map);
2561
2562 // rax contains the address we are going to jump to, assuming no exception was installed
2563
2564 __ addl(rsp, wordSize);
2565
2566 // clear last_Java_sp
2567 __ reset_last_Java_frame(thread, true, false);
2568 // check for pending exceptions
2569 Label pending;
2570 __ cmpl(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
2571 __ jcc(Assembler::notEqual, pending);
2572
2573 // get the returned methodOop
2574 __ movl(rbx, Address(thread, JavaThread::vm_result_offset()));
2575 __ movl(Address(rsp, RegisterSaver::rbx_offset() * wordSize), rbx);
2576
2577 __ movl(Address(rsp, RegisterSaver::rax_offset() * wordSize), rax);
2578
2579 RegisterSaver::restore_live_registers(masm);
2580
2581 // We are back to the original state on entry and ready to go.
2582
2583 __ jmp(rax);
2584
2585 // Pending exception after the safepoint
2586
2587 __ bind(pending);
2588
2589 RegisterSaver::restore_live_registers(masm);
2590
2591 // exception pending => remove activation and forward to exception handler
2592
2593 __ get_thread(thread);
2594 __ movl(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
2595 __ movl(rax, Address(thread, Thread::pending_exception_offset()));
2596 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2597
2598 // -------------
2599 // make sure all code is generated
2600 masm->flush();
2601
2602 // return the blob
2603 // frame_size_words or bytes??
2604 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
2605 }
2606
2607 void SharedRuntime::generate_stubs() {
2608
2609 _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method),
2610 "wrong_method_stub");
2611
2612 _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss),
2613 "ic_miss_stub");
2614
2615 _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C),
101
102 // During deoptimization only the result registers need to be restored;
103 // all the other values have already been extracted.
104
105 static void restore_result_registers(MacroAssembler* masm);
106
107 };
108
109 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
110 int* total_frame_words, bool verify_fpu) {
111
112 int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize;
113 int frame_words = frame_size_in_bytes / wordSize;
114 *total_frame_words = frame_words;
115
116 assert(FPUStateSizeInWords == 27, "update stack layout");
117
118 // save registers, fpu state, and flags
119 // We assume the caller already has the return address slot on the stack.
120 // We push rbp twice in this sequence because we want the real rbp
121 // to be under the return address like a normal enter and we want to use pusha.
122 // We push by hand instead of using push.
123 __ enter();
124 __ pusha();
125 __ pushf();
126 __ subptr(rsp,FPU_regs_live*sizeof(jdouble)); // Push FPU registers space
127 __ push_FPU_state(); // Save FPU state & init
128
129 if (verify_fpu) {
130 // Some stubs may have non standard FPU control word settings so
131 // only check and reset the value when it is required to be the
132 // standard value. The safepoint blob in particular can be used
133 // in methods which are using the 24 bit control word for
134 // optimized float math.
135
136 #ifdef ASSERT
137 // Make sure the control word has the expected value
138 Label ok;
139 __ cmpw(Address(rsp, 0), StubRoutines::fpu_cntrl_wrd_std());
140 __ jccb(Assembler::equal, ok);
141 __ stop("corrupted control word detected");
142 __ bind(ok);
143 #endif
144
145 // Reset the control word to guard against exceptions being unmasked
146 // since fstp_d can cause FPU stack underflow exceptions. Write it
253 if( UseSSE == 1 ) {
254 __ movflt(xmm0,Address(rsp,xmm0_off*wordSize));
255 __ movflt(xmm1,Address(rsp,xmm1_off*wordSize));
256 __ movflt(xmm2,Address(rsp,xmm2_off*wordSize));
257 __ movflt(xmm3,Address(rsp,xmm3_off*wordSize));
258 __ movflt(xmm4,Address(rsp,xmm4_off*wordSize));
259 __ movflt(xmm5,Address(rsp,xmm5_off*wordSize));
260 __ movflt(xmm6,Address(rsp,xmm6_off*wordSize));
261 __ movflt(xmm7,Address(rsp,xmm7_off*wordSize));
262 } else if( UseSSE >= 2 ) {
263 __ movdbl(xmm0,Address(rsp,xmm0_off*wordSize));
264 __ movdbl(xmm1,Address(rsp,xmm1_off*wordSize));
265 __ movdbl(xmm2,Address(rsp,xmm2_off*wordSize));
266 __ movdbl(xmm3,Address(rsp,xmm3_off*wordSize));
267 __ movdbl(xmm4,Address(rsp,xmm4_off*wordSize));
268 __ movdbl(xmm5,Address(rsp,xmm5_off*wordSize));
269 __ movdbl(xmm6,Address(rsp,xmm6_off*wordSize));
270 __ movdbl(xmm7,Address(rsp,xmm7_off*wordSize));
271 }
272 __ pop_FPU_state();
273 __ addptr(rsp, FPU_regs_live*sizeof(jdouble)); // Pop FPU registers
274
275 __ popf();
276 __ popa();
277 // Get the rbp described implicitly by the frame sender code (no oopMap)
278 __ pop(rbp);
279
280 }
281
282 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
283
284 // Just restore result register. Only used by deoptimization. By
285 // now any callee save register that needs to be restored to a c2
286 // caller of the deoptee has been extracted into the vframeArray
287 // and will be stuffed into the c2i adapter we create for later
288 // restoration so only result registers need to be restored here.
289 //
290
291 __ frstor(Address(rsp, 0)); // Restore fpu state
292
293 // Recover XMM & FPU state
294 if( UseSSE == 1 ) {
295 __ movflt(xmm0, Address(rsp, xmm0_off*wordSize));
296 } else if( UseSSE >= 2 ) {
297 __ movdbl(xmm0, Address(rsp, xmm0_off*wordSize));
298 }
299 __ movptr(rax, Address(rsp, rax_off*wordSize));
300 __ movptr(rdx, Address(rsp, rdx_off*wordSize));
301 // Pop all of the register save area off the stack except the return address
302 __ addptr(rsp, return_off * wordSize);
303 }
304
305 // The java_calling_convention describes stack locations as ideal slots on
306 // a frame with no abi restrictions. Since we must observe abi restrictions
307 // (like the placement of the register window) the slots must be biased by
308 // the following value.
309 static int reg2offset_in(VMReg r) {
310 // Account for saved rbp, and return address
311 // This should really be in_preserve_stack_slots
312 return (r->reg2stack() + 2) * VMRegImpl::stack_slot_size;
313 }
314
315 static int reg2offset_out(VMReg r) {
316 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
317 }
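// Worked example (illustrative): an incoming VMReg at stack slot 3 maps
// to (3 + 2) * 4 == 20 bytes above rbp via reg2offset_in (the two extra
// slots skip the saved rbp and the return address), while the same slot
// outgoing maps to (3 + out_preserve_stack_slots()) * 4 bytes above rsp
// via reg2offset_out.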
318
319 // ---------------------------------------------------------------------------
320 // Read the array of BasicTypes from a signature, and compute where the
321 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
322 // quantities. Values less than SharedInfo::stack0 are registers, those above
431 regs[i].set2(VMRegImpl::stack2reg(dstack));
432 dstack += 2;
433 }
434 break;
435 case T_VOID: regs[i].set_bad(); break;
437 default:
438 ShouldNotReachHere();
439 break;
440 }
441 }
442
443 // the return value can be an odd number of VMRegImpl stack slots; make it a multiple of 2
444 return round_to(stack, 2);
445 }
446
447 // Patch the caller's callsite with entry to compiled code if it exists.
448 static void patch_callers_callsite(MacroAssembler *masm) {
449 Label L;
450 __ verify_oop(rbx);
451 __ cmpptr(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int32_t)NULL_WORD);
452 __ jcc(Assembler::equal, L);
453 // Schedule the branch target address early.
454 // Call into the VM to patch the caller, then jump to compiled callee
455 // rax isn't live, so capture the return address while we easily can
456 __ movptr(rax, Address(rsp, 0));
457 __ pusha();
458 __ pushf();
459
460 if (UseSSE == 1) {
461 __ subptr(rsp, 2*wordSize);
462 __ movflt(Address(rsp, 0), xmm0);
463 __ movflt(Address(rsp, wordSize), xmm1);
464 }
465 if (UseSSE >= 2) {
466 __ subptr(rsp, 4*wordSize);
467 __ movdbl(Address(rsp, 0), xmm0);
468 __ movdbl(Address(rsp, 2*wordSize), xmm1);
469 }
470 #ifdef COMPILER2
471 // C2 may leave the stack dirty if not in SSE2+ mode
472 if (UseSSE >= 2) {
473 __ verify_FPU(0, "c2i transition should have clean FPU stack");
474 } else {
475 __ empty_FPU_stack();
476 }
477 #endif /* COMPILER2 */
478
479 // VM needs caller's callsite
480 __ push(rax);
481 // VM needs target method
482 __ push(rbx);
483 __ verify_oop(rbx);
484 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
485 __ addptr(rsp, 2*wordSize);
486
487 if (UseSSE == 1) {
488 __ movflt(xmm0, Address(rsp, 0));
489 __ movflt(xmm1, Address(rsp, wordSize));
490 __ addptr(rsp, 2*wordSize);
491 }
492 if (UseSSE >= 2) {
493 __ movdbl(xmm0, Address(rsp, 0));
494 __ movdbl(xmm1, Address(rsp, 2*wordSize));
495 __ addptr(rsp, 4*wordSize);
496 }
497
498 __ popf();
499 __ popa();
500 __ bind(L);
501 }
502
503
504 // Helper function to put tags in interpreter stack.
505 static void tag_stack(MacroAssembler *masm, const BasicType sig, int st_off) {
506 if (TaggedStackInterpreter) {
507 int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0);
508 if (sig == T_OBJECT || sig == T_ARRAY) {
509 __ movptr(Address(rsp, tag_offset), frame::TagReference);
510 } else if (sig == T_LONG || sig == T_DOUBLE) {
511 int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1);
512 __ movptr(Address(rsp, next_tag_offset), frame::TagValue);
513 __ movptr(Address(rsp, tag_offset), frame::TagValue);
514 } else {
515 __ movptr(Address(rsp, tag_offset), frame::TagValue);
516 }
517 }
518 }
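// Example of the tagging above (illustrative, TaggedStackInterpreter
// only): a T_OBJECT at st_off gets a single frame::TagReference written
// into its tag slot, while a T_LONG or T_DOUBLE spans two interpreter
// stack elements, so the tag slots of both elements are written with
// frame::TagValue.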
519
520 // Double and long values with Tagged stacks are not contiguous.
521 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
522 int next_off = st_off - Interpreter::stackElementSize();
523 if (TaggedStackInterpreter) {
524 __ movdbl(Address(rsp, next_off), r);
525 // Move top half up and put tag in the middle.
526 __ movl(rdi, Address(rsp, next_off+wordSize));
527 __ movl(Address(rsp, st_off), rdi);
528 tag_stack(masm, T_DOUBLE, next_off);
529 } else {
530 __ movdbl(Address(rsp, next_off), r);
531 }
532 }
533
534 static void gen_c2i_adapter(MacroAssembler *masm,
535 int total_args_passed,
544 // compiled target. If there is one, we need to patch the caller's call.
545 patch_callers_callsite(masm);
546
547 __ bind(skip_fixup);
548
549 #ifdef COMPILER2
550 // C2 may leave the stack dirty if not in SSE2+ mode
551 if (UseSSE >= 2) {
552 __ verify_FPU(0, "c2i transition should have clean FPU stack");
553 } else {
554 __ empty_FPU_stack();
555 }
556 #endif /* COMPILER2 */
557
558 // Since all args are passed on the stack,
559 // total_args_passed * Interpreter::stackElementSize()
560 // is the space we need.
561 int extraspace = total_args_passed * Interpreter::stackElementSize();
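// For instance (illustrative, untagged stack assumed): a signature of
// (int, long, Object) expands to total_args_passed == 4 because the
// long's second half occupies a T_VOID slot, so with a 4-byte
// stackElementSize the adapter reserves 16 bytes of argument space.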
562
563 // Get return address
564 __ pop(rax);
565
566 // set senderSP value
567 __ movptr(rsi, rsp);
568
569 __ subptr(rsp, extraspace);
570
571 // Now write the args into the outgoing interpreter space
572 for (int i = 0; i < total_args_passed; i++) {
573 if (sig_bt[i] == T_VOID) {
574 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
575 continue;
576 }
577
578 // st_off points to lowest address on stack.
579 int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize();
580 int next_off = st_off - Interpreter::stackElementSize();
581
582 // Say 4 args:
583 // i st_off
584 // 0 12 T_LONG
585 // 1 8 T_VOID
586 // 2 4 T_OBJECT
587 // 3 0 T_BOOL
588 VMReg r_1 = regs[i].first();
589 VMReg r_2 = regs[i].second();
590 if (!r_1->is_valid()) {
591 assert(!r_2->is_valid(), "");
592 continue;
593 }
594
595 if (r_1->is_stack()) {
596 // memory to memory, copy through a temp register
597 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
598
599 if (!r_2->is_valid()) {
600 __ movl(rdi, Address(rsp, ld_off));
601 __ movptr(Address(rsp, st_off), rdi);
602 tag_stack(masm, sig_bt[i], st_off);
603 } else {
604
605 // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW
606 // st_off == MSW, st_off-wordSize == LSW
607
608 __ movptr(rdi, Address(rsp, ld_off));
609 __ movptr(Address(rsp, next_off), rdi);
610 #ifndef _LP64
611 __ movptr(rdi, Address(rsp, ld_off + wordSize));
612 __ movptr(Address(rsp, st_off), rdi);
613 #else
614 #ifdef ASSERT
615 // Overwrite the unused slot with known junk
616 __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
617 __ movptr(Address(rsp, st_off), rax);
618 #endif /* ASSERT */
619 #endif // _LP64
620 tag_stack(masm, sig_bt[i], next_off);
621 }
622 } else if (r_1->is_Register()) {
623 Register r = r_1->as_Register();
624 if (!r_2->is_valid()) {
625 __ movl(Address(rsp, st_off), r);
626 tag_stack(masm, sig_bt[i], st_off);
627 } else {
628 // long/double in gpr
629 NOT_LP64(ShouldNotReachHere());
630 // Two VMRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
631 // T_DOUBLE and T_LONG use two slots in the interpreter
632 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
633 // long/double in gpr
634 #ifdef ASSERT
635 // Overwrite the unused slot with known junk
636 LP64_ONLY(__ mov64(rax, CONST64(0xdeadffffdeadaaab)));
637 __ movptr(Address(rsp, st_off), rax);
638 #endif /* ASSERT */
639 __ movptr(Address(rsp, next_off), r);
640 tag_stack(masm, sig_bt[i], next_off);
641 } else {
642 __ movptr(Address(rsp, st_off), r);
643 tag_stack(masm, sig_bt[i], st_off);
644 }
645 }
646 } else {
647 assert(r_1->is_XMMRegister(), "");
648 if (!r_2->is_valid()) {
649 __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
650 tag_stack(masm, sig_bt[i], st_off);
651 } else {
652 assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type");
653 move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
654 }
655 }
656 }
657
658 // Schedule the branch target address early.
659 __ movptr(rcx, Address(rbx, in_bytes(methodOopDesc::interpreter_entry_offset())));
660 // And repush original return address
661 __ push(rax);
662 __ jmp(rcx);
663 }
664
665
666 // For tagged stacks, double or long values aren't contiguous on the stack
667 // so get them contiguous for the xmm load
668 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
669 int next_val_off = ld_off - Interpreter::stackElementSize();
670 if (TaggedStackInterpreter) {
671 // use tag slot temporarily for MSW
672 __ movptr(rsi, Address(saved_sp, ld_off));
673 __ movptr(Address(saved_sp, next_val_off+wordSize), rsi);
674 __ movdbl(r, Address(saved_sp, next_val_off));
675 // restore tag
676 __ movptr(Address(saved_sp, next_val_off+wordSize), frame::TagValue);
677 } else {
678 __ movdbl(r, Address(saved_sp, next_val_off));
679 }
680 }
681
682 static void gen_i2c_adapter(MacroAssembler *masm,
683 int total_args_passed,
684 int comp_args_on_stack,
685 const BasicType *sig_bt,
686 const VMRegPair *regs) {
687 // we're being called from the interpreter but need to find the
688 // compiled return entry point. The return address on the stack
689 // should point at it and we just need to pull the old value out.
690 // load up the pointer to the compiled return entry point and
691 // rewrite our return pc. The code is arranged like so:
692 //
693 // .word Interpreter::return_sentinel
694 // .word address_of_compiled_return_point
695 // return_entry_point: blah_blah_blah
696 //
697 // So we can find the appropriate return point by loading up the word
698 // just prior to the current return address we have on the stack.
699 //
700 // We will only enter here from an interpreted frame and never from after
701 // passing thru a c2i. Azul allowed this but we do not. If we lose the
702 // race and use a c2i we will remain interpreted for the race loser(s).
703 // This removes all sorts of headaches on the x86 side and also eliminates
704 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
705
706
707 // Note: rsi contains the senderSP on entry. We must preserve it since
708 // we may do an i2c -> c2i transition if we lose a race where compiled
709 // code goes non-entrant while we get args ready.
710
711 // Pick up the return address
712 __ movptr(rax, Address(rsp, 0));
713
714 // If UseSSE >= 2 then no cleanup is needed on the return to the
715 // interpreter so skip fixing up the return entry point unless
716 // VerifyFPU is enabled.
717 if (UseSSE < 2 || VerifyFPU) {
718 Label skip, chk_int;
719 // If we were called from the call stub we need to do slightly different
720 // cleanup than if the interpreter returned to the call stub.
721
722 ExternalAddress stub_return_address(StubRoutines::_call_stub_return_address);
723 __ cmpptr(rax, stub_return_address.addr());
724 __ jcc(Assembler::notEqual, chk_int);
725 assert(StubRoutines::x86::get_call_stub_compiled_return() != NULL, "must be set");
726 __ lea(rax, ExternalAddress(StubRoutines::x86::get_call_stub_compiled_return()));
727 __ jmp(skip);
728
729 // It must be the interpreter since we never get here via a c2i (unlike Azul)
730
731 __ bind(chk_int);
732 #ifdef ASSERT
733 {
734 Label ok;
735 __ cmpl(Address(rax, -2*wordSize), Interpreter::return_sentinel);
736 __ jcc(Assembler::equal, ok);
737 __ int3();
738 __ bind(ok);
739 }
740 #endif // ASSERT
741 __ movptr(rax, Address(rax, -wordSize));
742 __ bind(skip);
743 }
744
745 // rax now contains the compiled return entry point which will do any
746 // cleanup needed for the return from compiled to interpreted.
747
748 // Must preserve original SP for loading incoming arguments because
749 // we need to align the outgoing SP for compiled code.
750 __ movptr(rdi, rsp);
751
752 // Cut-out for having no stack args. Since up to 2 int/oop args are passed
753 // in registers, we will occasionally have no stack args.
754 int comp_words_on_stack = 0;
755 if (comp_args_on_stack) {
756 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
757 // registers are below. By subtracting stack0, we either get a negative
758 // number (all values in registers) or the maximum stack slot accessed.
759 // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
760 // Convert 4-byte stack slots to words.
761 comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
762 // Round up to minimum stack alignment, in wordSize
763 comp_words_on_stack = round_to(comp_words_on_stack, 2);
764 __ subptr(rsp, comp_words_on_stack * wordSize);
765 }
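// Arithmetic sketch (illustrative): with comp_args_on_stack == 5 slots,
// round_to(5*4, wordSize) >> LogBytesPerWord gives 5 words, rounded up
// to 6 by the 2-word minimum alignment, so rsp drops by 24 bytes here
// before the explicit alignment below.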
766
767 // Align the outgoing SP
768 __ andptr(rsp, -(StackAlignmentInBytes));
769
770 // push the return address on the stack (note that pushing, rather
771 // than storing it, yields the correct frame alignment for the callee)
772 __ push(rax);
773
774 // Put saved SP in another register
775 const Register saved_sp = rax;
776 __ movptr(saved_sp, rdi);
777
778
779 // Will jump to the compiled code just as if compiled code was doing it.
780 // Pre-load the register-jump target early, to schedule it better.
781 __ movptr(rdi, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));
782
783 // Now generate the shuffle code. Pick up all register args and move the
784 // rest through the floating point stack top.
785 for (int i = 0; i < total_args_passed; i++) {
786 if (sig_bt[i] == T_VOID) {
787 // Longs and doubles are passed in native word order, but misaligned
788 // in the 32-bit build.
789 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
790 continue;
791 }
792
793 // Pick up 0, 1 or 2 words from SP+offset.
794
795 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
796 "scrambled load targets?");
797 // Load in argument order going down.
798 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize() + Interpreter::value_offset_in_bytes();
799 // Point to interpreter value (vs. tag)
800 int next_off = ld_off - Interpreter::stackElementSize();
804 VMReg r_1 = regs[i].first();
805 VMReg r_2 = regs[i].second();
806 if (!r_1->is_valid()) {
807 assert(!r_2->is_valid(), "");
808 continue;
809 }
810 if (r_1->is_stack()) {
811 // Convert stack slot to an SP offset (+ wordSize to account for return address )
812 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
813
814 // We can use rsi as a temp here because compiled code doesn't need rsi as an input
815 // and if we end up going thru a c2i because of a miss a reasonable value of rsi
816 // will be generated.
817 if (!r_2->is_valid()) {
818 // __ fld_s(Address(saved_sp, ld_off));
819 // __ fstp_s(Address(rsp, st_off));
820 __ movl(rsi, Address(saved_sp, ld_off));
821 __ movptr(Address(rsp, st_off), rsi);
822 } else {
823 // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
824 // are accessed at negative offsets so the LSW is at the LOW address.
825
826 // ld_off is MSW so get LSW
827 // st_off is LSW (i.e. reg.first())
828 // __ fld_d(Address(saved_sp, next_off));
829 // __ fstp_d(Address(rsp, st_off));
830 //
831 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
832 // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
833 // so we must adjust where to pick up the data to match the interpreter.
839 const int offset = (NOT_LP64(true ||) sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
840 next_off : ld_off;
841 __ movptr(rsi, Address(saved_sp, offset));
842 __ movptr(Address(rsp, st_off), rsi);
843 #ifndef _LP64
844 __ movptr(rsi, Address(saved_sp, ld_off));
845 __ movptr(Address(rsp, st_off + wordSize), rsi);
846 #endif // _LP64
847 }
848 } else if (r_1->is_Register()) { // Register argument
849 Register r = r_1->as_Register();
850 assert(r != rax, "must be different");
851 if (r_2->is_valid()) {
852 //
853 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
854 // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
855 // So we must adjust where to pick up the data to match the interpreter.
856
857 const int offset = (NOT_LP64(true ||) sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
858 next_off : ld_off;
859
860 // this can be a misaligned move
861 __ movptr(r, Address(saved_sp, offset));
862 #ifndef _LP64
863 assert(r_2->as_Register() != rax, "need another temporary register");
864 // Remember r_1 is low address (and LSB on x86)
865 // So r_2 gets loaded from high address regardless of the platform
866 __ movptr(r_2->as_Register(), Address(saved_sp, ld_off));
867 #endif // _LP64
868 } else {
869 __ movl(r, Address(saved_sp, ld_off));
870 }
871 } else {
872 assert(r_1->is_XMMRegister(), "");
873 if (!r_2->is_valid()) {
874 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
875 } else {
876 move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off);
877 }
878 }
879 }
880
881 // 6243940 We might end up in handle_wrong_method if
882 // the callee is deoptimized as we race thru here. If that
883 // happens we don't want to take a safepoint because the
884 // caller frame will look interpreted and arguments are now
885 // "compiled" so it is much better to make this transition
886 // invisible to the stack walking code. Unfortunately if
887 // we try and find the callee by normal means a safepoint
888 // is possible. So we stash the desired callee in the thread
889 // and the VM will find it there should this case occur.
890
891 __ get_thread(rax);
892 __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
893
894 // move methodOop to rax in case we end up in a c2i adapter.
895 // the c2i adapters expect methodOop in rax (c2) because c2's
896 // resolve stubs return the result (the method) in rax.
897 // I'd love to fix this.
898 __ mov(rax, rbx);
899
900 __ jmp(rdi);
901 }
902
903 // ---------------------------------------------------------------
904 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
905 int total_args_passed,
906 int comp_args_on_stack,
907 const BasicType *sig_bt,
908 const VMRegPair *regs) {
909 address i2c_entry = __ pc();
910
911 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
912
913 // -------------------------------------------------------------------------
914 // Generate a C2I adapter. On entry we know rbx holds the methodOop during calls
915 // to the interpreter. The args start out packed in the compiled layout. They
916 // need to be unpacked into the interpreter layout. This will almost always
917 // require some stack space. We grow the current (compiled) stack, then repack
918 // the args. We finally end in a jump to the generic interpreter entry point.
919 // On exit from the interpreter, the interpreter will restore our SP (lest the
920 // compiled code, which relies solely on SP and not EBP, get sick).
921
922 address c2i_unverified_entry = __ pc();
923 Label skip_fixup;
924
925 Register holder = rax;
926 Register receiver = rcx;
927 Register temp = rbx;
928
929 {
930
931 Label missed;
932
933 __ verify_oop(holder);
934 __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
935 __ verify_oop(temp);
936
937 __ cmpptr(temp, Address(holder, compiledICHolderOopDesc::holder_klass_offset()));
938 __ movptr(rbx, Address(holder, compiledICHolderOopDesc::holder_method_offset()));
939 __ jcc(Assembler::notEqual, missed);
940 // Method might have been compiled since the call site was patched to
941 // interpreted; if that is the case, treat it as a miss so we can get
942 // the call site corrected.
943 __ cmpptr(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int32_t)NULL_WORD);
944 __ jcc(Assembler::equal, skip_fixup);
945
946 __ bind(missed);
947 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
948 }
949
950 address c2i_entry = __ pc();
951
952 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
953
954 __ flush();
955 return new AdapterHandlerEntry(i2c_entry, c2i_entry, c2i_unverified_entry);
956 }
957
958 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
959 VMRegPair *regs,
960 int total_args_passed) {
961 // We return the amount of VMRegImpl stack slots we need to reserve for all
962 // the arguments NOT counting out_preserve_stack_slots.
963
984 assert(sig_bt[i+1] == T_VOID, "missing Half" );
985 regs[i].set2(VMRegImpl::stack2reg(stack));
986 stack += 2;
987 break;
988 case T_VOID: regs[i].set_bad(); break;
989 default:
990 ShouldNotReachHere();
991 break;
992 }
993 }
994 return stack;
995 }
996
997 // A simple move of an integer-like type
998 static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
999 if (src.first()->is_stack()) {
1000 if (dst.first()->is_stack()) {
1001 // stack to stack
1002 // __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1003 // __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1004 __ movl2ptr(rax, Address(rbp, reg2offset_in(src.first())));
1005 __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
1006 } else {
1007 // stack to reg
1008 __ movl2ptr(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
1009 }
1010 } else if (dst.first()->is_stack()) {
1011 // reg to stack
1012 // no need to sign extend on 64bit
1013 __ movptr(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
1014 } else {
1015 if (dst.first() != src.first()) {
1016 __ mov(dst.first()->as_Register(), src.first()->as_Register());
1017 }
1018 }
1019 }
1020
1021 // An oop arg. Must pass a handle not the oop itself
1022 static void object_move(MacroAssembler* masm,
1023 OopMap* map,
1024 int oop_handle_offset,
1025 int framesize_in_slots,
1026 VMRegPair src,
1027 VMRegPair dst,
1028 bool is_receiver,
1029 int* receiver_offset) {
1030
1031 // Because of the calling conventions we know that src can be a
1032 // register or a stack location. dst can only be a stack location.
1033
1034 assert(dst.first()->is_stack(), "must be stack");
1035 // must pass a handle. First figure out the location we use as a handle
1036
1037 if (src.first()->is_stack()) {
1038 // Oop is already on the stack as an argument
1039 Register rHandle = rax;
1040 Label nil;
1041 __ xorptr(rHandle, rHandle);
1042 __ cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD);
1043 __ jcc(Assembler::equal, nil);
1044 __ lea(rHandle, Address(rbp, reg2offset_in(src.first())));
1045 __ bind(nil);
1046 __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
1047
1048 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1049 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1050 if (is_receiver) {
1051 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1052 }
1053 } else {
1054 // Oop is in a register; we must store it to the space we reserve
1055 // on the stack for oop_handles
1056 const Register rOop = src.first()->as_Register();
1057 const Register rHandle = rax;
1058 int oop_slot = (rOop == rcx ? 0 : 1) * VMRegImpl::slots_per_word + oop_handle_offset;
1059 int offset = oop_slot*VMRegImpl::stack_slot_size;
1060 Label skip;
1061 __ movptr(Address(rsp, offset), rOop);
1062 map->set_oop(VMRegImpl::stack2reg(oop_slot));
1063 __ xorptr(rHandle, rHandle);
1064 __ cmpptr(rOop, (int32_t)NULL_WORD);
1065 __ jcc(Assembler::equal, skip);
1066 __ lea(rHandle, Address(rsp, offset));
1067 __ bind(skip);
1068 // Store the handle parameter
1069 __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
1070 if (is_receiver) {
1071 *receiver_offset = offset;
1072 }
1073 }
1074 }
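// Usage note (sketch): for a receiver oop arriving in rcx, the path
// above parks rcx in its reserved oop_handle slot, records that slot in
// the oop map, and hands the native callee either the address of the
// slot or NULL, honoring the JNI rule that a null oop is passed as a
// null handle rather than as a handle to a null slot.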
1075
1076 // A float arg may have to do a float reg to int reg conversion
1077 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1078 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
1079
1080 // Because of the calling convention we know that src is either a stack location
1081 // or an xmm register. dst can only be a stack location.
1082
1083 assert(dst.first()->is_stack() && ( src.first()->is_stack() || src.first()->is_XMMRegister()), "bad parameters");
1084
1085 if (src.first()->is_stack()) {
1086 __ movl(rax, Address(rbp, reg2offset_in(src.first())));
1087 __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
1088 } else {
1089 // reg to stack
1090 __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
1091 }
1092 }
1093
1094 // A long move
1095 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1096
1097 // The only legal possibility for a long_move VMRegPair is:
1098 // 1: two stack slots (possibly unaligned)
1099 // as neither the Java nor the C calling convention will use registers
1100 // for longs.
1101
1102 if (src.first()->is_stack() && dst.first()->is_stack()) {
1103 assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
1104 __ movptr(rax, Address(rbp, reg2offset_in(src.first())));
1105 NOT_LP64(__ movptr(rbx, Address(rbp, reg2offset_in(src.second()))));
1106 __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
1107 NOT_LP64(__ movptr(Address(rsp, reg2offset_out(dst.second())), rbx));
1108 } else {
1109 ShouldNotReachHere();
1110 }
1111 }
1112
1113 // A double move
1114 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1115
1116 // The only legal possibilities for a double_move VMRegPair are:
1118
1119 // Because of the calling convention we know that src is either
1120 // 1: a single physical register (xmm registers only)
1121 // 2: two stack slots (possibly unaligned)
1122 // dst can only be a pair of stack slots.
1123
1124 assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || src.first()->is_stack()), "bad args");
1125
1126 if (src.first()->is_stack()) {
1127 // source is all stack
1128 __ movptr(rax, Address(rbp, reg2offset_in(src.first())));
1129 NOT_LP64(__ movptr(rbx, Address(rbp, reg2offset_in(src.second()))));
1130 __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
1131 NOT_LP64(__ movptr(Address(rsp, reg2offset_out(dst.second())), rbx));
1132 } else {
1133 // reg to stack
1134 // No worries about stack alignment
1135 __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
1136 }
1137 }
1138
1139
1140 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1141 // We always ignore the frame_slots arg and just use the space just below the
1142 // frame pointer, which by this time is free to use
1143 switch (ret_type) {
1144 case T_FLOAT:
1145 __ fstp_s(Address(rbp, -wordSize));
1146 break;
1147 case T_DOUBLE:
1148 __ fstp_d(Address(rbp, -2*wordSize));
1149 break;
1150 case T_VOID: break;
1151 case T_LONG:
1152 __ movptr(Address(rbp, -wordSize), rax);
1153 NOT_LP64(__ movptr(Address(rbp, -2*wordSize), rdx));
1154 break;
1155 default: {
1156 __ movptr(Address(rbp, -wordSize), rax);
1157 }
1158 }
1159 }
1160
1161 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1162 // We always ignore the frame_slots arg and just use the space just below the
1163 // frame pointer, which by this time is free to use
1164 switch (ret_type) {
1165 case T_FLOAT:
1166 __ fld_s(Address(rbp, -wordSize));
1167 break;
1168 case T_DOUBLE:
1169 __ fld_d(Address(rbp, -2*wordSize));
1170 break;
1171 case T_LONG:
1172 __ movptr(rax, Address(rbp, -wordSize));
1173 NOT_LP64(__ movptr(rdx, Address(rbp, -2*wordSize)));
1174 break;
1175 case T_VOID: break;
1176 default: {
1177 __ movptr(rax, Address(rbp, -wordSize));
1178 }
1179 }
1180 }
1181
1182 // ---------------------------------------------------------------------------
1183 // Generate a native wrapper for a given method. The method takes arguments
1184 // in the Java compiled code convention, marshals them to the native
1185 // convention (handlizes oops, etc), transitions to native, makes the call,
1186 // returns to java state (possibly blocking), unhandlizes any result and
1187 // returns.
1188 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1189 methodHandle method,
1190 int total_in_args,
1191 int comp_args_on_stack,
1192 BasicType *in_sig_bt,
1193 VMRegPair *in_regs,
1194 BasicType ret_type) {
1195
1196 // An OopMap for lock (and class if static)
1197 OopMapSet *oop_maps = new OopMapSet();
1302 stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
1303
1304 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1305
1306 intptr_t start = (intptr_t)__ pc();
1307
1308 // First thing make an ic check to see if we should even be here
1309
1310 // We are free to use all registers as temps without saving them and
1311 // restoring them, except rbp. rbp is the only callee-saved register
1312 // as far as the interpreter and the compiler(s) are concerned.
1313
1314
1315 const Register ic_reg = rax;
1316 const Register receiver = rcx;
1317 Label hit;
1318 Label exception_pending;
1319
1320
1321 __ verify_oop(receiver);
1322 __ cmpptr(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
1323 __ jcc(Assembler::equal, hit);
1324
1325 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1326
1327 // The verified entry must be aligned for code patching,
1328 // and the first 5 bytes must be in the same cache line;
1329 // if we align at 8 then we can be sure the 5 bytes are in the same line.
1330 __ align(8);
1331
1332 __ bind(hit);
1333
1334 int vep_offset = ((intptr_t)__ pc()) - start;
1335
1336 #ifdef COMPILER1
1337 if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
1338 // Object.hashCode can pull the hashCode from the header word
1339 // instead of doing a full VM transition once it's been computed.
1340 // Since hashCode is usually polymorphic at call sites we can't do
1341 // this optimization at the call site without a lot of work.
1342 Label slowCase;
1343 Register receiver = rcx;
1344 Register result = rax;
1345 __ movptr(result, Address(receiver, oopDesc::mark_offset_in_bytes()));
1346
1347 // check if locked
1348 __ testptr(result, markOopDesc::unlocked_value);
1349 __ jcc (Assembler::zero, slowCase);
1350
1351 if (UseBiasedLocking) {
1352 // Check if biased and fall through to runtime if so
1353 __ testptr(result, markOopDesc::biased_lock_bit_in_place);
1354 __ jcc (Assembler::notZero, slowCase);
1355 }
1356
1357 // get hash
1358 __ andptr(result, markOopDesc::hash_mask_in_place);
1359 // test if hashCode exists
1360 __ jcc (Assembler::zero, slowCase);
1361 __ shrptr(result, markOopDesc::hash_shift);
1362 __ ret(0);
1363 __ bind (slowCase);
1364 }
1365 #endif // COMPILER1
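// The fast path above leans on the 32-bit mark word layout (sketch,
// assuming the usual constants): the low two bits are the lock bits
// (unlocked == 01), bit 2 is the biased-lock bit, and the hash sits
// above them at markOopDesc::hash_shift under hash_mask_in_place; a zero
// hash field means no hash has been assigned yet, hence the branch to
// slowCase.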
1366
1367 // The instruction at the verified entry point must be 5 bytes or longer
1368 // because it can be patched on the fly by make_non_entrant. The stack bang
1369 // instruction fits that requirement.
1370
1371 // Generate stack overflow check
1372
1373 if (UseStackBanging) {
1374 __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
1375 } else {
1376 // need a 5 byte instruction to allow MT safe patching to non-entrant
1377 __ fat_nop();
1378 }
1379
1380 // Generate a new frame for the wrapper.
1381 __ enter();
1382 // -2 because return address is already present and so is saved rbp
1383 __ subptr(rsp, stack_size - 2*wordSize);
1384
1385 // Frame is now completed as far as size and linkage are concerned.
1386
1387 int frame_complete = ((intptr_t)__ pc()) - start;
1388
1389 // Calculate the difference between rsp and rbp. We need to know it
1390 // after the native call because on windows Java Natives will pop
1391 // the arguments and it is painful to do rsp relative addressing
1392 // in a platform independent way. So after the call we switch to
1393 // rbp, relative addressing.
1394
1395 int fp_adjustment = stack_size - 2*wordSize;
1396
1397 #ifdef COMPILER2
1398 // C2 may leave the stack dirty if not in SSE2+ mode
1399 if (UseSSE >= 2) {
1400 __ verify_FPU(0, "c2i transition should have clean FPU stack");
1401 } else {
1402 __ empty_FPU_stack();
1403 }
1484
1485 case T_LONG :
1486 long_move(masm, in_regs[i], out_regs[c_arg]);
1487 break;
1488
1489 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1490
1491 default:
1492 simple_move32(masm, in_regs[i], out_regs[c_arg]);
1493 }
1494 }
1495
1496 // Pre-load a static method's oop into rsi. Used both by locking code and
1497 // the normal JNI call code.
1498 if (method->is_static()) {
1499
1500 // load oop into a register
1501 __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
1502
1503 // Now handlize the static class mirror; it's known not-null.
1504 __ movptr(Address(rsp, klass_offset), oop_handle_reg);
1505 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1506
1507 // Now get the handle
1508 __ lea(oop_handle_reg, Address(rsp, klass_offset));
1509 // store the klass handle as second argument
1510 __ movptr(Address(rsp, wordSize), oop_handle_reg);
1511 }
1512
1513 // Change state to native (we save the return address in the thread, since it might not
1514 // be pushed on the stack when we do a stack traversal). It is enough that the pc()
1515 // points into the right code segment. It does not have to be the correct return pc.
1516 // We use the same pc/oopMap repeatedly when we call out.
1517
1518 intptr_t the_pc = (intptr_t) __ pc();
1519 oop_maps->add_gc_map(the_pc - start, map);
1520
1521 __ set_last_Java_frame(thread, rsp, noreg, (address)the_pc);
1522
1523
1524 // We have all of the arguments setup at this point. We must not touch any register
1525 // argument registers at this point (if we saved/restored them there would be no oop map covering them).
1526
1527 {
1528 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
1529 __ movoop(rax, JNIHandles::make_local(method()));
1530 __ call_VM_leaf(
1531 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
1532 thread, rax);
1533 }
1534
1535 // RedefineClasses() tracing support for obsolete method entry
1536 if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
1537 __ movoop(rax, JNIHandles::make_local(method()));
1538 __ call_VM_leaf(
1539 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
1540 thread, rax);
1541 }
1542
1543
1544 // These are register definitions we need for locking/unlocking
1545 const Register swap_reg = rax; // Must use rax, for cmpxchg instruction
1546 const Register obj_reg = rcx; // Will contain the oop
1547 const Register lock_reg = rdx; // Address of compiler lock object (BasicLock)
1548
1549 Label slow_path_lock;
1550 Label lock_done;
1551
1552 // Lock a synchronized method
1553 if (method->is_synchronized()) {
1554
1555
1556 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
1557
1558 // Get the handle (the 2nd argument)
1559 __ movptr(oop_handle_reg, Address(rsp, wordSize));
1560
1561 // Get address of the box
1562
1563 __ lea(lock_reg, Address(rbp, lock_slot_rbp_offset));
1564
1565 // Load the oop from the handle
1566 __ movptr(obj_reg, Address(oop_handle_reg, 0));
1567
1568 if (UseBiasedLocking) {
1569 // Note that oop_handle_reg is trashed during this call
1570 __ biased_locking_enter(lock_reg, obj_reg, swap_reg, oop_handle_reg, false, lock_done, &slow_path_lock);
1571 }
1572
1573 // Load immediate 1 into swap_reg %rax
1574 __ movptr(swap_reg, 1);
1575
1576 // Load (object->mark() | 1) into swap_reg %rax
1577 __ orptr(swap_reg, Address(obj_reg, 0));
1578
1579 // Save (object->mark() | 1) into BasicLock's displaced header
1580 __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1581
1582 if (os::is_MP()) {
1583 __ lock();
1584 }
1585
1586 // src -> dest iff dest == rax else rax <- dest
1587 // *obj_reg = lock_reg iff *obj_reg == rax else rax = *(obj_reg)
1588 __ cmpxchgptr(lock_reg, Address(obj_reg, 0));
1589 __ jcc(Assembler::equal, lock_done);
1590
1591 // Test if the oopMark is an obvious stack pointer, i.e.,
1592 // 1) (mark & 3) == 0, and
1593 // 2) rsp <= mark < rsp + os::pagesize()
1594 // These 3 tests can be done by evaluating the following
1595 // expression: ((mark - rsp) & (3 - os::vm_page_size())),
1596 // assuming both the stack pointer and the page size have their
1597 // least significant 2 bits clear.
1598 // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
1599
1600 __ subptr(swap_reg, rsp);
1601 __ andptr(swap_reg, 3 - os::vm_page_size());
1602
1603 // Save the test result; for the recursive case the result is zero
1604 __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1605 __ jcc(Assembler::notEqual, slow_path_lock);
1606 // Slow path will re-enter here
1607 __ bind(lock_done);
1608
1609 if (UseBiasedLocking) {
1610 // Re-fetch oop_handle_reg as we trashed it above
1611 __ movptr(oop_handle_reg, Address(rsp, wordSize));
1612 }
1613 }
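// C-level sketch of the one-instruction range test above. With an assumed
// 4K page, 3 - 4096 == -4093 == 0xFFFFF003, so the AND keeps the two low
// (lock) bits plus every bit at or above the page size:
#if 0  // not compiled, illustration only
inline bool looks_like_our_stack_lock(uintptr_t mark, uintptr_t sp) {
  const uintptr_t page = 4096;  // assumed os::vm_page_size()
  // zero iff (mark & 3) == 0 and sp <= mark < sp + page
  return ((mark - sp) & (3 - page)) == 0;
}
#endif
// A zero result (recursive lock) is what gets stored as the displaced
// header above, so the matching unlock can recognize it; non-zero takes
// the slow path.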
1614
1615
1616 // Finally just about ready to make the JNI call
1617
1618
1619 // get JNIEnv* which is first argument to native
1620
1621 __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
1622 __ movptr(Address(rsp, 0), rdx);
1623
1624 // Now set thread in native
1625 __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native);
1626
1627 __ call(RuntimeAddress(method->native_function()));
1628
1629 // WARNING - on Windows Java Natives use pascal calling convention and pop the
1630 // arguments off of the stack. We could just re-adjust the stack pointer here
1631 // and continue to do SP relative addressing but we instead switch to FP
1632 // relative addressing.
1633
1634 // Unpack native results.
1635 switch (ret_type) {
1636 case T_BOOLEAN: __ c2bool(rax); break;
1637 case T_CHAR : __ andptr(rax, 0xFFFF); break;
1638 case T_BYTE : __ sign_extend_byte (rax); break;
1639 case T_SHORT : __ sign_extend_short(rax); break;
1640 case T_INT : /* nothing to do */ break;
1641 case T_DOUBLE :
1642 case T_FLOAT :
1643 // Result is in st0; we'll save it as needed
1644 break;
1645 case T_ARRAY: // Really a handle
1646 case T_OBJECT: // Really a handle
1647 break; // can't de-handlize until after safepoint check
1648 case T_VOID: break;
1649 case T_LONG: break;
1650 default : ShouldNotReachHere();
1651 }
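// Sketch of the narrowing above at the C level (helper names illustrative,
// not VM functions): the native ABI returns everything widened in
// eax/edx/st0, so sub-int types must be renormalized for Java.
#if 0  // not compiled, illustration only
jboolean to_jboolean(int eax) { return (eax & 0xFF) != 0; }     // c2bool: any non-zero byte -> 1
jchar    to_jchar   (int eax) { return (jchar)(eax & 0xFFFF); } // zero-extend
jbyte    to_jbyte   (int eax) { return (jbyte)eax; }            // sign-extend
jshort   to_jshort  (int eax) { return (jshort)eax; }           // sign-extend
#endif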
1652
1653 // Switch thread to "native transition" state before reading the synchronization state.
1654 // This additional state is necessary because reading and testing the synchronization
1655 // state is not atomic w.r.t. GC, as this scenario demonstrates:
1656 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1657 // VM thread changes sync state to synchronizing and suspends threads for GC.
1658 // Thread A is resumed to finish this native method, but doesn't block here since it
1659 // didn't see any synchronization in progress, and escapes.
1660 __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
1661
1662 if(os::is_MP()) {
1663 if (UseMembar) {
1664 // Force this write out before the read below
1665 __ membar(Assembler::Membar_mask_bits(
1666 Assembler::LoadLoad | Assembler::LoadStore |
1667 Assembler::StoreLoad | Assembler::StoreStore));
1668 } else {
1669 // Write serialization page so VM thread can do a pseudo remote membar.
1670 // We use the current thread pointer to calculate a thread specific
1671 // offset to write to within the page. This minimizes bus traffic
1672 // due to cache line collision.
1673 __ serialize_memory(thread, rcx);
1674 }
1675 }
1676
1677 if (AlwaysRestoreFPU) {
1678 // Make sure the control word is correct.
1679 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1680 }
1681
1682 // check for safepoint operation in progress and/or pending suspend requests
1683 { Label Continue;
1684
1685 __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
1686 SafepointSynchronize::_not_synchronized);
1687
1688 Label L;
1689 __ jcc(Assembler::notEqual, L);
1690 __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
1691 __ jcc(Assembler::equal, Continue);
1692 __ bind(L);
1693
1694 // Don't use call_VM as it will see a possible pending exception and forward it
1695 // and never return here preventing us from clearing _last_native_pc down below.
1696 // We can't use call_VM_leaf either as it will check to see if rsi & rdi are
1697 // preserved and correspond to the bcp/locals pointers. So we do a runtime call
1698 // by hand.
1699 //
1700 save_native_result(masm, ret_type, stack_slots);
1701 __ push(thread);
1702 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
1703 JavaThread::check_special_condition_for_native_trans)));
1704 __ increment(rsp, wordSize);
1705 // Restore any method result value
1706 restore_native_result(masm, ret_type, stack_slots);
1707
1708 __ bind(Continue);
1709 }
1710
1711 // change thread state
1712 __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java);
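// The whole native-to-Java transition above, sketched at the C level
// (helper names are illustrative, not the VM's):
#if 0  // not compiled, illustration only
state = _thread_in_native_trans;       // store the transitional state ...
fence_or_write_serialize_page();       // ... and force it visible ...
if (safepoint_in_progress() || suspend_flags != 0) {
  // ... before we load the safepoint state; may block here, which is why
  // the native result is saved/restored around the call above
  check_special_condition_for_native_trans(thread);
}
state = _thread_in_Java;               // safepoints will now wait for us
#endif
// The intermediate _trans state plus the fence is what closes the race
// described above: the VM thread can no longer observe us as still
// safely "in native" after we have started reading the safepoint state.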
1713
1714 Label reguard;
1715 Label reguard_done;
1716 __ cmpl(Address(thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled);
1717 __ jcc(Assembler::equal, reguard);
1718
1719 // slow path reguard re-enters here
1720 __ bind(reguard_done);
1721
1722 // Handle possible exception (will unlock if necessary)
1723
1724 // native result if any is live
1725
1726 // Unlock
1727 Label slow_path_unlock;
1728 Label unlock_done;
1729 if (method->is_synchronized()) {
1730
1731 Label done;
1732
1733 // Get locked oop from the handle we passed to jni
1734 __ movptr(obj_reg, Address(oop_handle_reg, 0));
1735
1736 if (UseBiasedLocking) {
1737 __ biased_locking_exit(obj_reg, rbx, done);
1738 }
1739
1740 // Simple recursive lock?
1741
1742 __ cmpptr(Address(rbp, lock_slot_rbp_offset), (int32_t)NULL_WORD);
1743 __ jcc(Assembler::equal, done);
1744
1745 // Must save rax if it is live now because cmpxchg must use it
1746 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1747 save_native_result(masm, ret_type, stack_slots);
1748 }
1749
1750 // get old displaced header
1751 __ movptr(rbx, Address(rbp, lock_slot_rbp_offset));
1752
1753 // get address of the stack lock
1754 __ lea(rax, Address(rbp, lock_slot_rbp_offset));
1755
1756 // Atomic swap old header if oop still contains the stack lock
1757 if (os::is_MP()) {
1758 __ lock();
1759 }
1760
1761 // src -> dest iff dest == rax else rax <- dest
1762 // *obj_reg = rbx iff *obj_reg == rax else rax = *(obj_reg)
1763 __ cmpxchgptr(rbx, Address(obj_reg, 0));
1764 __ jcc(Assembler::notEqual, slow_path_unlock);
1765
1766 // slow path re-enters here
1767 __ bind(unlock_done);
1768 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1769 restore_native_result(masm, ret_type, stack_slots);
1770 }
1771
1772 __ bind(done);
1773
1774 }
1775
1776 {
1777 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
1778 // Tell dtrace about this method exit
1779 save_native_result(masm, ret_type, stack_slots);
1780 __ movoop(rax, JNIHandles::make_local(method()));
1781 __ call_VM_leaf(
1782 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
1783 thread, rax);
1784 restore_native_result(masm, ret_type, stack_slots);
1785 }
1786
1787 // We can finally stop using that last_Java_frame we setup ages ago
1788
1789 __ reset_last_Java_frame(thread, false, true);
1790
1791 // Unpack oop result
1792 if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1793 Label L;
1794 __ cmpptr(rax, (int32_t)NULL_WORD);
1795 __ jcc(Assembler::equal, L);
1796 __ movptr(rax, Address(rax, 0));
1797 __ bind(L);
1798 __ verify_oop(rax);
1799 }
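// JNI returns object results as handles (pointers to oop slots), so the
// block above is the deferred dereference, roughly:
#if 0  // not compiled, illustration only
if (result != NULL) result = *(oop*)result;   // NULL handles stay NULL
#endif
// It can only be done after the safepoint check: a GC during the
// transition may have moved the object, and only the handle's slot is
// updated to the new location.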
1800
1801 // reset handle block
1802 __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
1803
1804 __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
1805
1806 // Any exception pending?
1807 __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
1808 __ jcc(Assembler::notEqual, exception_pending);
1809
1810
1811 // no exception, we're almost done
1812
1813 // check that only result value is on FPU stack
1814 __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
1815
1816 // Fixup floating point results so that the result looks like a return from a compiled method
1817 if (ret_type == T_FLOAT) {
1818 if (UseSSE >= 1) {
1819 // Pop st0 and store as float and reload into xmm register
1820 __ fstp_s(Address(rbp, -4));
1821 __ movflt(xmm0, Address(rbp, -4));
1822 }
1823 } else if (ret_type == T_DOUBLE) {
1824 if (UseSSE >= 2) {
1825 // Pop st0 and store as double and reload into xmm register
1826 __ fstp_d(Address(rbp, -8));
1827 __ movdbl(xmm0, Address(rbp, -8));
1828 }
1829 }
1830
1831 // Return
1832
1833 __ leave();
1834 __ ret(0);
1835
1836 // Unexpected paths are out of line and go here
1837
1838 // Slow path locking & unlocking
1839 if (method->is_synchronized()) {
1840
1841 // BEGIN Slow path lock
1842
1843 __ bind(slow_path_lock);
1844
1845 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
1846 // args are (oop obj, BasicLock* lock, JavaThread* thread)
1847 __ push(thread);
1848 __ push(lock_reg);
1849 __ push(obj_reg);
1850 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C)));
1851 __ addptr(rsp, 3*wordSize);
1852
1853 #ifdef ASSERT
1854 { Label L;
1855 __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
1856 __ jcc(Assembler::equal, L);
1857 __ stop("no pending exception allowed on exit from monitorenter");
1858 __ bind(L);
1859 }
1860 #endif
1861 __ jmp(lock_done);
1862
1863 // END Slow path lock
1864
1865 // BEGIN Slow path unlock
1866 __ bind(slow_path_unlock);
1867
1868 // Slow path unlock
1869
1870 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1871 save_native_result(masm, ret_type, stack_slots);
1872 }
1873 // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
1874
1875 __ pushptr(Address(thread, in_bytes(Thread::pending_exception_offset())));
1876 __ movptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
1877
1878
1879 // should be a single push-effective-address (we lea and push by hand)
1880 // +wordSize because of the push above
1881 __ lea(rax, Address(rbp, lock_slot_rbp_offset));
1882 __ push(rax);
1883
1884 __ push(obj_reg);
1885 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
1886 __ addptr(rsp, 2*wordSize);
1887 #ifdef ASSERT
1888 {
1889 Label L;
1890 __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
1891 __ jcc(Assembler::equal, L);
1892 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
1893 __ bind(L);
1894 }
1895 #endif /* ASSERT */
1896
1897 __ popptr(Address(thread, in_bytes(Thread::pending_exception_offset())));
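// Net effect of the push/clear/pop pattern above, sketched (accessor
// names illustrative):
#if 0  // not compiled, illustration only
oop saved = thread->pending_exception();     // pushptr
thread->set_pending_exception(NULL);         // movptr NULL_WORD
complete_monitor_unlocking_C(obj, lock);     // contains an EXCEPTION_MARK
thread->set_pending_exception(saved);        // popptr
#endif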
1898
1899 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1900 restore_native_result(masm, ret_type, stack_slots);
1901 }
1902 __ jmp(unlock_done);
1903 // END Slow path unlock
1904
1905 }
1906
1907 // SLOW PATH Reguard the stack if needed
1908
1909 __ bind(reguard);
1910 save_native_result(masm, ret_type, stack_slots);
1911 {
1912 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
1913 }
1914 restore_native_result(masm, ret_type, stack_slots);
1915 __ jmp(reguard_done);
1916
1917
1925
1926 // pop our frame
1927 __ leave();
1928 // and forward the exception
1929 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
1930
1931 __ flush();
1932
1933 nmethod *nm = nmethod::new_native_nmethod(method,
1934 masm->code(),
1935 vep_offset,
1936 frame_complete,
1937 stack_slots / VMRegImpl::slots_per_word,
1938 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
1939 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
1940 oop_maps);
1941 return nm;
1942
1943 }
1944
1945 #ifdef HAVE_DTRACE_H
1946 // ---------------------------------------------------------------------------
1947 // Generate a dtrace nmethod for a given signature. The method takes arguments
1948 // in the Java compiled code convention, marshals them to the native
1949 // abi and then leaves nops at the position you would expect to call a native
1950 // function. When the probe is enabled the nops are replaced with a trap
1951 // instruction that dtrace inserts and the trace will cause a notification
1952 // to dtrace.
1953 //
1954 // The probes are only able to take primitive types and java/lang/String as
1955 // arguments. No other java types are allowed. Strings are converted to utf8
1956 // strings so that from dtrace point of view java strings are converted to C
1957 // strings. There is an arbitrary fixed limit on the total space that a
1958 // method can use for converting the strings (256 chars per string in the
1959 // signature), so any java string larger than this is truncated.
1960
1961 nmethod *SharedRuntime::generate_dtrace_nmethod(
1962 MacroAssembler *masm, methodHandle method) {
1963
1964 // generate_dtrace_nmethod is guarded by a mutex so we are sure to
1965 // be single threaded in this method.
1966 assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
1967
1968 // Fill in the signature array, for the calling-convention call.
1969 int total_args_passed = method->size_of_parameters();
1970
1971 BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
1972 VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
1973
1974 // The signature we are going to use for the trap that dtrace will see
1975 // java/lang/String is converted. We drop "this" and any other object
1976 // is converted to NULL. (A one-slot java/lang/Long object reference
1977 // is converted to a two-slot long, which is why we double the allocation).
1978 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
1979 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
1980
1981 int i=0;
1982 int total_strings = 0;
1983 int first_arg_to_pass = 0;
1984 int total_c_args = 0;
1985
1986 if( !method->is_static() ) { // Pass in receiver first
1987 in_sig_bt[i++] = T_OBJECT;
1988 first_arg_to_pass = 1;
1989 }
1990
1991 // We need to convert the java args to where a native (non-jni) function
1992 // would expect them. To figure out where they go we convert the java
1993 // signature to a C signature.
1994
1995 SignatureStream ss(method->signature());
1996 for ( ; !ss.at_return_type(); ss.next()) {
1997 BasicType bt = ss.type();
1998 in_sig_bt[i++] = bt; // Collect remaining bits of signature
1999 out_sig_bt[total_c_args++] = bt;
2000 if( bt == T_OBJECT) {
2001 symbolOop s = ss.as_symbol_or_null();
2002 if (s == vmSymbols::java_lang_String()) {
2003 total_strings++;
2004 out_sig_bt[total_c_args-1] = T_ADDRESS;
2005 } else if (s == vmSymbols::java_lang_Boolean() ||
2006 s == vmSymbols::java_lang_Character() ||
2007 s == vmSymbols::java_lang_Byte() ||
2008 s == vmSymbols::java_lang_Short() ||
2009 s == vmSymbols::java_lang_Integer() ||
2010 s == vmSymbols::java_lang_Float()) {
2011 out_sig_bt[total_c_args-1] = T_INT;
2012 } else if (s == vmSymbols::java_lang_Long() ||
2013 s == vmSymbols::java_lang_Double()) {
2014 out_sig_bt[total_c_args-1] = T_LONG;
2015 out_sig_bt[total_c_args++] = T_VOID;
2016 }
2017 } else if ( bt == T_LONG || bt == T_DOUBLE ) {
2018 in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
2019 out_sig_bt[total_c_args++] = T_VOID;
2020 }
2021 }
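// Worked example of the conversion above: for an instance method
//   void m(String s, Long l, int i)
// the arrays come out as (sketch):
//   in_sig_bt : T_OBJECT (this), T_OBJECT (s), T_OBJECT (l), T_INT (i)
//   out_sig_bt: T_ADDRESS (utf8 of s), T_LONG, T_VOID, T_INT
// with total_strings == 1 and total_c_args == 4; the receiver is dropped
// from the C signature and the boxed Long widens to a two-slot long.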
2022
2023 assert(i==total_args_passed, "validly parsed signature");
2024
2025 // Now get the compiled-Java layout as input arguments
2026 int comp_args_on_stack;
2027 comp_args_on_stack = SharedRuntime::java_calling_convention(
2028 in_sig_bt, in_regs, total_args_passed, false);
2029
2030 // Now figure out where the args must be stored and how much stack space
2031 // they require (neglecting out_preserve_stack_slots).
2032
2033 int out_arg_slots;
2034 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
2035
2036 // Calculate the total number of stack slots we will need.
2037
2038 // First count the abi requirement plus all of the outgoing args
2039 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
2040
2041 // Now space for the string(s) we must convert
2042
2043 int* string_locs = NEW_RESOURCE_ARRAY(int, total_strings + 1);
2044 for (i = 0; i < total_strings ; i++) {
2045 string_locs[i] = stack_slots;
2046 stack_slots += max_dtrace_string_size / VMRegImpl::stack_slot_size;
2047 }
2048
2049 // + 2 for return address (which we own) and saved rbp,
2050
2051 stack_slots += 2;
2052
2053 // OK, the space we have allocated will look like:
2054 //
2055 //
2056 // FP-> | |
2057 // |---------------------|
2058 // | string[n] |
2059 // |---------------------| <- string_locs[n]
2060 // | string[n-1] |
2061 // |---------------------| <- string_locs[n-1]
2062 // | ... |
2063 // | ... |
2064 // |---------------------| <- string_locs[1]
2065 // | string[0] |
2066 // |---------------------| <- string_locs[0]
2067 // | outbound memory |
2068 // | based arguments |
2069 // | |
2070 // |---------------------|
2071 // | |
2072 // SP-> | out_preserved_slots |
2073 //
2074 //
2075
2076 // Now compute the actual number of stack words we need, rounding to
2077 // keep the stack properly aligned.
2078 stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
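// round_to rounds up to a multiple of its (power-of-two) second argument,
// roughly:
#if 0  // not compiled, illustration only
inline int round_to_sketch(int x, int m) { return (x + m - 1) & ~(m - 1); }
// e.g. on 32-bit slots_per_word == 1, so round_to_sketch(13, 2) == 14
// slots == 56 bytes, keeping rsp 8-byte aligned.
#endif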
2079
2080 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
2081
2082 intptr_t start = (intptr_t)__ pc();
2083
2084 // First thing, make an ic check to see if we should even be here
2085
2086 // We are free to use all registers as temps without saving them and
2087 // restoring them except rbp. rbp is the only callee-saved register
2088 // as far as the interpreter and the compiler(s) are concerned.
2089
2090 const Register ic_reg = rax;
2091 const Register receiver = rcx;
2092 Label hit;
2093 Label exception_pending;
2094
2095
2096 __ verify_oop(receiver);
2097 __ cmpl(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
2098 __ jcc(Assembler::equal, hit);
2099
2100 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2101
2102 // verified entry must be aligned for code patching.
2103 // and the first 5 bytes must be in the same cache line
2104 // if we align at 8 then we will be sure 5 bytes are in the same line
2105 __ align(8);
2106
2107 __ bind(hit);
2108
2109 int vep_offset = ((intptr_t)__ pc()) - start;
2110
2111
2112 // The instruction at the verified entry point must be 5 bytes or longer
2113 // because it can be patched on the fly by make_non_entrant. The stack bang
2114 // instruction fits that requirement.
2115
2116 // Generate stack overflow check
2117
2118
2119 if (UseStackBanging) {
2120 if (stack_size <= StackShadowPages*os::vm_page_size()) {
2121 __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
2122 } else {
2123 __ movl(rax, stack_size);
2124 __ bang_stack_size(rax, rbx);
2125 }
2126 } else {
2127 // need a 5 byte instruction to allow MT safe patching to non-entrant
2128 __ fat_nop();
2129 }
2130
2131 assert(((int)__ pc() - start - vep_offset) >= 5,
2132 "valid size for make_non_entrant");
2133
2134 // Generate a new frame for the wrapper.
2135 __ enter();
2136
2137 // -2 because return address is already present and so is saved rbp,
2138 if (stack_size - 2*wordSize != 0) {
2139 __ subl(rsp, stack_size - 2*wordSize);
2140 }
2141
2142 // Frame is now completed as far as size and linkage.
2143
2144 int frame_complete = ((intptr_t)__ pc()) - start;
2145
2146 // First thing we do is store all the args as if we are doing the call.
2147 // Since the C calling convention is stack based that ensures that
2148 // all the Java register args are stored before we need to convert any
2149 // string we might have.
2150
2151 int sid = 0;
2152 int c_arg, j_arg;
2153 int string_reg = 0;
2154
2155 for (j_arg = first_arg_to_pass, c_arg = 0 ;
2156 j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2157
2158 VMRegPair src = in_regs[j_arg];
2159 VMRegPair dst = out_regs[c_arg];
2160 assert(dst.first()->is_stack() || in_sig_bt[j_arg] == T_VOID,
2161 "stack based abi assumed");
2162
2163 switch (in_sig_bt[j_arg]) {
2164
2165 case T_ARRAY:
2166 case T_OBJECT:
2167 if (out_sig_bt[c_arg] == T_ADDRESS) {
2168 // Any register based arg for a java string after the first
2169 // will be destroyed by the call to get_utf so we store
2170 // the original value in the location where the utf string address
2171 // will eventually be stored.
2172 if (src.first()->is_reg()) {
2173 if (string_reg++ != 0) {
2174 simple_move32(masm, src, dst);
2175 }
2176 }
2177 } else if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
2178 // need to unbox a one-word value
2179 Register in_reg = rax;
2180 if ( src.first()->is_reg() ) {
2181 in_reg = src.first()->as_Register();
2182 } else {
2183 simple_move32(masm, src, in_reg->as_VMReg());
2184 }
2185 Label skipUnbox;
2186 __ movl(Address(rsp, reg2offset_out(dst.first())), NULL_WORD);
2187 if ( out_sig_bt[c_arg] == T_LONG ) {
2188 __ movl(Address(rsp, reg2offset_out(dst.second())), NULL_WORD);
2189 }
2190 __ testl(in_reg, in_reg);
2191 __ jcc(Assembler::zero, skipUnbox);
2192 assert(dst.first()->is_stack() &&
2193 (!dst.second()->is_valid() || dst.second()->is_stack()),
2194 "value(s) must go into stack slots");
2195
2196 BasicType bt = out_sig_bt[c_arg];
2197 int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
2198 if ( bt == T_LONG ) {
2199 __ movl(rbx, Address(in_reg,
2200 box_offset + VMRegImpl::stack_slot_size));
2201 __ movl(Address(rsp, reg2offset_out(dst.second())), rbx);
2202 }
2203 __ movl(in_reg, Address(in_reg, box_offset));
2204 __ movl(Address(rsp, reg2offset_out(dst.first())), in_reg);
2205 __ bind(skipUnbox);
2206 } else {
2207 // Convert the arg to NULL
2208 __ movl(Address(rsp, reg2offset_out(dst.first())), NULL_WORD);
2209 }
2210 if (out_sig_bt[c_arg] == T_LONG) {
2211 assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2212 ++c_arg; // Move over the T_VOID to keep the loop indices in sync
2213 }
2214 break;
2215
2216 case T_VOID:
2217 break;
2218
2219 case T_FLOAT:
2220 float_move(masm, src, dst);
2221 break;
2222
2223 case T_DOUBLE:
2224 assert( j_arg + 1 < total_args_passed &&
2225 in_sig_bt[j_arg + 1] == T_VOID, "bad arg list");
2226 double_move(masm, src, dst);
2227 break;
2228
2229 case T_LONG :
2230 long_move(masm, src, dst);
2231 break;
2232
2233 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
2234
2235 default:
2236 simple_move32(masm, src, dst);
2237 }
2238 }
2239
2240 // Now we must convert any string we have to utf8
2241 //
2242
2243 for (sid = 0, j_arg = first_arg_to_pass, c_arg = 0 ;
2244 sid < total_strings ; j_arg++, c_arg++ ) {
2245
2246 if (out_sig_bt[c_arg] == T_ADDRESS) {
2247
2248 Address utf8_addr = Address(
2249 rsp, string_locs[sid++] * VMRegImpl::stack_slot_size);
2250 __ leal(rax, utf8_addr);
2251
2252 // The first string we find might still be in the original java arg
2253 // register
2254 VMReg orig_loc = in_regs[j_arg].first();
2255 Register string_oop;
2256
2257 // This is where the argument will eventually reside
2258 Address dest = Address(rsp, reg2offset_out(out_regs[c_arg].first()));
2259
2260 if (sid == 1 && orig_loc->is_reg()) {
2261 string_oop = orig_loc->as_Register();
2262 assert(string_oop != rax, "smashed arg");
2263 } else {
2264
2265 if (orig_loc->is_reg()) {
2266 // Get the copy of the java.lang.String object
2267 __ movl(rcx, dest);
2268 } else {
2269 // arg is still in the original location
2270 __ movl(rcx, Address(rbp, reg2offset_in(orig_loc)));
2271 }
2272 string_oop = rcx;
2273
2274 }
2275 Label nullString;
2276 __ movl(dest, NULL_WORD);
2277 __ testl(string_oop, string_oop);
2278 __ jcc(Assembler::zero, nullString);
2279
2280 // Now we can store the address of the utf string as the argument
2281 __ movl(dest, rax);
2282
2283 // And do the conversion
2284 __ call_VM_leaf(CAST_FROM_FN_PTR(
2285 address, SharedRuntime::get_utf), string_oop, rax);
2286 __ bind(nullString);
2287 }
2288
2289 if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
2290 assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2291 ++c_arg; // Move over the T_VOID to keep the loop indices in sync
2292 }
2293 }
2294
2295
2296 // OK, now we are done. Place the nop that dtrace wants so that it can
2297 // patch in the trap
2298
2299 int patch_offset = ((intptr_t)__ pc()) - start;
2300
2301 __ nop();
2302
2303
2304 // Return
2305
2306 __ leave();
2307 __ ret(0);
2308
2309 __ flush();
2310
2311 nmethod *nm = nmethod::new_dtrace_nmethod(
2312 method, masm->code(), vep_offset, patch_offset, frame_complete,
2313 stack_slots / VMRegImpl::slots_per_word);
2314 return nm;
2315
2316 }
2317
2318 #endif // HAVE_DTRACE_H
2319
2320 // This function returns the adjustment (in number of words) to a c2i adapter
2321 // activation for use during deoptimization
2322 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
2323 return (callee_locals - callee_parameters) * Interpreter::stackElementWords();
2324 }
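// Example: a callee with 3 parameters and 7 locals, where
// stackElementWords() == 1 (untagged 32-bit stack assumed), yields an
// adjustment of (7 - 3) * 1 == 4 extra words for the caller's frame
// during deoptimization.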
2325
2326
2327 uint SharedRuntime::out_preserve_stack_slots() {
2328 return 0;
2329 }
2330
2331
2332 //------------------------------generate_deopt_blob----------------------------
2333 void SharedRuntime::generate_deopt_blob() {
2334 // allocate space for the code
2335 ResourceMark rm;
2336 // setup code generation tools
2337 CodeBuffer buffer("deopt_blob", 1024, 1024);
2338 MacroAssembler* masm = new MacroAssembler(&buffer);
2339 int frame_size_in_words;
2365 // At this point we need to de-opt. We save the argument return
2366 // registers. We call the first C routine, fetch_unroll_info(). This
2367 // routine captures the return values and returns a structure which
2368 // describes the current frame size and the sizes of all replacement frames.
2369 // The current frame is compiled code and may contain many inlined
2370 // functions, each with their own JVM state. We pop the current frame, then
2371 // push all the new frames. Then we call the C routine unpack_frames() to
2372 // populate these frames. Finally unpack_frames() returns us the new target
2373 // address. Notice that callee-save registers are BLOWN here; they have
2374 // already been captured in the vframeArray at the time the return PC was
2375 // patched.
2376 address start = __ pc();
2377 Label cont;
2378
2379 // Prolog for the non-exception case!
2380
2381 // Save everything in sight.
2382
2383 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
2384 // Normal deoptimization
2385 __ push(Deoptimization::Unpack_deopt);
2386 __ jmp(cont);
2387
2388 int reexecute_offset = __ pc() - start;
2389
2390 // Reexecute case
2391 // The return address is the pc that describes what bci to re-execute at
2392
2393 // No need to update map as each call to save_live_registers will produce identical oopmap
2394 (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
2395
2396 __ push(Deoptimization::Unpack_reexecute);
2397 __ jmp(cont);
2398
2399 int exception_offset = __ pc() - start;
2400
2401 // Prolog for exception case
2402
2403 // all registers are dead at this entry point, except for rax, and
2404 // rdx which contain the exception oop and exception pc
2405 // respectively. Set them in TLS and fall thru to the
2406 // unpack_with_exception_in_tls entry point.
2407
2408 __ get_thread(rdi);
2409 __ movptr(Address(rdi, JavaThread::exception_pc_offset()), rdx);
2410 __ movptr(Address(rdi, JavaThread::exception_oop_offset()), rax);
2411
2412 int exception_in_tls_offset = __ pc() - start;
2413
2414 // new implementation because exception oop is now passed in JavaThread
2415
2416 // Prolog for exception case
2417 // All registers must be preserved because they might be used by LinearScan
2418 // Exception oop and throwing PC are passed in JavaThread
2419 // tos: stack at point of call to method that threw the exception (i.e. only
2420 // args are on the stack, no return address)
2421
2422 // make room on stack for the return address
2423 // It will be patched later with the throwing pc. The correct value is not
2424 // available now because loading it from memory would destroy registers.
2425 __ push(0);
2426
2427 // Save everything in sight.
2428
2429 // No need to update map as each call to save_live_registers will produce identical oopmap
2430 (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
2431
2432 // Now it is safe to overwrite any register
2433
2434 // store the correct deoptimization type
2435 __ push(Deoptimization::Unpack_exception);
2436
2437 // load throwing pc from JavaThread and patch it as the return address
2438 // of the current frame. Then clear the field in JavaThread
2439 __ get_thread(rdi);
2440 __ movptr(rdx, Address(rdi, JavaThread::exception_pc_offset()));
2441 __ movptr(Address(rbp, wordSize), rdx);
2442 __ movptr(Address(rdi, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
2443
2444 #ifdef ASSERT
2445 // verify that there is really an exception oop in JavaThread
2446 __ movptr(rax, Address(rdi, JavaThread::exception_oop_offset()));
2447 __ verify_oop(rax);
2448
2449 // verify that there is no pending exception
2450 Label no_pending_exception;
2451 __ movptr(rax, Address(rdi, Thread::pending_exception_offset()));
2452 __ testptr(rax, rax);
2453 __ jcc(Assembler::zero, no_pending_exception);
2454 __ stop("must not have pending exception here");
2455 __ bind(no_pending_exception);
2456 #endif
2457
2458 __ bind(cont);
2459
2460 // Compiled code leaves the floating point stack dirty; empty it.
2461 __ empty_FPU_stack();
2462
2463
2464 // Call C code. Need thread and this frame, but NOT official VM entry
2465 // crud. We cannot block on this call, no GC can happen.
2466 __ get_thread(rcx);
2467 __ push(rcx);
2468 // fetch_unroll_info needs to call last_java_frame()
2469 __ set_last_Java_frame(rcx, noreg, noreg, NULL);
2470
2471 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2472
2473 // Need to have an oopmap that tells fetch_unroll_info where to
2474 // find any register it might need.
2475
2476 oop_maps->add_gc_map( __ pc()-start, map);
2477
2478 // Discard arg to fetch_unroll_info
2479 __ pop(rcx);
2480
2481 __ get_thread(rcx);
2482 __ reset_last_Java_frame(rcx, false, false);
2483
2484 // Load UnrollBlock into EDI
2485 __ mov(rdi, rax);
2486
2487 // Move the unpack kind to a safe place in the UnrollBlock because
2488 // we are very short of registers
2489
2490 Address unpack_kind(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
2491 // retrieve the deopt kind from where we left it.
2492 __ pop(rax);
2493 __ movl(unpack_kind, rax); // save the unpack_kind value
2494
2495 Label noException;
2496 __ cmpl(rax, Deoptimization::Unpack_exception); // Was exception pending?
2497 __ jcc(Assembler::notEqual, noException);
2498 __ movptr(rax, Address(rcx, JavaThread::exception_oop_offset()));
2499 __ movptr(rdx, Address(rcx, JavaThread::exception_pc_offset()));
2500 __ movptr(Address(rcx, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
2501 __ movptr(Address(rcx, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
2502
2503 __ verify_oop(rax);
2504
2505 // Overwrite the result registers with the exception results.
2506 __ movptr(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax);
2507 __ movptr(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx);
2508
2509 __ bind(noException);
2510
2511 // Stack is back to only having register save data on the stack.
2512 // Now restore the result registers. Everything else is either dead or captured
2513 // in the vframeArray.
2514
2515 RegisterSaver::restore_result_registers(masm);
2516
2517 // All of the register save area has been popped off the stack. Only the
2518 // return address remains.
2519
2520 // Pop all the frames we must move/replace.
2521 //
2522 // Frame picture (youngest to oldest)
2523 // 1: self-frame (no frame link)
2524 // 2: deopting frame (no frame link)
2525 // 3: caller of deopting frame (could be compiled/interpreted).
2526 //
2527 // Note: by leaving the return address of the self-frame on the stack
2528 // and using the size of frame 2 to adjust the stack,
2529 // the return to frame 3 will still be on the stack when we are done.
2530
2531 // Pop deoptimized frame
2532 __ addptr(rsp, Address(rdi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2533
2534 // sp should be pointing at the return address to the caller (3)
2535
2536 // Stack bang to make sure there's enough room for these interpreter frames.
2537 if (UseStackBanging) {
2538 __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2539 __ bang_stack_size(rbx, rcx);
2540 }
2541
2542 // Load array of frame pcs into ECX
2543 __ movptr(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2544
2545 __ pop(rsi); // trash the old pc
2546
2547 // Load array of frame sizes into ESI
2548 __ movptr(rsi,Address(rdi,Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2549
2550 Address counter(rdi, Deoptimization::UnrollBlock::counter_temp_offset_in_bytes());
2551
2552 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2553 __ movl(counter, rbx);
2554
2555 // Pick up the initial fp we should save
2556 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));
2557
2558 // Now adjust the caller's stack to make up for the extra locals
2559 // but record the original sp so that we can save it in the skeletal interpreter
2560 // frame; that way the stack walking of interpreter_sender will get the
2561 // unextended sp value and not the "real" sp value.
2562
2563 Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset_in_bytes());
2564 __ movptr(sp_temp, rsp);
2565 __ movl2ptr(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2566 __ subptr(rsp, rbx);
2567
2568 // Push interpreter frames in a loop
2569 Label loop;
2570 __ bind(loop);
2571 __ movptr(rbx, Address(rsi, 0)); // Load frame size
2572 #ifdef CC_INTERP
2573 __ subptr(rbx, 4*wordSize); // we'll push pc and ebp by hand and
2574 #ifdef ASSERT
2575 __ push(0xDEADDEAD); // Make a recognizable pattern
2576 __ push(0xDEADDEAD);
2577 #else /* ASSERT */
2578 __ subptr(rsp, 2*wordSize); // skip the "static long no_param"
2579 #endif /* ASSERT */
2580 #else /* CC_INTERP */
2581 __ subptr(rbx, 2*wordSize); // we'll push pc and rbp, by hand
2582 #endif /* CC_INTERP */
2583 __ pushptr(Address(rcx, 0)); // save return address
2584 __ enter(); // save old & set new rbp,
2585 __ subptr(rsp, rbx); // Prolog!
2586 __ movptr(rbx, sp_temp); // sender's sp
2587 #ifdef CC_INTERP
2588 __ movptr(Address(rbp,
2589 -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
2590 rbx); // Make it walkable
2591 #else /* CC_INTERP */
2592 // This value is corrected by layout_activation_impl
2593 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
2594 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
2595 #endif /* CC_INTERP */
2596 __ movptr(sp_temp, rsp); // pass to next frame
2597 __ addptr(rsi, wordSize); // Bump array pointer (sizes)
2598 __ addptr(rcx, wordSize); // Bump array pointer (pcs)
2599 __ decrementl(counter); // decrement counter
2600 __ jcc(Assembler::notZero, loop);
2601 __ pushptr(Address(rcx, 0)); // save final return address
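// The loop above, sketched at the C level (names illustrative):
#if 0  // not compiled, illustration only
for (int k = number_of_frames; k > 0; k--) {
  intptr_t size = *sizes++;         // frame_sizes array (rsi)
  push(*pcs++);                     // return address for this frame (rcx)
  push(rbp); rbp = rsp;             // enter(): link the skeletal frame
  rsp -= size - 2*wordSize;         // body of the skeletal frame
  // ... record the sender sp so the stack walker sees the unextended value
}
push(*pcs);                         // final return address (self-frame)
#endif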
2602
2603 // Re-push self-frame
2604 __ enter(); // save old & set new rbp,
2605
2606 // Return address and rbp, are in place
2607 // We'll push additional args later. Just allocate a full sized
2608 // register save area
2609 __ subptr(rsp, (frame_size_in_words-additional_words - 2) * wordSize);
2610
2611 // Restore frame locals after moving the frame
2612 __ movptr(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax);
2613 __ movptr(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx);
2614 __ fstp_d(Address(rsp, RegisterSaver::fpResultOffset()*wordSize)); // Pop float stack and store in local
2615 if( UseSSE>=2 ) __ movdbl(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0);
2616 if( UseSSE==1 ) __ movflt(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0);
2617
2618 // Set up the args to unpack_frame
2619
2620 __ pushl(unpack_kind); // push the unpack_kind value
2621 __ get_thread(rcx);
2622 __ push(rcx);
2623
2624 // set last_Java_sp, last_Java_fp
2625 __ set_last_Java_frame(rcx, noreg, rbp, NULL);
2626
2627 // Call C code. Need thread but NOT official VM entry
2628 // crud. We cannot block on this call, no GC can happen. Call should
2629 // restore return values to their stack-slots with the new SP.
2630 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
2631 // Set an oopmap for the call site
2632 oop_maps->add_gc_map( __ pc()-start, new OopMap( frame_size_in_words, 0 ));
2633
2634 // rax, contains the return result type
2635 __ push(rax);
2636
2637 __ get_thread(rcx);
2638 __ reset_last_Java_frame(rcx, false, false);
2639
2640 // Collect return values
2641 __ movptr(rax,Address(rsp, (RegisterSaver::raxOffset() + additional_words + 1)*wordSize));
2642 __ movptr(rdx,Address(rsp, (RegisterSaver::rdxOffset() + additional_words + 1)*wordSize));
2643
2644 // Clear floating point stack before returning to interpreter
2645 __ empty_FPU_stack();
2646
2647 // Check if we should push the float or double return value.
2648 Label results_done, yes_double_value;
2649 __ cmpl(Address(rsp, 0), T_DOUBLE);
2650 __ jcc (Assembler::zero, yes_double_value);
2651 __ cmpl(Address(rsp, 0), T_FLOAT);
2652 __ jcc (Assembler::notZero, results_done);
2653
2654 // return float value as expected by interpreter
2655 if( UseSSE>=1 ) __ movflt(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize));
2656 else __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize));
2657 __ jmp(results_done);
2658
2659 // return double value as expected by interpreter
2660 __ bind(yes_double_value);
2661 if( UseSSE>=2 ) __ movdbl(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize));
2662 else __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize));
2683 void SharedRuntime::generate_uncommon_trap_blob() {
2684 // allocate space for the code
2685 ResourceMark rm;
2686 // setup code generation tools
2687 CodeBuffer buffer("uncommon_trap_blob", 512, 512);
2688 MacroAssembler* masm = new MacroAssembler(&buffer);
2689
2690 enum frame_layout {
2691 arg0_off, // thread sp + 0 // Arg location for
2692 arg1_off, // unloaded_class_index sp + 1 // calling C
2693 // The frame sender code expects that rbp will be in the "natural" place and
2694 // will override any oopMap setting for it. We must therefore force the layout
2695 // so that it agrees with the frame sender code.
2696 rbp_off, // callee saved register sp + 2
2697 return_off, // slot for return address sp + 3
2698 framesize
2699 };
2700
2701 address start = __ pc();
2702 // Push self-frame.
2703 __ subptr(rsp, return_off*wordSize); // Epilog!
2704
2705 // rbp is an implicitly saved callee-saved register (i.e. the calling
2706 // convention will save/restore it in the prolog/epilog). Other than that
2707 // there are no callee save registers now that adapter frames are gone.
2708 __ movptr(Address(rsp, rbp_off*wordSize), rbp);
2709
2710 // Clear the floating point exception stack
2711 __ empty_FPU_stack();
2712
2713 // set last_Java_sp
2714 __ get_thread(rdx);
2715 __ set_last_Java_frame(rdx, noreg, noreg, NULL);
2716
2717 // Call C code. Need thread but NOT official VM entry
2718 // crud. We cannot block on this call, no GC can happen. Call should
2719 // capture callee-saved registers as well as return values.
2720 __ movptr(Address(rsp, arg0_off*wordSize), rdx);
2721 // argument already in ECX
2722 __ movl(Address(rsp, arg1_off*wordSize),rcx);
2723 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
2724
2725 // Set an oopmap for the call site
2726 OopMapSet *oop_maps = new OopMapSet();
2727 OopMap* map = new OopMap( framesize, 0 );
2728 // No oopMap for rbp, it is known implicitly
2729
2730 oop_maps->add_gc_map( __ pc()-start, map);
2731
2732 __ get_thread(rcx);
2733
2734 __ reset_last_Java_frame(rcx, false, false);
2735
2736 // Load UnrollBlock into EDI
2737 __ movptr(rdi, rax);
2738
2739 // Pop all the frames we must move/replace.
2740 //
2741 // Frame picture (youngest to oldest)
2742 // 1: self-frame (no frame link)
2743 // 2: deopting frame (no frame link)
2744 // 3: caller of deopting frame (could be compiled/interpreted).
2745
2746 // Pop self-frame. We have no frame, and must rely only on EAX and ESP.
2747 __ addptr(rsp,(framesize-1)*wordSize); // Epilog!
2748
2749 // Pop deoptimized frame
2750 __ movl2ptr(rcx, Address(rdi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2751 __ addptr(rsp, rcx);
2752
2753 // sp should be pointing at the return address to the caller (3)
2754
2755 // Stack bang to make sure there's enough room for these interpreter frames.
2756 if (UseStackBanging) {
2757 __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2758 __ bang_stack_size(rbx, rcx);
2759 }
2760
2761
2762 // Load array of frame pcs into ECX
2763 __ movl(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2764
2765 __ pop(rsi); // trash the pc
2766
2767 // Load array of frame sizes into ESI
2768 __ movptr(rsi,Address(rdi,Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2769
2770 Address counter(rdi, Deoptimization::UnrollBlock::counter_temp_offset_in_bytes());
2771
2772 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2773 __ movl(counter, rbx);
2774
2775 // Pick up the initial fp we should save
2776 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));
2777
2778 // Now adjust the caller's stack to make up for the extra locals
2779 // but record the original sp so that we can save it in the skeletal interpreter
2780 // frame; that way the stack walking of interpreter_sender will get the
2781 // unextended sp value and not the "real" sp value.
2782
2783 Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset_in_bytes());
2784 __ movptr(sp_temp, rsp);
2785 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2786 __ subptr(rsp, rbx);
2787
2788 // Push interpreter frames in a loop
2789 Label loop;
2790 __ bind(loop);
2791 __ movptr(rbx, Address(rsi, 0)); // Load frame size
2792 #ifdef CC_INTERP
2793 __ subptr(rbx, 4*wordSize); // we'll push pc and ebp by hand and
2794 #ifdef ASSERT
2795 __ push(0xDEADDEAD); // Make a recognizable pattern
2796 __ push(0xDEADDEAD); // (parm to RecursiveInterpreter...)
2797 #else /* ASSERT */
2798 __ subptr(rsp, 2*wordSize); // skip the "static long no_param"
2799 #endif /* ASSERT */
2800 #else /* CC_INTERP */
2801 __ subptr(rbx, 2*wordSize); // we'll push pc and rbp, by hand
2802 #endif /* CC_INTERP */
2803 __ pushptr(Address(rcx, 0)); // save return address
2804 __ enter(); // save old & set new rbp,
2805 __ subptr(rsp, rbx); // Prolog!
2806 __ movptr(rbx, sp_temp); // sender's sp
2807 #ifdef CC_INTERP
2808 __ movptr(Address(rbp,
2809 -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
2810 rbx); // Make it walkable
2811 #else /* CC_INTERP */
2812 // This value is corrected by layout_activation_impl
2813 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
2814 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
2815 #endif /* CC_INTERP */
2816 __ movptr(sp_temp, rsp); // pass to next frame
2817 __ addptr(rsi, wordSize); // Bump array pointer (sizes)
2818 __ addptr(rcx, wordSize); // Bump array pointer (pcs)
2819 __ decrementl(counter); // decrement counter
2820 __ jcc(Assembler::notZero, loop);
2821 __ pushptr(Address(rcx, 0)); // save final return address
2822
2823 // Re-push self-frame
2824 __ enter(); // save old & set new rbp,
2825 __ subptr(rsp, (framesize-2) * wordSize); // Prolog!
2826
2827
2828 // set last_Java_sp, last_Java_fp
2829 __ get_thread(rdi);
2830 __ set_last_Java_frame(rdi, noreg, rbp, NULL);
2831
2832 // Call C code. Need thread but NOT official VM entry
2833 // crud. We cannot block on this call, no GC can happen. Call should
2834 // restore return values to their stack-slots with the new SP.
2835 __ movptr(Address(rsp,arg0_off*wordSize),rdi);
2836 __ movl(Address(rsp,arg1_off*wordSize), Deoptimization::Unpack_uncommon_trap);
2837 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
2838 // Set an oopmap for the call site
2839 oop_maps->add_gc_map( __ pc()-start, new OopMap( framesize, 0 ) );
2840
2841 __ get_thread(rdi);
2842 __ reset_last_Java_frame(rdi, true, false);
2843
2844 // Pop self-frame.
2845 __ leave(); // Epilog!
2846
2847 // Jump to interpreter
2848 __ ret(0);
2849
2850 // -------------
2851 // make sure all code is generated
2852 masm->flush();
2853
2854 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize);
2855 }
2872 ResourceMark rm;
2873 OopMapSet *oop_maps = new OopMapSet();
2874 OopMap* map;
2875
2876 // allocate space for the code
2877 // setup code generation tools
2878 CodeBuffer buffer("handler_blob", 1024, 512);
2879 MacroAssembler* masm = new MacroAssembler(&buffer);
2880
2881 const Register java_thread = rdi; // callee-saved for VC++
2882 address start = __ pc();
2883 address call_pc = NULL;
2884
2885 // If cause_return is true we are at a poll_return and there is
2886 // a return address on the stack to the caller of the nmethod
2887 // that is at the safepoint. We can leave this return address on the
2888 // stack and effectively complete the return and safepoint in the caller.
2889 // Otherwise we push space for a return address that the safepoint
2890 // handler will install later to make the stack walking sensible.
2891 if( !cause_return )
2892 __ push(rbx); // Make room for return address (or push it again)
2893
2894 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
2895
2896 // The following is basically a call_VM. However, we need the precise
2897 // address of the call in order to generate an oopmap. Hence, we do all the
2898 // work ourselves.
2899
2900 // Push thread argument and setup last_Java_sp
2901 __ get_thread(java_thread);
2902 __ push(java_thread);
2903 __ set_last_Java_frame(java_thread, noreg, noreg, NULL);
2904
2905 // if this was not a poll_return then we need to correct the return address now.
2906 if( !cause_return ) {
2907 __ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset()));
2908 __ movptr(Address(rbp, wordSize), rax);
2909 }
2910
2911 // do the call
2912 __ call(RuntimeAddress(call_ptr));
2913
2914 // Set an oopmap for the call site. This oopmap will map all
2915 // oop-registers and debug-info registers as callee-saved. This
2916 // will allow deoptimization at this safepoint to find all possible
2917 // debug-info recordings, as well as let GC find all oops.
2918
2919 oop_maps->add_gc_map( __ pc() - start, map);
2920
2921 // Discard arg
2922 __ pop(rcx);
2923
2924 Label noException;
2925
2926 // Clear last_Java_sp again
2927 __ get_thread(java_thread);
2928 __ reset_last_Java_frame(java_thread, false, false);
2929
2930 __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
2931 __ jcc(Assembler::equal, noException);
2932
2933 // Exception pending
2934
2935 RegisterSaver::restore_live_registers(masm);
2936
2937 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2938
2939 __ bind(noException);
2940
2941 // Normal exit, register restoring and exit
2942 RegisterSaver::restore_live_registers(masm);
2943
2944 __ ret(0);
2945
2946 // make sure all code is generated
2947 masm->flush();
2948
2949 // Fill-out other meta info
2950 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
2967 CodeBuffer buffer(name, 1000, 512);
2968 MacroAssembler* masm = new MacroAssembler(&buffer);
2969
2970 int frame_size_words;
2971 enum frame_layout {
2972 thread_off,
2973 extra_words };
2974
2975 OopMapSet *oop_maps = new OopMapSet();
2976 OopMap* map = NULL;
2977
2978 int start = __ offset();
2979
2980 map = RegisterSaver::save_live_registers(masm, extra_words, &frame_size_words);
2981
2982 int frame_complete = __ offset();
2983
2984 const Register thread = rdi;
2985 __ get_thread(rdi);
2986
2987 __ push(thread);
2988 __ set_last_Java_frame(thread, noreg, rbp, NULL);
2989
2990 __ call(RuntimeAddress(destination));
2991
2992
2993 // Set an oopmap for the call site.
2994 // We need this not only for callee-saved registers, but also for volatile
2995 // registers that the compiler might be keeping live across a safepoint.
2996
2997 oop_maps->add_gc_map( __ offset() - start, map);
2998
2999 // rax, contains the address we are going to jump to assuming no exception got installed
3000
3001 __ addptr(rsp, wordSize);
3002
3003 // clear last_Java_sp
3004 __ reset_last_Java_frame(thread, true, false);
3005 // check for pending exceptions
3006 Label pending;
3007 __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
3008 __ jcc(Assembler::notEqual, pending);
3009
3010 // get the returned methodOop
3011 __ movptr(rbx, Address(thread, JavaThread::vm_result_offset()));
3012 __ movptr(Address(rsp, RegisterSaver::rbx_offset() * wordSize), rbx);
3013
3014 __ movptr(Address(rsp, RegisterSaver::rax_offset() * wordSize), rax);
3015
3016 RegisterSaver::restore_live_registers(masm);
3017
3018 // We are back to the original state on entry and ready to go.
3019
3020 __ jmp(rax);
3021
3022 // Pending exception after the safepoint
3023
3024 __ bind(pending);
3025
3026 RegisterSaver::restore_live_registers(masm);
3027
3028 // exception pending => remove activation and forward to exception handler
3029
3030 __ get_thread(thread);
3031 __ movptr(Address(thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
3032 __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
3033 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3034
3035 // -------------
3036 // make sure all code is generated
3037 masm->flush();
3038
3039 // return the blob
3040 // frame_size_words or bytes??
3041 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
3042 }
3043
3044 void SharedRuntime::generate_stubs() {
3045
3046 _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method),
3047 "wrong_method_stub");
3048
3049 _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss),
3050 "ic_miss_stub");
3051
3052 _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C),
|