621
622 //
623 // We will only enter here from an interpreted frame and never from after
624 // passing thru a c2i. Azul allowed this but we do not. If we lose the
625 // race and use a c2i we will remain interpreted for the race loser(s).
626 // This removes all sorts of headaches on the x86 side and also eliminates
627 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
628
629
630 // Note: r13 contains the senderSP on entry. We must preserve it since
631 // we may do a i2c -> c2i transition if we lose a race where compiled
632 // code goes non-entrant while we get args ready.
633 // In addition we use r13 to locate all the interpreter args as
634 // we must align the stack to 16 bytes on an i2c entry else we
635 // lose alignment we expect in all compiled code and register
636 // save code can segv when fxsave instructions find improperly
637 // aligned stack pointer.
638
639 __ movptr(rax, Address(rsp, 0));
640
641 // Cut-out for having no stack args. Since up to 2 int/oop args are passed
642 // in registers, we will occasionally have no stack args.
643 int comp_words_on_stack = 0;
644 if (comp_args_on_stack) {
645 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
646 // registers are below. By subtracting stack0, we either get a negative
647 // number (all values in registers) or the maximum stack slot accessed.
648
649 // Convert 4-byte c2 stack slots to words.
650 comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 651 // Round up to minimum stack alignment, in wordSize
652 comp_words_on_stack = round_to(comp_words_on_stack, 2);
653 __ subptr(rsp, comp_words_on_stack * wordSize);
654 }
655
656
657 // Ensure compiled code always sees stack at proper alignment
658 __ andptr(rsp, -16);
659
 660 // push the return address and misalign the stack so that the youngest frame always sees
661 // as far as the placement of the call instruction
662 __ push(rax);
663
664 // Will jump to the compiled code just as if compiled code was doing it.
665 // Pre-load the register-jump target early, to schedule it better.
666 __ movptr(r11, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));
667
668 // Now generate the shuffle code. Pick up all register args and move the
669 // rest through the floating point stack top.
670 for (int i = 0; i < total_args_passed; i++) {
671 if (sig_bt[i] == T_VOID) {
672 // Longs and doubles are passed in native word order, but misaligned
673 // in the 32-bit build.
674 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
675 continue;
676 }
677
678 // Pick up 0, 1 or 2 words from SP+offset.
679
680 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
681 "scrambled load targets?");
682 // Load in argument order going down.
683 // int ld_off = (total_args_passed + comp_words_on_stack -i)*wordSize;
684 // base ld_off on r13 (sender_sp) as the stack alignment makes offsets from rsp
685 // unpredictable
686 int ld_off = ((total_args_passed - 1) - i)*Interpreter::stackElementSize();
687
688 // Point to interpreter value (vs. tag)
689 int next_off = ld_off - Interpreter::stackElementSize();
690 //
691 //
692 //
693 VMReg r_1 = regs[i].first();
694 VMReg r_2 = regs[i].second();
695 if (!r_1->is_valid()) {
696 assert(!r_2->is_valid(), "");
697 continue;
698 }
699 if (r_1->is_stack()) {
700 // Convert stack slot to an SP offset (+ wordSize to account for return address )
701 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
702 if (!r_2->is_valid()) {
703 // sign extend???
704 __ movl(rax, Address(r13, ld_off));
705 __ movptr(Address(rsp, st_off), rax);
706 } else {
707 //
708 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 709 // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
710 // So we must adjust where to pick up the data to match the interpreter.
711 //
712 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
713 // are accessed as negative so LSW is at LOW address
714
715 // ld_off is MSW so get LSW
716 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
717 next_off : ld_off;
718 __ movq(rax, Address(r13, offset));
719 // st_off is LSW (i.e. reg.first())
720 __ movq(Address(rsp, st_off), rax);
721 }
722 } else if (r_1->is_Register()) { // Register argument
723 Register r = r_1->as_Register();
724 assert(r != rax, "must be different");
725 if (r_2->is_valid()) {
726 //
727 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 729 // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
729 // So we must adjust where to pick up the data to match the interpreter.
730
731 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
732 next_off : ld_off;
733
734 // this can be a misaligned move
735 __ movq(r, Address(r13, offset));
736 } else {
737 // sign extend and use a full word?
738 __ movl(r, Address(r13, ld_off));
739 }
740 } else {
741 if (!r_2->is_valid()) {
742 __ movflt(r_1->as_XMMRegister(), Address(r13, ld_off));
743 } else {
744 __ movdbl(r_1->as_XMMRegister(), Address(r13, next_off));
745 }
746 }
747 }
748
749 // 6243940 We might end up in handle_wrong_method if
750 // the callee is deoptimized as we race thru here. If that
751 // happens we don't want to take a safepoint because the
752 // caller frame will look interpreted and arguments are now
753 // "compiled" so it is much better to make this transition
754 // invisible to the stack walking code. Unfortunately if
755 // we try and find the callee by normal means a safepoint
756 // is possible. So we stash the desired callee in the thread
 757 // and the VM will find it there should this case occur.
758
759 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
760
761 // put methodOop where a c2i would expect should we end up there
 762 // only needed because of c2 resolve stubs return methodOop as a result in
763 // rax
764 __ mov(rax, rbx);
3302
3303 OopMapSet* oop_maps = new OopMapSet();
3304
3305 oop_maps->add_gc_map( __ pc()-start, new OopMap(SimpleRuntimeFrame::framesize, 0));
3306
3307 __ reset_last_Java_frame(false, false);
3308
3309 // Restore callee-saved registers
3310
3311 // rbp is an implicitly saved callee saved register (i.e. the calling
3312 // convention will save restore it in prolog/epilog) Other than that
 3313 // there are no callee save registers now that adapter frames are gone.
3314
3315 __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
3316
3317 __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
3318 __ pop(rdx); // No need for exception pc anymore
3319
3320 // rax: exception handler
3321
3322 // We have a handler in rax (could be deopt blob).
3323 __ mov(r8, rax);
3324
3325 // Get the exception oop
3326 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
3327 // Get the exception pc in case we are deoptimized
3328 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
3329 #ifdef ASSERT
3330 __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD);
3331 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
3332 #endif
3333 // Clear the exception oop so GC no longer processes it as a root.
3334 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
3335
3336 // rax: exception oop
3337 // r8: exception handler
3338 // rdx: exception pc
3339 // Jump to handler
3340
3341 __ jmp(r8);
|
621
622 //
623 // We will only enter here from an interpreted frame and never from after
624 // passing thru a c2i. Azul allowed this but we do not. If we lose the
625 // race and use a c2i we will remain interpreted for the race loser(s).
626 // This removes all sorts of headaches on the x86 side and also eliminates
627 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
628
629
630 // Note: r13 contains the senderSP on entry. We must preserve it since
631 // we may do a i2c -> c2i transition if we lose a race where compiled
632 // code goes non-entrant while we get args ready.
633 // In addition we use r13 to locate all the interpreter args as
634 // we must align the stack to 16 bytes on an i2c entry else we
635 // lose alignment we expect in all compiled code and register
636 // save code can segv when fxsave instructions find improperly
637 // aligned stack pointer.
638
639 __ movptr(rax, Address(rsp, 0));
640
641 // Must preserve original SP for loading incoming arguments because
642 // we need to align the outgoing SP for compiled code.
643 __ movptr(r11, rsp);
644
645 // Cut-out for having no stack args. Since up to 2 int/oop args are passed
646 // in registers, we will occasionally have no stack args.
647 int comp_words_on_stack = 0;
648 if (comp_args_on_stack) {
649 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
650 // registers are below. By subtracting stack0, we either get a negative
651 // number (all values in registers) or the maximum stack slot accessed.
652
653 // Convert 4-byte c2 stack slots to words.
654 comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 655 // Round up to minimum stack alignment, in wordSize
656 comp_words_on_stack = round_to(comp_words_on_stack, 2);
657 __ subptr(rsp, comp_words_on_stack * wordSize);
658 }
659
660
661 // Ensure compiled code always sees stack at proper alignment
662 __ andptr(rsp, -16);
663
 664 // push the return address and misalign the stack so that the youngest frame always sees
665 // as far as the placement of the call instruction
666 __ push(rax);
667
668 // Put saved SP in another register
669 const Register saved_sp = rax;
670 __ movptr(saved_sp, r11);
671
672 // Will jump to the compiled code just as if compiled code was doing it.
673 // Pre-load the register-jump target early, to schedule it better.
674 __ movptr(r11, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));
675
676 // Now generate the shuffle code. Pick up all register args and move the
677 // rest through the floating point stack top.
678 for (int i = 0; i < total_args_passed; i++) {
679 if (sig_bt[i] == T_VOID) {
680 // Longs and doubles are passed in native word order, but misaligned
681 // in the 32-bit build.
682 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
683 continue;
684 }
685
686 // Pick up 0, 1 or 2 words from SP+offset.
687
688 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
689 "scrambled load targets?");
690 // Load in argument order going down.
691 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize() + Interpreter::value_offset_in_bytes();
692 // Point to interpreter value (vs. tag)
693 int next_off = ld_off - Interpreter::stackElementSize();
694 //
695 //
696 //
697 VMReg r_1 = regs[i].first();
698 VMReg r_2 = regs[i].second();
699 if (!r_1->is_valid()) {
700 assert(!r_2->is_valid(), "");
701 continue;
702 }
703 if (r_1->is_stack()) {
704 // Convert stack slot to an SP offset (+ wordSize to account for return address )
705 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
706
707 // We can use r13 as a temp here because compiled code doesn't need r13 as an input
708 // and if we end up going thru a c2i because of a miss a reasonable value of r13
709 // will be generated.
710 if (!r_2->is_valid()) {
711 // sign extend???
712 __ movl(r13, Address(saved_sp, ld_off));
713 __ movptr(Address(rsp, st_off), r13);
714 } else {
715 //
716 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 717 // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
718 // So we must adjust where to pick up the data to match the interpreter.
719 //
720 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
721 // are accessed as negative so LSW is at LOW address
722
723 // ld_off is MSW so get LSW
724 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
725 next_off : ld_off;
726 __ movq(r13, Address(saved_sp, offset));
727 // st_off is LSW (i.e. reg.first())
728 __ movq(Address(rsp, st_off), r13);
729 }
730 } else if (r_1->is_Register()) { // Register argument
731 Register r = r_1->as_Register();
732 assert(r != rax, "must be different");
733 if (r_2->is_valid()) {
734 //
735 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 737 // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
737 // So we must adjust where to pick up the data to match the interpreter.
738
739 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
740 next_off : ld_off;
741
742 // this can be a misaligned move
743 __ movq(r, Address(saved_sp, offset));
744 } else {
745 // sign extend and use a full word?
746 __ movl(r, Address(saved_sp, ld_off));
747 }
748 } else {
749 if (!r_2->is_valid()) {
750 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
751 } else {
752 __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
753 }
754 }
755 }
756
757 // 6243940 We might end up in handle_wrong_method if
758 // the callee is deoptimized as we race thru here. If that
759 // happens we don't want to take a safepoint because the
760 // caller frame will look interpreted and arguments are now
761 // "compiled" so it is much better to make this transition
762 // invisible to the stack walking code. Unfortunately if
763 // we try and find the callee by normal means a safepoint
764 // is possible. So we stash the desired callee in the thread
 765 // and the VM will find it there should this case occur.
766
767 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
768
769 // put methodOop where a c2i would expect should we end up there
 770 // only needed because of c2 resolve stubs return methodOop as a result in
771 // rax
772 __ mov(rax, rbx);
3310
3311 OopMapSet* oop_maps = new OopMapSet();
3312
3313 oop_maps->add_gc_map( __ pc()-start, new OopMap(SimpleRuntimeFrame::framesize, 0));
3314
3315 __ reset_last_Java_frame(false, false);
3316
3317 // Restore callee-saved registers
3318
3319 // rbp is an implicitly saved callee saved register (i.e. the calling
3320 // convention will save restore it in prolog/epilog) Other than that
 3321 // there are no callee save registers now that adapter frames are gone.
3322
3323 __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
3324
3325 __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
3326 __ pop(rdx); // No need for exception pc anymore
3327
3328 // rax: exception handler
3329
3330 // Restore SP from BP if the exception PC is a MethodHandle call.
3331 __ cmpl(Address(r15_thread, JavaThread::is_method_handle_exception_offset()), 0);
3332 __ cmovptr(Assembler::notEqual, rsp, rbp);
3333
3334 // We have a handler in rax (could be deopt blob).
3335 __ mov(r8, rax);
3336
3337 // Get the exception oop
3338 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
3339 // Get the exception pc in case we are deoptimized
3340 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
3341 #ifdef ASSERT
3342 __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD);
3343 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
3344 #endif
3345 // Clear the exception oop so GC no longer processes it as a root.
3346 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
3347
3348 // rax: exception oop
3349 // r8: exception handler
3350 // rdx: exception pc
3351 // Jump to handler
3352
3353 __ jmp(r8);
|