528
529 // Allocate argument register save area
530 if (frame::arg_reg_save_area_bytes != 0) {
531 __ subptr(rsp, frame::arg_reg_save_area_bytes);
532 }
533 __ mov(c_rarg0, rbx);
534 __ mov(c_rarg1, rax);
535 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
536
537 // De-allocate argument register save area
538 if (frame::arg_reg_save_area_bytes != 0) {
539 __ addptr(rsp, frame::arg_reg_save_area_bytes);
540 }
541
542 __ pop_CPU_state();
543 // restore sp
544 __ mov(rsp, r13);
545 __ bind(L);
546 }
547
548
// Compiled-to-interpreted (C2I) adapter: repacks arguments from the
// compiled layout described by sig_bt/regs into the interpreter's
// all-on-stack layout and jumps to the method's interpreter entry.
// On entry rbx holds the callee Method*; r13 is loaded with the
// pre-extension SP (senderSP) so the interpreter can restore the
// caller's stack on return. Control falls in at skip_fixup when the
// caller's call site does not need patching.
549 static void gen_c2i_adapter(MacroAssembler *masm,
550 int total_args_passed,
551 int comp_args_on_stack,
552 const BasicType *sig_bt,
553 const VMRegPair *regs,
554 Label& skip_fixup) {
555 // Before we get into the guts of the C2I adapter, see if we should be here
556 // at all. We've come from compiled code and are attempting to jump to the
557 // interpreter, which means the caller made a static call to get here
558 // (vcalls always get a compiled target if there is one). Check for a
559 // compiled target. If there is one, we need to patch the caller's call.
560 patch_callers_callsite(masm);
561
562 __ bind(skip_fixup);
563
564 // Since all args are passed on the stack, total_args_passed *
565 // Interpreter::stackElementSize is the space we need. Plus 1 because
566 // we also account for the return address location since
567 // we store it first rather than hold it in rax across all the shuffling
568
569 int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
570
571 // stack is aligned, keep it that way
572 extraspace = round_to(extraspace, 2*wordSize);
573
574 // Get return address
575 __ pop(rax);
576
577 // set senderSP value
578 __ mov(r13, rsp);
579
580 __ subptr(rsp, extraspace);
581
582 // Store the return address in the expected location
583 __ movptr(Address(rsp, 0), rax);
584
585 // Now write the args into the outgoing interpreter space
586 for (int i = 0; i < total_args_passed; i++) {
587 if (sig_bt[i] == T_VOID) {
588 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
589 continue;
590 }
591
592 // offset to start parameters
593 int st_off = (total_args_passed - i) * Interpreter::stackElementSize;
594 int next_off = st_off - Interpreter::stackElementSize;
595
596 // Say 4 args:
597 // i st_off
598 // 0 32 T_LONG
599 // 1 24 T_VOID
600 // 2 16 T_OBJECT
601 // 3 8 T_BOOL
602 // - 0 return address
603 //
604 // However to make things extra confusing. Because we can fit a long/double in
605 // a single slot on a 64 bit VM and it would be silly to break them up, the interpreter
606 // leaves one slot empty and only stores to a single slot. In this case the
607 // slot that is occupied is the T_VOID slot. See I said it was confusing.
608
609 VMReg r_1 = regs[i].first();
610 VMReg r_2 = regs[i].second();
611 if (!r_1->is_valid()) {
612 assert(!r_2->is_valid(), "");
613 continue;
614 }
615 if (r_1->is_stack()) {
616 // memory to memory use rax
// ld_off: the argument's slot in the caller's outgoing area, which now
// sits above the extraspace we just reserved.
617 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
618 if (!r_2->is_valid()) {
619 // sign extend??
620 __ movl(rax, Address(rsp, ld_off));
621 __ movptr(Address(rsp, st_off), rax);
622
623 } else {
624
625 __ movq(rax, Address(rsp, ld_off));
626
627 // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
628 // T_DOUBLE and T_LONG use two slots in the interpreter
629 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
630 // ld_off == LSW, ld_off+wordSize == MSW
631 // st_off == MSW, next_off == LSW
632 __ movq(Address(rsp, next_off), rax);
633 #ifdef ASSERT
634 // Overwrite the unused slot with known junk
635 __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
636 __ movptr(Address(rsp, st_off), rax);
637 #endif /* ASSERT */
638 } else {
639 __ movq(Address(rsp, st_off), rax);
640 }
641 }
642 } else if (r_1->is_Register()) {
643 Register r = r_1->as_Register();
644 if (!r_2->is_valid()) {
645 // must be only an int (or less ) so move only 32bits to slot
646 // why not sign extend??
647 __ movl(Address(rsp, st_off), r);
648 } else {
649 // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
650 // T_DOUBLE and T_LONG use two slots in the interpreter
651 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
652 // long/double in gpr
653 #ifdef ASSERT
654 // Overwrite the unused slot with known junk
655 __ mov64(rax, CONST64(0xdeadffffdeadaaab));
656 __ movptr(Address(rsp, st_off), rax);
657 #endif /* ASSERT */
658 __ movq(Address(rsp, next_off), r);
659 } else {
660 __ movptr(Address(rsp, st_off), r);
661 }
662 }
663 } else {
664 assert(r_1->is_XMMRegister(), "");
665 if (!r_2->is_valid()) {
666 // only a float use just part of the slot
667 __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
668 } else {
669 #ifdef ASSERT
670 // Overwrite the unused slot with known junk
671 __ mov64(rax, CONST64(0xdeadffffdeadaaac));
672 __ movptr(Address(rsp, st_off), rax);
673 #endif /* ASSERT */
674 __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
675 }
676 }
677 }
678
679 // Schedule the branch target address early.
680 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
681 __ jmp(rcx);
682 }
683
684 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
685 address code_start, address code_end,
686 Label& L_ok) {
687 Label L_fail;
688 __ lea(temp_reg, ExternalAddress(code_start));
689 __ cmpptr(pc_reg, temp_reg);
690 __ jcc(Assembler::belowEqual, L_fail);
691 __ lea(temp_reg, ExternalAddress(code_end));
692 __ cmpptr(pc_reg, temp_reg);
693 __ jcc(Assembler::below, L_ok);
694 __ bind(L_fail);
695 }
696
// Interpreted-to-compiled (I2C) adapter: picks each argument up from the
// interpreter's stack layout (located via the saved SP) and shuffles it
// into the compiled-convention location given by regs, then jumps to the
// method's from_compiled entry (preloaded into r11).
697 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
698 int total_args_passed,
699 int comp_args_on_stack,
700 const BasicType *sig_bt,
701 const VMRegPair *regs) {
702
703 // Note: r13 contains the senderSP on entry. We must preserve it since
704 // we may do a i2c -> c2i transition if we lose a race where compiled
705 // code goes non-entrant while we get args ready.
706 // In addition we use r13 to locate all the interpreter args as
707 // we must align the stack to 16 bytes on an i2c entry else we
708 // lose alignment we expect in all compiled code and register
709 // save code can segv when fxsave instructions find improperly
710 // aligned stack pointer.
711
712 // Adapters can be frameless because they do not require the caller
713 // to perform additional cleanup work, such as correcting the stack pointer.
714 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
715 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
716 // even if a callee has modified the stack pointer.
717 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
718 // routinely repairs its caller's stack pointer (from sender_sp, which is set
719 // up via the senderSP register).
720 // In other words, if *either* the caller or callee is interpreted, we can
// NOTE(review): original lines 721-785 are not visible in this view; the
// missing section ends with r11 holding the saved (interpreter) SP, which
// the code below relies on.
786 // Put saved SP in another register
787 const Register saved_sp = rax;
788 __ movptr(saved_sp, r11);
789
790 // Will jump to the compiled code just as if compiled code was doing it.
791 // Pre-load the register-jump target early, to schedule it better.
792 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
793
794 #if INCLUDE_JVMCI
795 if (EnableJVMCI) {
796 // check if this call should be routed towards a specific entry point
797 __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
798 Label no_alternative_target;
799 __ jcc(Assembler::equal, no_alternative_target);
800 __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
// Clear the alternate target so it is only used once.
801 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
802 __ bind(no_alternative_target);
803 }
804 #endif // INCLUDE_JVMCI
805
806 // Now generate the shuffle code. Pick up all register args and move the
807 // rest through the floating point stack top.
808 for (int i = 0; i < total_args_passed; i++) {
809 if (sig_bt[i] == T_VOID) {
810 // Longs and doubles are passed in native word order, but misaligned
811 // in the 32-bit build.
812 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
813 continue;
814 }
815
816 // Pick up 0, 1 or 2 words from SP+offset.
817
818 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
819 "scrambled load targets?");
820 // Load in argument order going down.
821 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
822 // Point to interpreter value (vs. tag)
823 int next_off = ld_off - Interpreter::stackElementSize;
824 //
825 //
826 //
827 VMReg r_1 = regs[i].first();
828 VMReg r_2 = regs[i].second();
829 if (!r_1->is_valid()) {
830 assert(!r_2->is_valid(), "");
831 continue;
832 }
833 if (r_1->is_stack()) {
834 // Convert stack slot to an SP offset (+ wordSize to account for return address )
835 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
836
837 // We can use r13 as a temp here because compiled code doesn't need r13 as an input
838 // and if we end up going thru a c2i because of a miss a reasonable value of r13
839 // will be generated.
840 if (!r_2->is_valid()) {
841 // sign extend???
842 __ movl(r13, Address(saved_sp, ld_off));
843 __ movptr(Address(rsp, st_off), r13);
844 } else {
845 //
846 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
847 // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
848 // So we must adjust where to pick up the data to match the interpreter.
849 //
850 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
851 // are accessed as negative so LSW is at LOW address
852
853 // ld_off is MSW so get LSW
854 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
855 next_off : ld_off;
856 __ movq(r13, Address(saved_sp, offset));
857 // st_off is LSW (i.e. reg.first())
858 __ movq(Address(rsp, st_off), r13);
859 }
860 } else if (r_1->is_Register()) { // Register argument
861 Register r = r_1->as_Register();
862 assert(r != rax, "must be different");
863 if (r_2->is_valid()) {
864 //
865 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
866 // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
867 // So we must adjust where to pick up the data to match the interpreter.
868
869 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
870 next_off : ld_off;
871
872 // this can be a misaligned move
873 __ movq(r, Address(saved_sp, offset));
874 } else {
875 // sign extend and use a full word?
876 __ movl(r, Address(saved_sp, ld_off));
877 }
878 } else {
879 if (!r_2->is_valid()) {
880 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
881 } else {
882 __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
883 }
884 }
885 }
886
887 // 6243940 We might end up in handle_wrong_method if
888 // the callee is deoptimized as we race thru here. If that
889 // happens we don't want to take a safepoint because the
890 // caller frame will look interpreted and arguments are now
891 // "compiled" so it is much better to make this transition
892 // invisible to the stack walking code. Unfortunately if
893 // we try and find the callee by normal means a safepoint
894 // is possible. So we stash the desired callee in the thread
895 // and the vm will find there should this case occur.
896
897 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
898
899 // put Method* where a c2i would expect should we end up there
900 // only needed because of c2 resolve stubs return Method* as a result in
901 // rax
902 __ mov(rax, rbx);
903 __ jmp(r11);
904 }
905
906 // ---------------------------------------------------------------
// Generates the paired I2C and C2I adapters for one argument signature
// and registers them with the AdapterHandlerLibrary. Emission order:
// the i2c entry, then the inline-cache check (the c2i unverified
// entry), then the c2i entry proper. Returns the new handler entry.
907 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
908 int total_args_passed,
909 int comp_args_on_stack,
910 const BasicType *sig_bt,
911 const VMRegPair *regs,
912 AdapterFingerPrint* fingerprint) {
913 address i2c_entry = __ pc();
914
915 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
916
917 // -------------------------------------------------------------------------
918 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
919 // to the interpreter. The args start out packed in the compiled layout. They
920 // need to be unpacked into the interpreter layout. This will almost always
921 // require some stack space. We grow the current (compiled) stack, then repack
922 // the args. We finally end in a jump to the generic interpreter entry point.
923 // On exit from the interpreter, the interpreter will restore our SP (lest the
924 // compiled code, which relies solely on SP and not RBP, get sick).
925
926 address c2i_unverified_entry = __ pc();
927 Label skip_fixup;
928 Label ok;
929
930 Register holder = rax;
931 Register receiver = j_rarg0;
932 Register temp = rbx;
933
934 {
// Inline-cache check: temp (rbx) is clobbered by the klass load, then
// rbx is reloaded with the Method* from the CompiledICHolder.
935 __ load_klass(temp, receiver);
936 __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
937 __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset()));
938 __ jcc(Assembler::equal, ok);
939 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
940
941 __ bind(ok);
942 // Method might have been compiled since the call site was patched to
943 // interpreted if that is the case treat it as a miss so we can get
944 // the call site corrected.
945 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
946 __ jcc(Assembler::equal, skip_fixup);
947 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
948 }
949
950 address c2i_entry = __ pc();
951
952 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
953
954 __ flush();
955 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
956 }
957
958 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
959 VMRegPair *regs,
960 VMRegPair *regs2,
961 int total_args_passed) {
962 assert(regs2 == NULL, "not needed on x86");
963 // We return the amount of VMRegImpl stack slots we need to reserve for all
964 // the arguments NOT counting out_preserve_stack_slots.
965
966 // NOTE: These arrays will have to change when c1 is ported
967 #ifdef _WIN64
968 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
969 c_rarg0, c_rarg1, c_rarg2, c_rarg3
970 };
971 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
972 c_farg0, c_farg1, c_farg2, c_farg3
973 };
974 #else
|
528
529 // Allocate argument register save area
530 if (frame::arg_reg_save_area_bytes != 0) {
531 __ subptr(rsp, frame::arg_reg_save_area_bytes);
532 }
533 __ mov(c_rarg0, rbx);
534 __ mov(c_rarg1, rax);
535 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
536
537 // De-allocate argument register save area
538 if (frame::arg_reg_save_area_bytes != 0) {
539 __ addptr(rsp, frame::arg_reg_save_area_bytes);
540 }
541
542 __ pop_CPU_state();
543 // restore sp
544 __ mov(rsp, r13);
545 __ bind(L);
546 }
547
548 // For each value type argument, sig includes the list of fields of
549 // the value type. This utility function computes the number of
550 // arguments for the call if value types are passed by reference (the
551 // calling convention the interpreter expects).
552 static int compute_total_args_passed_int(const GrowableArray<SigEntry>& sig_extended) {
553 int total_args_passed = 0;
554 if (ValueTypePassFieldsAsArgs) {
555 for (int i = 0; i < sig_extended.length(); i++) {
556 BasicType bt = sig_extended.at(i)._bt;
557 if (bt == T_VALUETYPE) {
558 // In sig_extended, a value type argument starts with:
559 // T_VALUETYPE, followed by the types of the fields of the
560 // value type and T_VOID to mark the end of the value
561 // type. Value types are flattened so, for instance, in the
562 // case of a value type with an int field and a value type
563 // field that itself has 2 fields, an int and a long:
564 // T_VALUETYPE T_INT T_VALUETYPE T_INT T_LONG T_VOID (second
565 // slot for the T_LONG) T_VOID (inner T_VALUETYPE) T_VOID
566 // (outer T_VALUETYPE)
567 total_args_passed++;
568 int vt = 1;
569 do {
570 i++;
571 BasicType bt = sig_extended.at(i)._bt;
572 BasicType prev_bt = sig_extended.at(i-1)._bt;
573 if (bt == T_VALUETYPE) {
574 vt++;
575 } else if (bt == T_VOID &&
576 prev_bt != T_LONG &&
577 prev_bt != T_DOUBLE) {
578 vt--;
579 }
580 } while (vt != 0);
581 } else {
582 total_args_passed++;
583 }
584 }
585 } else {
586 total_args_passed = sig_extended.length();
587 }
588 return total_args_passed;
589 }
590
591
// Emits the store of one argument (described by bt/reg_pair) from its
// compiled-convention location — a general register, an XMM register,
// or a caller stack slot located extraspace bytes above the current
// SP — into `to`, the interpreter-layout destination. rax is used as a
// scratch register for memory-to-memory moves. T_VOID entries (the
// upper halves of longs/doubles) emit nothing.
592 static void gen_c2i_adapter_helper(MacroAssembler *masm,
593 BasicType bt,
594 BasicType prev_bt,
595 const VMRegPair& reg_pair,
596 const Address& to,
597 int extraspace) {
598 assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here");
599 if (bt == T_VOID) {
600 assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
601 return;
602 }
603
604 // Say 4 args:
605 // i st_off
606 // 0 32 T_LONG
607 // 1 24 T_VOID
608 // 2 16 T_OBJECT
609 // 3 8 T_BOOL
610 // - 0 return address
611 //
612 // However to make things extra confusing. Because we can fit a long/double in
613 // a single slot on a 64 bit VM and it would be silly to break them up, the interpreter
614 // leaves one slot empty and only stores to a single slot. In this case the
615 // slot that is occupied is the T_VOID slot. See I said it was confusing.
616
617 VMReg r_1 = reg_pair.first();
618 VMReg r_2 = reg_pair.second();
619 if (!r_1->is_valid()) {
620 assert(!r_2->is_valid(), "");
621 return;
622 }
623 if (r_1->is_stack()) {
624 // memory to memory use rax
625 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
626 if (!r_2->is_valid()) {
627 // sign extend??
628 __ movl(rax, Address(rsp, ld_off));
629 __ movl(to, rax);
630
631 } else {
632
633 __ movq(rax, Address(rsp, ld_off));
634 __ movq(to, rax);
635 }
636 } else if (r_1->is_Register()) {
637 Register r = r_1->as_Register();
638 if (!r_2->is_valid()) {
639 // must be only an int (or less ) so move only 32bits to slot
640 // why not sign extend??
641 __ movl(to, r);
642 } else {
643 __ movq(to, r);
644 }
645 } else {
646 assert(r_1->is_XMMRegister(), "");
647 if (!r_2->is_valid()) {
648 // only a float use just part of the slot
649 __ movflt(to, r_1->as_XMMRegister());
650 } else {
651 __ movdbl(to, r_1->as_XMMRegister());
652 }
653 }
654 }
655
// Value-type-aware C2I adapter. Like the classic c2i adapter this
// repacks compiled-convention arguments into the interpreter's
// all-on-stack layout and jumps to the interpreter entry; in addition,
// when ValueTypePassFieldsAsArgs is set and the signature contains
// value types, it first allocates heap buffers for them via a runtime
// call (producing oop_maps / frame_complete / frame_size_in_words for
// that callable section), copies the flattened fields into the
// buffers, and passes the buffer references to the interpreter.
656 static void gen_c2i_adapter(MacroAssembler *masm,
657 const GrowableArray<SigEntry>& sig_extended,
658 const VMRegPair *regs,
659 Label& skip_fixup,
660 address start,
661 OopMapSet*& oop_maps,
662 int& frame_complete,
663 int& frame_size_in_words) {
664 // Before we get into the guts of the C2I adapter, see if we should be here
665 // at all. We've come from compiled code and are attempting to jump to the
666 // interpreter, which means the caller made a static call to get here
667 // (vcalls always get a compiled target if there is one). Check for a
668 // compiled target. If there is one, we need to patch the caller's call.
669 patch_callers_callsite(masm);
670
671 __ bind(skip_fixup);
672
673 if (ValueTypePassFieldsAsArgs) {
674 // Are there any value type arguments?
675 int i = 0;
676 for (; i < sig_extended.length() && sig_extended.at(i)._bt != T_VALUETYPE; i++);
677
678 if (i < sig_extended.length()) {
679 // There is at least a value type argument: we're coming from
680 // compiled code so we have no buffers to back the value
681 // types. Allocate the buffers here with a runtime call.
682 oop_maps = new OopMapSet();
683 OopMap* map = NULL;
684
685 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
686
687 frame_complete = __ offset();
688
689 __ set_last_Java_frame(noreg, noreg, NULL);
690
691 __ mov(c_rarg0, r15_thread);
692
693 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_value_types)));
694
// Record the GC map at the call return PC, relative to `start`.
695 oop_maps->add_gc_map((int)(__ pc() - start), map);
696 __ reset_last_Java_frame(false, false);
697
698 RegisterSaver::restore_live_registers(masm);
699
700 Label no_exception;
701 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
702 __ jcc(Assembler::equal, no_exception);
703
// Allocation failed: clear the vm result and forward the pending exception.
704 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
705 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
706 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
707
708 __ bind(no_exception);
709
710 // We get an array of objects from the runtime call
711 int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
712 __ get_vm_result(r13, r15_thread);
// r10 = address of the first element of the buffer array; used below to
// fetch each value type buffer.
713 __ addptr(r13, offset_in_bytes);
714 __ mov(r10, r13);
715 }
716 }
717
718
719 // Since all args are passed on the stack, total_args_passed *
720 // Interpreter::stackElementSize is the space we need. Plus 1 because
721 // we also account for the return address location since
722 // we store it first rather than hold it in rax across all the shuffling
723 int total_args_passed = compute_total_args_passed_int(sig_extended);
724 int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
725
726 // stack is aligned, keep it that way
727 extraspace = round_to(extraspace, 2*wordSize);
728
729 // Get return address
730 __ pop(rax);
731
732 // set senderSP value
733 __ mov(r13, rsp);
734
735 __ subptr(rsp, extraspace);
736
737 // Store the return address in the expected location
738 __ movptr(Address(rsp, 0), rax);
739
740 // Now write the args into the outgoing interpreter space
741
742 // next_arg_comp is the next argument from the compiler point of
743 // view (value type fields are passed in registers/on the stack). In
744 // sig_extended, a value type argument starts with: T_VALUETYPE,
745 // followed by the types of the fields of the value type and T_VOID
746 // to mark the end of the value type. ignored counts the number of
747 // T_VALUETYPE/T_VOID. next_vt_arg is the next value type argument:
748 // used to get the buffer for that argument from the pool of buffers
749 // we allocated above and want to pass to the
750 // interpreter. next_arg_int is the next argument from the
751 // interpreter point of view (value types are passed by reference).
752 for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
753 next_arg_comp < sig_extended.length(); next_arg_comp++) {
754 assert((next_arg_comp == 0 && ignored == 0) || ignored < next_arg_comp, "shouldn't skip over more slot than there are arguments");
755 assert(next_arg_int < total_args_passed, "more arguments for the interpreter than expected?");
756 BasicType bt = sig_extended.at(next_arg_comp)._bt;
757 int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
758 if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) {
759 int next_off = st_off - Interpreter::stackElementSize;
760 const int offset = (bt==T_LONG||bt==T_DOUBLE) ? next_off : st_off;
761 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended.at(next_arg_comp-1)._bt : T_ILLEGAL,
762 regs[next_arg_comp-ignored], Address(rsp, offset), extraspace);
763 next_arg_int++;
764 #ifdef ASSERT
765 if (bt==T_LONG || bt==T_DOUBLE) {
766 // Overwrite the unused slot with known junk
767 __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
768 __ movptr(Address(rsp, st_off), rax);
769 }
770 #endif /* ASSERT */
771 } else {
772 ignored++;
773 // get the buffer from the just allocated pool of buffers
774 __ load_heap_oop(r11, Address(r10, next_vt_arg * type2aelembytes(T_VALUETYPE)));
775 next_vt_arg++; next_arg_int++;
776 int vt = 1;
777 // write fields we get from compiled code in registers/stack
778 // slots to the buffer: we know we are done with that value type
779 // argument when we hit the T_VOID that acts as an end of value
780 // type delimiter for this value type. Value types are flattened
781 // so we might encounter embedded value types. Each entry in
782 // sig_extended contains a field offset in the buffer.
783 do {
784 next_arg_comp++;
785 BasicType bt = sig_extended.at(next_arg_comp)._bt;
786 BasicType prev_bt = sig_extended.at(next_arg_comp-1)._bt;
787 if (bt == T_VALUETYPE) {
788 vt++;
789 ignored++;
790 } else if (bt == T_VOID &&
791 prev_bt != T_LONG &&
792 prev_bt != T_DOUBLE) {
793 vt--;
794 ignored++;
795 } else {
796 int off = sig_extended.at(next_arg_comp)._offset;
797 assert(off > 0, "offset in object should be positive");
798 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended.at(next_arg_comp-1)._bt : T_ILLEGAL,
799 regs[next_arg_comp-ignored], Address(r11, off), extraspace);
800 }
801 } while (vt != 0);
802 // pass the buffer to the interpreter
803 __ movptr(Address(rsp, st_off), r11);
804 }
805 }
806
807 // Schedule the branch target address early.
808 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
809 __ jmp(rcx);
810 }
811
812 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
813 address code_start, address code_end,
814 Label& L_ok) {
815 Label L_fail;
816 __ lea(temp_reg, ExternalAddress(code_start));
817 __ cmpptr(pc_reg, temp_reg);
818 __ jcc(Assembler::belowEqual, L_fail);
819 __ lea(temp_reg, ExternalAddress(code_end));
820 __ cmpptr(pc_reg, temp_reg);
821 __ jcc(Assembler::below, L_ok);
822 __ bind(L_fail);
823 }
824
// Emits the load of one argument (described by bt/reg_pair) from
// `from` — an interpreter stack slot or a value type buffer field —
// into its compiled-convention location: a general register, an XMM
// register, or an outgoing stack slot relative to SP. r13 is used as
// a scratch register for memory-to-memory moves. T_VOID entries (the
// upper halves of longs/doubles) emit nothing.
825 static void gen_i2c_adapter_helper(MacroAssembler *masm,
826 BasicType bt,
827 BasicType prev_bt,
828 const VMRegPair& reg_pair,
829 const Address& from) {
830 assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here");
831 if (bt == T_VOID) {
832 // Longs and doubles are passed in native word order, but misaligned
833 // in the 32-bit build.
834 assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
835 return;
836 }
837 // Pick up 0, 1 or 2 words from SP+offset.
838
839 assert(!reg_pair.second()->is_valid() || reg_pair.first()->next() == reg_pair.second(),
840 "scrambled load targets?");
841 //
842 //
843 //
844 VMReg r_1 = reg_pair.first();
845 VMReg r_2 = reg_pair.second();
846 if (!r_1->is_valid()) {
847 assert(!r_2->is_valid(), "");
848 return;
849 }
850 if (r_1->is_stack()) {
851 // Convert stack slot to an SP offset (+ wordSize to account for return address )
852 int st_off = reg_pair.first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
853
854 // We can use r13 as a temp here because compiled code doesn't need r13 as an input
855 // and if we end up going thru a c2i because of a miss a reasonable value of r13
856 // will be generated.
857 if (!r_2->is_valid()) {
858 // sign extend???
859 __ movl(r13, from);
860 __ movptr(Address(rsp, st_off), r13);
861 } else {
862 //
863 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
864 // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
865 // So we must adjust where to pick up the data to match the interpreter.
866 //
867 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
868 // are accessed as negative so LSW is at LOW address
869
870 // ld_off is MSW so get LSW
871 __ movq(r13, from);
872 // st_off is LSW (i.e. reg.first())
873 __ movq(Address(rsp, st_off), r13);
874 }
875 } else if (r_1->is_Register()) { // Register argument
876 Register r = r_1->as_Register();
877 assert(r != rax, "must be different");
878 if (r_2->is_valid()) {
879 //
880 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
881 // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
882 // So we must adjust where to pick up the data to match the interpreter.
883
884 // this can be a misaligned move
885 __ movq(r, from);
886 } else {
887 // sign extend and use a full word?
888 __ movl(r, from);
889 }
890 } else {
891 if (!r_2->is_valid()) {
892 __ movflt(r_1->as_XMMRegister(), from);
893 } else {
894 __ movdbl(r_1->as_XMMRegister(), from);
895 }
896 }
897 }
898
899 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
900 int comp_args_on_stack,
901 const GrowableArray<SigEntry>& sig_extended,
902 const VMRegPair *regs) {
903
904 // Note: r13 contains the senderSP on entry. We must preserve it since
905 // we may do a i2c -> c2i transition if we lose a race where compiled
906 // code goes non-entrant while we get args ready.
907 // In addition we use r13 to locate all the interpreter args as
908 // we must align the stack to 16 bytes on an i2c entry else we
909 // lose alignment we expect in all compiled code and register
910 // save code can segv when fxsave instructions find improperly
911 // aligned stack pointer.
912
913 // Adapters can be frameless because they do not require the caller
914 // to perform additional cleanup work, such as correcting the stack pointer.
915 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
916 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
917 // even if a callee has modified the stack pointer.
918 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
919 // routinely repairs its caller's stack pointer (from sender_sp, which is set
920 // up via the senderSP register).
921 // In other words, if *either* the caller or callee is interpreted, we can
987 // Put saved SP in another register
988 const Register saved_sp = rax;
989 __ movptr(saved_sp, r11);
990
991 // Will jump to the compiled code just as if compiled code was doing it.
992 // Pre-load the register-jump target early, to schedule it better.
993 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
994
995 #if INCLUDE_JVMCI
996 if (EnableJVMCI) {
997 // check if this call should be routed towards a specific entry point
998 __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
999 Label no_alternative_target;
1000 __ jcc(Assembler::equal, no_alternative_target);
1001 __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
1002 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
1003 __ bind(no_alternative_target);
1004 }
1005 #endif // INCLUDE_JVMCI
1006
1007 int total_args_passed = compute_total_args_passed_int(sig_extended);
1008 // Now generate the shuffle code. Pick up all register args and move the
1009 // rest through the floating point stack top.
1010
1011 // next_arg_comp is the next argument from the compiler point of
1012 // view (value type fields are passed in registers/on the stack). In
1013 // sig_extended, a value type argument starts with: T_VALUETYPE,
1014 // followed by the types of the fields of the value type and T_VOID
1015 // to mark the end of the value type. ignored counts the number of
1016 // T_VALUETYPE/T_VOID. next_arg_int is the next argument from the
1017 // interpreter point of view (value types are passed by reference).
1018 for (int next_arg_comp = 0, ignored = 0, next_arg_int = 0; next_arg_comp < sig_extended.length(); next_arg_comp++) {
1019 assert((next_arg_comp == 0 && ignored == 0) || ignored < next_arg_comp, "shouldn't skip over more slot than there are arguments");
1020 assert(next_arg_int < total_args_passed, "more arguments from the interpreter than expected?");
1021 BasicType bt = sig_extended.at(next_arg_comp)._bt;
1022 int ld_off = (total_args_passed - next_arg_int)*Interpreter::stackElementSize;
1023 if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) {
1024 // Load in argument order going down.
1025 // Point to interpreter value (vs. tag)
1026 int next_off = ld_off - Interpreter::stackElementSize;
1027 const int offset = (bt==T_LONG||bt==T_DOUBLE) ? next_off : ld_off;
1028 gen_i2c_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended.at(next_arg_comp-1)._bt : T_ILLEGAL,
1029 regs[next_arg_comp-ignored], Address(saved_sp, offset));
1030 next_arg_int++;
1031 } else {
1032 next_arg_int++;
1033 ignored++;
1034 // get the buffer for that value type
1035 __ movptr(r10, Address(saved_sp, ld_off));
1036 int vt = 1;
1037 // load fields to registers/stack slots from the buffer: we know
1038 // we are done with that value type argument when we hit the
1039 // T_VOID that acts as an end of value type delimiter for this
1040 // value type. Value types are flattened so we might encounter a
1041 // embedded value types. Each entry in sig_extended contains a
1042 // field offset in the buffer.
1043 do {
1044 next_arg_comp++;
1045 BasicType bt = sig_extended.at(next_arg_comp)._bt;
1046 BasicType prev_bt = sig_extended.at(next_arg_comp-1)._bt;
1047 if (bt == T_VALUETYPE) {
1048 vt++;
1049 ignored++;
1050 } else if (bt == T_VOID &&
1051 prev_bt != T_LONG &&
1052 prev_bt != T_DOUBLE) {
1053 vt--;
1054 ignored++;
1055 } else {
1056 int off = sig_extended.at(next_arg_comp)._offset;
1057 assert(off > 0, "offset in object should be positive");
1058 gen_i2c_adapter_helper(masm, bt, prev_bt, regs[next_arg_comp - ignored], Address(r10, off));
1059 }
1060 } while (vt != 0);
1061 }
1062 }
1063
1064 // 6243940 We might end up in handle_wrong_method if
1065 // the callee is deoptimized as we race thru here. If that
1066 // happens we don't want to take a safepoint because the
1067 // caller frame will look interpreted and arguments are now
1068 // "compiled" so it is much better to make this transition
1069 // invisible to the stack walking code. Unfortunately if
1070 // we try and find the callee by normal means a safepoint
1071 // is possible. So we stash the desired callee in the thread
1072 // and the vm will find there should this case occur.
1073
1074 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
1075
1076 // put Method* where a c2i would expect should we end up there
1077 // only needed because of c2 resolve stubs return Method* as a result in
1078 // rax
1079 __ mov(rax, rbx);
1080 __ jmp(r11);
1081 }
1082
1083 // ---------------------------------------------------------------
1084 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
1085 int comp_args_on_stack,
1086 const GrowableArray<SigEntry>& sig_extended,
1087 const VMRegPair *regs,
1088 AdapterFingerPrint* fingerprint,
1089 AdapterBlob*& new_adapter) {
1090 address i2c_entry = __ pc();
1091
1092 gen_i2c_adapter(masm, comp_args_on_stack, sig_extended, regs);
1093
1094 // -------------------------------------------------------------------------
1095 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
1096 // to the interpreter. The args start out packed in the compiled layout. They
1097 // need to be unpacked into the interpreter layout. This will almost always
1098 // require some stack space. We grow the current (compiled) stack, then repack
1099 // the args. We finally end in a jump to the generic interpreter entry point.
1100 // On exit from the interpreter, the interpreter will restore our SP (lest the
1101 // compiled code, which relys solely on SP and not RBP, get sick).
1102
1103 address c2i_unverified_entry = __ pc();
1104 Label skip_fixup;
1105 Label ok;
1106
1107 Register holder = rax;
1108 Register receiver = j_rarg0;
1109 Register temp = rbx;
1110
1111 {
1112 __ load_klass(temp, receiver);
1113 __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
1114 __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset()));
1115 __ jcc(Assembler::equal, ok);
1116 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1117
1118 __ bind(ok);
1119 // Method might have been compiled since the call site was patched to
1120 // interpreted if that is the case treat it as a miss so we can get
1121 // the call site corrected.
1122 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
1123 __ jcc(Assembler::equal, skip_fixup);
1124 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1125 }
1126
1127 address c2i_entry = __ pc();
1128
1129 OopMapSet* oop_maps = NULL;
1130 int frame_complete = CodeOffsets::frame_never_safe;
1131 int frame_size_in_words = 0;
1132 gen_c2i_adapter(masm, sig_extended, regs, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words);
1133
1134 __ flush();
1135 new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps);
1136 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
1137 }
1138
1139 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1140 VMRegPair *regs,
1141 VMRegPair *regs2,
1142 int total_args_passed) {
1143 assert(regs2 == NULL, "not needed on x86");
1144 // We return the amount of VMRegImpl stack slots we need to reserve for all
1145 // the arguments NOT counting out_preserve_stack_slots.
1146
1147 // NOTE: These arrays will have to change when c1 is ported
1148 #ifdef _WIN64
1149 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1150 c_rarg0, c_rarg1, c_rarg2, c_rarg3
1151 };
1152 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1153 c_farg0, c_farg1, c_farg2, c_farg3
1154 };
1155 #else
|