26 #include "asm/macroAssembler.inline.hpp"
27 #include "code/debugInfoRec.hpp"
28 #include "code/icBuffer.hpp"
29 #include "code/vtableStubs.hpp"
30 #include "interpreter/interpreter.hpp"
31 #include "oops/compiledICHolder.hpp"
32 #include "prims/jvmtiRedefineClassesTrace.hpp"
33 #include "runtime/sharedRuntime.hpp"
34 #include "runtime/vframeArray.hpp"
35 #include "vmreg_sparc.inline.hpp"
36 #ifdef COMPILER1
37 #include "c1/c1_Runtime1.hpp"
38 #endif
39 #ifdef COMPILER2
40 #include "opto/runtime.hpp"
41 #endif
42 #ifdef SHARK
43 #include "compiler/compileBroker.hpp"
44 #include "shark/sharkCompiler.hpp"
45 #endif
46
47 #define __ masm->
48
49
50 class RegisterSaver {
51
52 // Used for saving volatile registers. This is Gregs, Fregs, I/L/O.
53 // The Oregs are problematic. In the 32bit build the compiler can
54 // have O registers live with 64 bit quantities. A window save will
55 // cut the heads off of the registers. We have to do a very extensive
56 // stack dance to save and restore these properly.
57
// Note that the Oregs problem only exists if we block at either a polling
// page exception or a compiled code safepoint that was not originally a call,
// or deoptimize following one of these kinds of safepoints.
61
62 // Lots of registers to save. For all builds, a window save will preserve
63 // the %i and %l registers. For the 32-bit longs-in-two entries and 64-bit
64 // builds a window-save will preserve the %o registers. In the LION build
65 // we need to save the 64-bit %o registers which requires we save them
743 // (really L0) is in use by the compiled frame as a generic temp. However,
744 // the interpreter does not know where its args are without some kind of
745 // arg pointer being passed in. Pass it in Gargs.
746 __ delayed()->add(SP, G1, Gargs);
747 }
748
749 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, Register temp2_reg,
750 address code_start, address code_end,
751 Label& L_ok) {
752 Label L_fail;
753 __ set(ExternalAddress(code_start), temp_reg);
754 __ set(pointer_delta(code_end, code_start, 1), temp2_reg);
755 __ cmp(pc_reg, temp_reg);
756 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pn, L_fail);
757 __ delayed()->add(temp_reg, temp2_reg, temp_reg);
758 __ cmp(pc_reg, temp_reg);
759 __ cmp_and_brx_short(pc_reg, temp_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
760 __ bind(L_fail);
761 }
762
// Emit the interpreted-to-compiled (I2C) argument-shuffling adapter.
//   total_args_passed  - number of Java-level arguments (including receiver)
//   comp_args_on_stack - VMRegStackSlots required by the compiled convention
//   sig_bt             - basic type of each argument, in signature order
//   regs               - where the compiled convention wants each argument
// (Part of this function's body is elided in this excerpt.)
void AdapterGenerator::gen_i2c_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout. Lesp was saved by the calling I-frame and will be restored on
  // return. Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will. After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention. Finally, end in a jump to the compiled code. The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.

  // More detail:
  // Adapters can be frameless because they do not require the caller
  // to perform additional cleanup work, such as correcting the stack pointer.
  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
  // even if a callee has modified the stack pointer.
          // Load the two 32-bit halves as separate single-precision words.
          __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
          __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
#endif
        }
      }
      // Was the argument really intended to be on the stack, but was loaded
      // into F8/F9?
      if (regs[i].first()->is_stack()) {
        assert(r_1->as_FloatRegister() == F8, "fix this code");
        // Convert stack slot to an SP offset
        int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
        // Store down the shuffled stack word. Target address _is_ aligned.
        RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
        if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
        else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
      }
    }

  // Jump to the compiled code just as if compiled code was doing it.
  __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.
  Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
  __ st_ptr(G5_method, callee_target_addr);
  // Tail-jump to the compiled entry point; link register is G0, so no
  // return address is saved (this frame disappears from the call chain).
  __ jmpl(G3, 0, G0);
  __ delayed()->nop();
}
1008
// ---------------------------------------------------------------
// Emit the paired I2C and C2I adapters for one signature fingerprint and
// register the three entry points (i2c, c2i, c2i-unverified) with the
// AdapterHandlerLibrary. (The inline-cache klass compare is elided in this
// excerpt; it sets the condition codes tested by the first brx below.)
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            // VMReg max_arg,
                                                            int comp_args_on_stack, // VMRegStackSlots
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  AdapterGenerator agen(masm);

  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);


  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know G5 holds the Method*. The
  // args start out packed in the compiled layout. They need to be unpacked
  // into the interpreter layout. This will almost always require some stack
  // space. We grow the current (compiled) stack, then repack the args. We
  // finally end in a jump to the generic interpreter entry point. On exit
  // from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not FP, get sick).

  address c2i_unverified_entry = __ pc();
  Label L_skip_fixup;
  {
    Register R_temp = G1; // another scratch register

    AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());

    __ verify_oop(O0);
    __ load_klass(O0, G3_scratch);
    Label ok, ok2;
    // On inline-cache hit fall through to 'ok'; the delay slot loads the
    // real Method* out of the CompiledICHolder either way. A mismatch
    // goes to the shared IC-miss stub.
    __ brx(Assembler::equal, false, Assembler::pt, ok);
    __ delayed()->ld_ptr(G5_method, CompiledICHolder::holder_method_offset(), G5_method);
    __ jump_to(ic_miss, G3_scratch);
    __ delayed()->nop();

    __ bind(ok);
    // Method might have been compiled since the call site was patched to
    // interpreted; if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
    __ bind(ok2);
    // No compiled code installed: proceed with the C2I adapter proper.
    __ br_null(G3_scratch, false, Assembler::pt, L_skip_fixup);
    __ delayed()->nop();
    __ jump_to(ic_miss, G3_scratch);
    __ delayed()->nop();

  }

  address c2i_entry = __ pc();

  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);

}
1073
1074 // Helper function for native calling conventions
1075 static VMReg int_stk_helper( int i ) {
1076 // Bias any stack based VMReg we get by ignoring the window area
1077 // but not the register parameter save area.
1078 //
1079 // This is strange for the following reasons. We'd normally expect
1080 // the calling convention to return an VMReg for a stack slot
1081 // completely ignoring any abi reserved area. C2 thinks of that
1082 // abi area as only out_preserve_stack_slots. This does not include
1083 // the area allocated by the C abi to store down integer arguments
1084 // because the java calling convention does not use it. So
1085 // since c2 assumes that there are only out_preserve_stack_slots
1086 // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack
2899
2900 __ cmp_zero_and_br(Assembler::notZero, O4array_size, loop);
2901 __ delayed()->add(O3array, wordSize, O3array);
2902 __ ld_ptr(G3pcs, 0, O7); // load final frame new pc
2903
2904 }
2905
//------------------------------generate_deopt_blob----------------------------
// Ought to generate an ideal graph & compile, but here's some SPARC ASM
// instead.
// Builds the blob that frames from deoptimizing nmethods land in. It has
// three entry points (normal deopt, exception, exception-in-tls) that each
// record an Unpack_* mode and meet at a common path which calls
// Deoptimization::fetch_unroll_info and later unpack_frames.
// (Portions of this function's body are elided in this excerpt.)
void SharedRuntime::generate_deopt_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
#ifdef ASSERT
  if (UseStackBanging) {
    pad += StackShadowPages*16 + 32;
  }
#endif
#ifdef _LP64
  CodeBuffer buffer("deopt_blob", 2100+pad, 512);
#else
  // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
  CodeBuffer buffer("deopt_blob", 1600+pad, 512);
#endif /* _LP64 */
  MacroAssembler* masm = new MacroAssembler(&buffer);
  FloatRegister Freturn0 = F0;
  Register Greturn1 = G1;
  Register Oreturn0 = O0;
  Register Oreturn1 = O1;
  Register O2UnrollBlock = O2;
  Register L0deopt_mode = L0;      // Unpack_* mode, kept in a window-saved local
  Register G4deopt_mode = G4_scratch;
  int frame_size_words;
  // Return values are spilled just below FP while frames are unpacked.
  Address saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
#if !defined(_LP64) && defined(COMPILER2)
  Address saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
#endif
  //
  // Refer to the following methods for more information:
  // - Deoptimization::fetch_unroll_info
  // - Deoptimization::unpack_frames

  OopMap* map = NULL;

  int start = __ offset();

  // restore G2, the trampoline destroyed it
  __ get_thread();

  // On entry we have been called by the deoptimized nmethod with a call that
  // replaced the original call (or safepoint polling location) so the deoptimizing
  // pc is now in O7. Return values are still in the expected places

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  __ ba(cont);
  __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);

  int exception_offset = __ offset() - start;

  // restore G2, the trampoline destroyed it
  __ get_thread();

  // On entry we have been jumped to by the exception handler (or exception_blob
  // for server). O0 contains the exception oop and O7 contains the original
  // exception pc. So if we push a frame here it will look to the
  // stack walking code (fetch_unroll_info) just like a normal call so
  // state will be extracted normally.

  // save exception oop in JavaThread and fall through into the
  // exception_in_tls case since they are handled in same way except
  // for where the pending exception is kept.
  __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset());

  //
  // Vanilla deoptimization with an exception pending in exception_oop
  //
  int exception_in_tls_offset = __ offset() - start;

  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  // Restore G2_thread
  __ get_thread();

#ifdef ASSERT
  {
    // verify that there is really an exception oop in exception_oop
    Label has_exception;
    __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
    __ br_notnull_short(Oexception, Assembler::pt, has_exception);
    __ stop("no exception in thread");
    __ bind(has_exception);

    // verify that there is no pending exception
    Label no_pending_exception;
    Address exception_addr(G2_thread, Thread::pending_exception_offset());
    __ ld_ptr(exception_addr, Oexception);
    __ br_null_short(Oexception, Assembler::pt, no_pending_exception);
    __ stop("must not have pending exception here");
    __ bind(no_pending_exception);
  }
#endif

  __ ba(cont);
  __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);;

  //
  // Reexecute entry, similar to c2 uncommon trap
  //
  int reexecute_offset = __ offset() - start;

  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode);

  // Common path for all three entries: ask the runtime how to unroll.
  __ bind(cont);

  __ set_last_Java_frame(SP, noreg);

  // do the call by hand so we can get the oopmap

  __ mov(G2_thread, L7_thread_cache);
  __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  __ delayed()->mov(G2_thread, O0);

  // Set an oopmap for the call site this describes all our saved volatile registers

  oop_maps->add_gc_map( __ offset()-start, map);

  __ mov(L7_thread_cache, G2_thread);

  __ reset_last_Java_frame();

  // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
  // so this move will survive

  __ mov(L0deopt_mode, G4deopt_mode);

  // fetch_unroll_info's result (the UnrollBlock*) was returned in O0;
  // stash it where it survives the window restore below.
  __ mov(O0, O2UnrollBlock->after_save());

  RegisterSaver::restore_result_registers(masm);

  Label noException;
  __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException);

  // Move the pending exception from exception_oop to Oexception so
  // the pending exception will be picked up by the interpreter.
  __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
  __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
  __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
  __ bind(noException);

  // deallocate the deoptimization frame taking care to preserve the return values
  __ set_last_Java_frame(SP, G0);
  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
#endif
  __ reset_last_Java_frame();
  __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);

#if !defined(_LP64) && defined(COMPILER2)
  // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
  // I0/I1 if the return value is long.
  Label not_long;
  __ cmp_and_br_short(O0,T_LONG, Assembler::notEqual, Assembler::pt, not_long);
  __ ldd(saved_Greturn1_addr,I0);
  __ bind(not_long);
#endif
  __ ret();
  __ delayed()->restore();

  masm->flush();
  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
3128
3129 #ifdef COMPILER2
3130
3131 //------------------------------generate_uncommon_trap_blob--------------------
3132 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
3133 // instead.
3134 void SharedRuntime::generate_uncommon_trap_blob() {
3135 // allocate space for the code
3136 ResourceMark rm;
3137 // setup code generation tools
3138 int pad = VerifyThread ? 512 : 0;
3139 #ifdef ASSERT
3140 if (UseStackBanging) {
3141 pad += StackShadowPages*16 + 32;
3142 }
3143 #endif
3144 #ifdef _LP64
3145 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
3146 #else
|
26 #include "asm/macroAssembler.inline.hpp"
27 #include "code/debugInfoRec.hpp"
28 #include "code/icBuffer.hpp"
29 #include "code/vtableStubs.hpp"
30 #include "interpreter/interpreter.hpp"
31 #include "oops/compiledICHolder.hpp"
32 #include "prims/jvmtiRedefineClassesTrace.hpp"
33 #include "runtime/sharedRuntime.hpp"
34 #include "runtime/vframeArray.hpp"
35 #include "vmreg_sparc.inline.hpp"
36 #ifdef COMPILER1
37 #include "c1/c1_Runtime1.hpp"
38 #endif
39 #ifdef COMPILER2
40 #include "opto/runtime.hpp"
41 #endif
42 #ifdef SHARK
43 #include "compiler/compileBroker.hpp"
44 #include "shark/sharkCompiler.hpp"
45 #endif
46 #if INCLUDE_JVMCI
47 #include "jvmci/jvmciJavaClasses.hpp"
48 #endif
49
50 #define __ masm->
51
52
53 class RegisterSaver {
54
55 // Used for saving volatile registers. This is Gregs, Fregs, I/L/O.
56 // The Oregs are problematic. In the 32bit build the compiler can
57 // have O registers live with 64 bit quantities. A window save will
58 // cut the heads off of the registers. We have to do a very extensive
59 // stack dance to save and restore these properly.
60
// Note that the Oregs problem only exists if we block at either a polling
// page exception or a compiled code safepoint that was not originally a call,
// or deoptimize following one of these kinds of safepoints.
64
65 // Lots of registers to save. For all builds, a window save will preserve
66 // the %i and %l registers. For the 32-bit longs-in-two entries and 64-bit
67 // builds a window-save will preserve the %o registers. In the LION build
68 // we need to save the 64-bit %o registers which requires we save them
746 // (really L0) is in use by the compiled frame as a generic temp. However,
747 // the interpreter does not know where its args are without some kind of
748 // arg pointer being passed in. Pass it in Gargs.
749 __ delayed()->add(SP, G1, Gargs);
750 }
751
752 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, Register temp2_reg,
753 address code_start, address code_end,
754 Label& L_ok) {
755 Label L_fail;
756 __ set(ExternalAddress(code_start), temp_reg);
757 __ set(pointer_delta(code_end, code_start, 1), temp2_reg);
758 __ cmp(pc_reg, temp_reg);
759 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pn, L_fail);
760 __ delayed()->add(temp_reg, temp2_reg, temp_reg);
761 __ cmp(pc_reg, temp_reg);
762 __ cmp_and_brx_short(pc_reg, temp_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
763 __ bind(L_fail);
764 }
765
// Emit the interpreted-to-compiled (I2C) argument-shuffling adapter.
//   total_args_passed  - number of Java-level arguments (including receiver)
//   comp_args_on_stack - VMRegStackSlots required by the compiled convention
//   sig_bt             - basic type of each argument, in signature order
//   regs               - where the compiled convention wants each argument
// (Part of this function's body is elided in this excerpt.)
void AdapterGenerator::gen_i2c_adapter(int total_args_passed,
                                       // VMReg max_arg,
                                       int comp_args_on_stack, // VMRegStackSlots
                                       const BasicType *sig_bt,
                                       const VMRegPair *regs) {
  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout. Lesp was saved by the calling I-frame and will be restored on
  // return. Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will. After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention. Finally, end in a jump to the compiled code. The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.

  // More detail:
  // Adapters can be frameless because they do not require the caller
  // to perform additional cleanup work, such as correcting the stack pointer.
  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
  // even if a callee has modified the stack pointer.
          // Load the two 32-bit halves as separate single-precision words.
          __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
          __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
#endif
        }
      }
      // Was the argument really intended to be on the stack, but was loaded
      // into F8/F9?
      if (regs[i].first()->is_stack()) {
        assert(r_1->as_FloatRegister() == F8, "fix this code");
        // Convert stack slot to an SP offset
        int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
        // Store down the shuffled stack word. Target address _is_ aligned.
        RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
        if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
        else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
      }
    }

  // Jump to the compiled code just as if compiled code was doing it.
  __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);
#if INCLUDE_JVMCI
  if (EnableJVMCI) {
    // check if this call should be routed towards a specific entry point
    // (the JVMCI runtime can stash an alternate call target in the thread;
    // when non-zero, use it instead of from_compiled and clear the field).
    // NOTE(review): the field is tested/cleared with 32-bit ld/st but read
    // with ld_ptr — confirm the intended width for this pointer-sized slot.
    __ ld(Address(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), G1);
    __ cmp(G0, G1);
    Label no_alternative_target;
    __ br(Assembler::equal, false, Assembler::pn, no_alternative_target);
    __ delayed()->nop();

    __ ld_ptr(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()), G3);
    __ st(G0, Address(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));

    __ bind(no_alternative_target);
  }
#endif // INCLUDE_JVMCI

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.
  Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
  __ st_ptr(G5_method, callee_target_addr);
  // Tail-jump to the compiled entry point; link register is G0, so no
  // return address is saved (this frame disappears from the call chain).
  __ jmpl(G3, 0, G0);
  __ delayed()->nop();
}
1024
1025 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
1026 int total_args_passed,
1027 int comp_args_on_stack,
1028 const BasicType *sig_bt,
1029 const VMRegPair *regs) {
1030 AdapterGenerator agen(masm);
1031 agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
1032 }
1033
// ---------------------------------------------------------------
// Emit the paired I2C and C2I adapters for one signature fingerprint and
// register the three entry points (i2c, c2i, c2i-unverified) with the
// AdapterHandlerLibrary. (The inline-cache klass compare is elided in this
// excerpt; it sets the condition codes tested by the first brx below.)
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            // VMReg max_arg,
                                                            int comp_args_on_stack, // VMRegStackSlots
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);


  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know G5 holds the Method*. The
  // args start out packed in the compiled layout. They need to be unpacked
  // into the interpreter layout. This will almost always require some stack
  // space. We grow the current (compiled) stack, then repack the args. We
  // finally end in a jump to the generic interpreter entry point. On exit
  // from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not FP, get sick).

  address c2i_unverified_entry = __ pc();
  Label L_skip_fixup;
  {
    Register R_temp = G1; // another scratch register

    AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());

    __ verify_oop(O0);
    __ load_klass(O0, G3_scratch);
    Label ok, ok2;
    // On inline-cache hit fall through to 'ok'; the delay slot loads the
    // real Method* out of the CompiledICHolder either way. A mismatch
    // goes to the shared IC-miss stub.
    __ brx(Assembler::equal, false, Assembler::pt, ok);
    __ delayed()->ld_ptr(G5_method, CompiledICHolder::holder_method_offset(), G5_method);
    __ jump_to(ic_miss, G3_scratch);
    __ delayed()->nop();

    __ bind(ok);
    // Method might have been compiled since the call site was patched to
    // interpreted; if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
    __ bind(ok2);
    // No compiled code installed: proceed with the C2I adapter proper.
    __ br_null(G3_scratch, false, Assembler::pt, L_skip_fixup);
    __ delayed()->nop();
    __ jump_to(ic_miss, G3_scratch);
    __ delayed()->nop();

  }

  address c2i_entry = __ pc();
  AdapterGenerator agen(masm);
  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);

}
1096
1097 // Helper function for native calling conventions
1098 static VMReg int_stk_helper( int i ) {
1099 // Bias any stack based VMReg we get by ignoring the window area
1100 // but not the register parameter save area.
1101 //
1102 // This is strange for the following reasons. We'd normally expect
1103 // the calling convention to return an VMReg for a stack slot
1104 // completely ignoring any abi reserved area. C2 thinks of that
1105 // abi area as only out_preserve_stack_slots. This does not include
1106 // the area allocated by the C abi to store down integer arguments
1107 // because the java calling convention does not use it. So
1108 // since c2 assumes that there are only out_preserve_stack_slots
1109 // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack
2922
2923 __ cmp_zero_and_br(Assembler::notZero, O4array_size, loop);
2924 __ delayed()->add(O3array, wordSize, O3array);
2925 __ ld_ptr(G3pcs, 0, O7); // load final frame new pc
2926
2927 }
2928
//------------------------------generate_deopt_blob----------------------------
// Ought to generate an ideal graph & compile, but here's some SPARC ASM
// instead.
// Builds the blob that frames from deoptimizing nmethods land in. It has
// several entry points (normal deopt, exception, exception-in-tls,
// reexecute, plus JVMCI uncommon-trap entries) that each record an
// Unpack_* mode and meet at a common path which calls
// Deoptimization::fetch_unroll_info and later unpack_frames.
// (Portions of this function's body are elided in this excerpt.)
void SharedRuntime::generate_deopt_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
#ifdef ASSERT
  if (UseStackBanging) {
    pad += StackShadowPages*16 + 32;
  }
#endif
#if INCLUDE_JVMCI
  if (EnableJVMCI) {
    pad += 1000; // Increase the buffer size when compiling for JVMCI
  }
#endif
#ifdef _LP64
  CodeBuffer buffer("deopt_blob", 2100+pad, 512);
#else
  // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
  CodeBuffer buffer("deopt_blob", 1600+pad, 512);
#endif /* _LP64 */
  MacroAssembler* masm = new MacroAssembler(&buffer);
  FloatRegister Freturn0 = F0;
  Register Greturn1 = G1;
  Register Oreturn0 = O0;
  Register Oreturn1 = O1;
  Register O2UnrollBlock = O2;
  Register L0deopt_mode = L0;      // Unpack_* mode, kept in a window-saved local
  Register G4deopt_mode = G4_scratch;
  int frame_size_words;
  // Return values are spilled just below FP while frames are unpacked.
  Address saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
#if !defined(_LP64) && defined(COMPILER2)
  Address saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
#endif
  //
  // Refer to the following methods for more information:
  // - Deoptimization::fetch_unroll_info
  // - Deoptimization::unpack_frames

  OopMap* map = NULL;

  int start = __ offset();

  // restore G2, the trampoline destroyed it
  __ get_thread();

  // On entry we have been called by the deoptimized nmethod with a call that
  // replaced the original call (or safepoint polling location) so the deoptimizing
  // pc is now in O7. Return values are still in the expected places

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  __ ba(cont);
  __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);


#if INCLUDE_JVMCI
  Label after_fetch_unroll_info_call;
  int implicit_exception_uncommon_trap_offset = 0;
  int uncommon_trap_offset = 0;

  if (EnableJVMCI) {
    masm->block_comment("BEGIN implicit_exception_uncommon_trap");
    implicit_exception_uncommon_trap_offset = __ offset() - start;

    // Use the pc the JVMCI runtime stashed for the implicit exception as
    // the deopt pc (O7), then clear the thread-local field.
    __ ld_ptr(G2_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()), O7);
    __ st_ptr(G0, Address(G2_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
    // NOTE(review): O7 is biased by -8 here and re-biased by +8 after the
    // uncommon_trap call below — presumably so the stashed pc looks like a
    // call return address to the stack walker; confirm.
    __ add(O7, -8, O7);

    uncommon_trap_offset = __ offset() - start;

    // Save everything in sight.
    (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
    __ set_last_Java_frame(SP, NULL);

    // Read the pending deoptimization request into O1 (second argument to
    // Deoptimization::uncommon_trap) and reset the field to -1.
    __ ld(G2_thread, in_bytes(JavaThread::pending_deoptimization_offset()), O1);
    __ sub(G0, 1, L1);
    __ st(L1, G2_thread, in_bytes(JavaThread::pending_deoptimization_offset()));

    __ mov((int32_t)Deoptimization::Unpack_reexecute, L0deopt_mode);
    __ mov(G2_thread, O0);
    __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
    __ delayed()->nop();
    oop_maps->add_gc_map( __ offset()-start, map->deep_copy());
    __ get_thread();
    __ add(O7, 8, O7);
    __ reset_last_Java_frame();

    // Skip fetch_unroll_info — uncommon_trap already produced the UnrollBlock.
    __ ba(after_fetch_unroll_info_call);
    __ delayed()->nop(); // Delay slot
    masm->block_comment("END implicit_exception_uncommon_trap");
  } // EnableJVMCI
#endif // INCLUDE_JVMCI

  int exception_offset = __ offset() - start;

  // restore G2, the trampoline destroyed it
  __ get_thread();

  // On entry we have been jumped to by the exception handler (or exception_blob
  // for server). O0 contains the exception oop and O7 contains the original
  // exception pc. So if we push a frame here it will look to the
  // stack walking code (fetch_unroll_info) just like a normal call so
  // state will be extracted normally.

  // save exception oop in JavaThread and fall through into the
  // exception_in_tls case since they are handled in same way except
  // for where the pending exception is kept.
  __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset());

  //
  // Vanilla deoptimization with an exception pending in exception_oop
  //
  int exception_in_tls_offset = __ offset() - start;

  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
  // Opens a new stack frame
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  // Restore G2_thread
  __ get_thread();

#ifdef ASSERT
  {
    // verify that there is really an exception oop in exception_oop
    Label has_exception;
    __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
    __ br_notnull_short(Oexception, Assembler::pt, has_exception);
    __ stop("no exception in thread");
    __ bind(has_exception);

    // verify that there is no pending exception
    Label no_pending_exception;
    Address exception_addr(G2_thread, Thread::pending_exception_offset());
    __ ld_ptr(exception_addr, Oexception);
    __ br_null_short(Oexception, Assembler::pt, no_pending_exception);
    __ stop("must not have pending exception here");
    __ bind(no_pending_exception);
  }
#endif

  __ ba(cont);
  __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);;

  //
  // Reexecute entry, similar to c2 uncommon trap
  //
  int reexecute_offset = __ offset() - start;
#if INCLUDE_JVMCI && !defined(COMPILER1)
  if (EnableJVMCI && UseJVMCICompiler) {
    // JVMCI does not use this kind of deoptimization
    __ should_not_reach_here();
  }
#endif
  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode);

  // Common path for the non-JVMCI entries: ask the runtime how to unroll.
  __ bind(cont);

  __ set_last_Java_frame(SP, noreg);

  // do the call by hand so we can get the oopmap

  __ mov(G2_thread, L7_thread_cache);
  __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  __ delayed()->mov(G2_thread, O0);

  // Set an oopmap for the call site this describes all our saved volatile registers

  oop_maps->add_gc_map( __ offset()-start, map);

  __ mov(L7_thread_cache, G2_thread);

  __ reset_last_Java_frame();

#if INCLUDE_JVMCI
  if (EnableJVMCI) {
    __ bind(after_fetch_unroll_info_call);
  }
#endif
  // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
  // so this move will survive

  __ mov(L0deopt_mode, G4deopt_mode);

  // fetch_unroll_info's result (the UnrollBlock*) was returned in O0;
  // stash it where it survives the window restore below.
  __ mov(O0, O2UnrollBlock->after_save());

  RegisterSaver::restore_result_registers(masm);

  Label noException;
  __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException);

  // Move the pending exception from exception_oop to Oexception so
  // the pending exception will be picked up by the interpreter.
  __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
  __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
  __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
  __ bind(noException);

  // deallocate the deoptimization frame taking care to preserve the return values
  __ set_last_Java_frame(SP, G0);
  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
#endif
  __ reset_last_Java_frame();
  __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);

#if !defined(_LP64) && defined(COMPILER2)
  // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
  // I0/I1 if the return value is long.
  Label not_long;
  __ cmp_and_br_short(O0,T_LONG, Assembler::notEqual, Assembler::pt, not_long);
  __ ldd(saved_Greturn1_addr,I0);
  __ bind(not_long);
#endif
  __ ret();
  __ delayed()->restore();

  masm->flush();
  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
#if INCLUDE_JVMCI
  if (EnableJVMCI) {
    _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
    _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
  }
#endif
}
3212
3213 #ifdef COMPILER2
3214
3215 //------------------------------generate_uncommon_trap_blob--------------------
3216 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
3217 // instead.
3218 void SharedRuntime::generate_uncommon_trap_blob() {
3219 // allocate space for the code
3220 ResourceMark rm;
3221 // setup code generation tools
3222 int pad = VerifyThread ? 512 : 0;
3223 #ifdef ASSERT
3224 if (UseStackBanging) {
3225 pad += StackShadowPages*16 + 32;
3226 }
3227 #endif
3228 #ifdef _LP64
3229 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
3230 #else
|