--- old/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp 2014-03-06 11:59:49.097382770 +0100 +++ new/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp 2014-03-06 11:59:48.748632600 +0100 @@ -55,9 +55,9 @@ } -void C1_MacroAssembler::build_frame(int frame_size_in_bytes) { - - generate_stack_overflow_check(frame_size_in_bytes); +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) { + assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); + generate_stack_overflow_check(bang_size_in_bytes); // Create the frame. save_frame_c1(frame_size_in_bytes); } --- old/src/cpu/sparc/vm/sparc.ad 2014-03-06 11:59:49.106468949 +0100 +++ new/src/cpu/sparc/vm/sparc.ad 2014-03-06 11:59:48.724417467 +0100 @@ -1146,15 +1146,16 @@ st->print_cr("Verify_Thread"); st->print("\t"); } - size_t framesize = C->frame_slots() << LogBytesPerInt; + size_t framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); // Calls to C2R adapters often do not accept exceptional returns. // We require that their callers must bang for them. But be careful, because // some VM calls (such as call site linkage) can use several kilobytes of // stack. But the stack safety zone should account for that. // See bugs 4446381, 4468289, 4497237. - if (C->need_stack_bang(framesize)) { - st->print_cr("! stack bang"); st->print("\t"); + if (C->need_stack_bang(bangsize)) { + st->print_cr("! stack bang (%d bytes)", bangsize); st->print("\t"); } if (Assembler::is_simm13(-framesize)) { @@ -1178,17 +1179,18 @@ __ verify_thread(); - size_t framesize = C->frame_slots() << LogBytesPerInt; + size_t framesize = C->frame_size_in_bytes(); assert(framesize >= 16*wordSize, "must have room for reg. save area"); assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment"); + int bangsize = C->bang_size_in_bytes(); // Calls to C2R adapters often do not accept exceptional returns. // We require that their callers must bang for them. But be careful, because // some VM calls (such as call site linkage) can use several kilobytes of // stack. But the stack safety zone should account for that. // See bugs 4446381, 4468289, 4497237. - if (C->need_stack_bang(framesize)) { - __ generate_stack_overflow_check(framesize); + if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); } if (Assembler::is_simm13(-framesize)) { @@ -2504,7 +2506,7 @@ enc_class call_epilog %{ if( VerifyStackAtCalls ) { MacroAssembler _masm(&cbuf); - int framesize = ra_->C->frame_slots() << LogBytesPerInt; + int framesize = ra_->C->frame_size_in_bytes(); Register temp_reg = G3; __ add(SP, framesize, temp_reg); __ cmp(temp_reg, FP); --- old/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp 2014-03-06 11:59:49.141810534 +0100 +++ new/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp 2014-03-06 11:59:48.818381607 +0100 @@ -152,7 +152,7 @@ } -int LIR_Assembler::initial_frame_size_in_bytes() { +int LIR_Assembler::initial_frame_size_in_bytes() const { return in_bytes(frame_map()->framesize_in_bytes()); } @@ -182,7 +182,7 @@ int number_of_locks = entry_state->locks_size(); // Create a frame for the compiled activation. - __ build_frame(initial_frame_size_in_bytes()); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); // OSR buffer is // --- old/src/cpu/ppc/vm/cppInterpreter_ppc.cpp 2014-03-06 11:59:49.218729857 +0100 +++ new/src/cpu/ppc/vm/cppInterpreter_ppc.cpp 2014-03-06 11:59:48.883692367 +0100 @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "asm/assembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "ci/ciMethod.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/cppInterpreter.hpp" #include "interpreter/interpreter.hpp" @@ -2940,34 +2941,31 @@ istate->_last_Java_fp = last_Java_fp; } -int AbstractInterpreter::layout_activation(Method* method, - int temps, // Number of slots on java expression stack in use. - int popframe_args, - int monitors, // Number of active monitors. - int caller_actual_parameters, - int callee_params,// Number of slots for callee parameters. - int callee_locals,// Number of slots for locals. - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { - - // NOTE this code must exactly mimic what - // InterpreterGenerator::generate_compute_interpreter_state() does - // as far as allocating an interpreter frame. However there is an - // exception. With the C++ based interpreter only the top most frame - // has a full sized expression stack. The 16 byte slop factor is - // both the abi scratch area and a place to hold a result from a - // callee on its way to the callers stack. - - int monitor_size = frame::interpreter_frame_monitor_size_in_bytes() * monitors; - int frame_size; - int top_frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes() +template static void frame_size_helper(M* method, + int monitors, + int& monitor_size, + int& top_frame_size) { + monitor_size = frame::interpreter_frame_monitor_size_in_bytes() * monitors; + top_frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes() + monitor_size + (method->max_stack() *Interpreter::stackElementWords * BytesPerWord) + 2*BytesPerWord, frame::alignment_in_bytes) + frame::top_ijava_frame_abi_size; +} + +template int AbstractInterpreter::size_activation(M* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + int monitor_size = 0; + int top_frame_size = 0; + frame_size_helper(method, monitors, monitor_size, top_frame_size); + + int frame_size; if (is_top_frame) { frame_size = top_frame_size; } else { @@ -2981,65 +2979,103 @@ assert(popframe_args==0, "non-zero for top_frame only"); } - // If we actually have a frame to layout we must now fill in all the pieces. - if (interpreter_frame != NULL) { + return frame_size/BytesPerWord; +} - intptr_t sp = (intptr_t)interpreter_frame->sp(); - intptr_t fp = *(intptr_t *)sp; - assert(fp == (intptr_t)caller->sp(), "fp must match"); - interpreterState cur_state = - (interpreterState)(fp - frame::interpreter_frame_cinterpreterstate_size_in_bytes()); - - // Now fill in the interpreterState object. - - intptr_t* locals; - if (caller->is_interpreted_frame()) { - // Locals must agree with the caller because it will be used to set the - // caller's tos when we return. - interpreterState prev = caller->get_interpreterState(); - // Calculate start of "locals" for MH calls. For MH calls, the - // current method() (= MH target) and prev->callee() (= - // MH.invoke*()) are different and especially have different - // signatures. To pop the argumentsof the caller, we must use - // the prev->callee()->size_of_arguments() because that's what - // the caller actually pushed. Currently, for synthetic MH - // calls (deoptimized from inlined MH calls), detected by - // is_method_handle_invoke(), we use the callee's arguments - // because here, the caller's and callee's signature match. - if (true /*!caller->is_at_mh_callsite()*/) { - locals = prev->stack() + method->size_of_parameters(); - } else { - // Normal MH call. - locals = prev->stack() + prev->callee()->size_of_parameters(); - } - } else { - bool is_deopted; - locals = (intptr_t*) (fp + ((method->max_locals() - 1) * BytesPerWord) + - frame::parent_ijava_frame_abi_size); - } +template int AbstractInterpreter::size_activation(Method* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +template int AbstractInterpreter::size_activation(ciMethod* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +void AbstractInterpreter::layout_activation(Method* method, + int temps, // Number of slots on java expression stack in use. + int popframe_args, + int monitors, // Number of active monitors. + int caller_actual_parameters, + int callee_params,// Number of slots for callee parameters. + int callee_locals,// Number of slots for locals. + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { - intptr_t* monitor_base = (intptr_t*) cur_state; - intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size); + // NOTE this code must exactly mimic what + // InterpreterGenerator::generate_compute_interpreter_state() does + // as far as allocating an interpreter frame. However there is an + // exception. With the C++ based interpreter only the top most frame + // has a full sized expression stack. The 16 byte slop factor is + // both the abi scratch area and a place to hold a result from a + // callee on its way to the callers stack. - // Provide pop_frame capability on PPC64, add popframe_args. - // +1 because stack is always prepushed. - intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (temps + popframe_args + 1) * BytesPerWord); - - BytecodeInterpreter::layout_interpreterState(cur_state, - caller, - interpreter_frame, - method, - locals, - stack, - stack_base, - monitor_base, - (intptr_t*)(((intptr_t)fp)-top_frame_size), - is_top_frame); + int monitor_size = 0; + int top_frame_size = 0; + frame_size_helper(method, monitors, monitor_size, top_frame_size); + + intptr_t sp = (intptr_t)interpreter_frame->sp(); + intptr_t fp = *(intptr_t *)sp; + assert(fp == (intptr_t)caller->sp(), "fp must match"); + interpreterState cur_state = + (interpreterState)(fp - frame::interpreter_frame_cinterpreterstate_size_in_bytes()); - BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, - interpreter_frame->fp()); - } - return frame_size/BytesPerWord; + // Now fill in the interpreterState object. + + intptr_t* locals; + if (caller->is_interpreted_frame()) { + // Locals must agree with the caller because it will be used to set the + // caller's tos when we return. + interpreterState prev = caller->get_interpreterState(); + // Calculate start of "locals" for MH calls. For MH calls, the + // current method() (= MH target) and prev->callee() (= + // MH.invoke*()) are different and especially have different + // signatures. To pop the argumentsof the caller, we must use + // the prev->callee()->size_of_arguments() because that's what + // the caller actually pushed. Currently, for synthetic MH + // calls (deoptimized from inlined MH calls), detected by + // is_method_handle_invoke(), we use the callee's arguments + // because here, the caller's and callee's signature match. + if (true /*!caller->is_at_mh_callsite()*/) { + locals = prev->stack() + method->size_of_parameters(); + } else { + // Normal MH call. + locals = prev->stack() + prev->callee()->size_of_parameters(); + } + } else { + bool is_deopted; + locals = (intptr_t*) (fp + ((method->max_locals() - 1) * BytesPerWord) + + frame::parent_ijava_frame_abi_size); + } + + intptr_t* monitor_base = (intptr_t*) cur_state; + intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size); + + // Provide pop_frame capability on PPC64, add popframe_args. + // +1 because stack is always prepushed. + intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (temps + popframe_args + 1) * BytesPerWord); + + BytecodeInterpreter::layout_interpreterState(cur_state, + caller, + interpreter_frame, + method, + locals, + stack, + stack_base, + monitor_base, + (intptr_t*)(((intptr_t)fp)-top_frame_size), + is_top_frame); + + BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, + interpreter_frame->fp()); } #endif // CC_INTERP --- old/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp 2014-03-06 11:59:49.143679488 +0100 +++ new/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp 2014-03-06 11:59:48.774507567 +0100 @@ -288,7 +288,7 @@ // build frame ciMethod* m = compilation()->method(); - __ build_frame(initial_frame_size_in_bytes()); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); // OSR buffer is // @@ -376,7 +376,7 @@ } // This specifies the rsp decrement needed to build the frame -int LIR_Assembler::initial_frame_size_in_bytes() { +int LIR_Assembler::initial_frame_size_in_bytes() const { // if rounding, must let FrameMap know! // The frame_map records size in slots (32bit word) --- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2014-03-06 11:59:49.222439246 +0100 +++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2014-03-06 11:59:48.851512261 +0100 @@ -5691,7 +5691,7 @@ // C2 compiled method's prolog code. -void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { +void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b) { // WARNING: Initial instruction MUST be 5 bytes or longer so that // NativeJump::patch_verified_entry will be able to patch out the entry @@ -5699,18 +5699,20 @@ // the frame allocation can be either 3 or 6 bytes. So if we don't do // stack bang then we must use the 6 byte frame allocation even if // we have no frame. :-( - + assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect"); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove word for return addr framesize -= wordSize; + stack_bang_size -= wordSize; // Calls to C2R adapters often do not accept exceptional returns. // We require that their callers must bang for them. But be careful, because // some VM calls (such as call site linkage) can use several kilobytes of // stack. But the stack safety zone should account for that. // See bugs 4446381, 4468289, 4497237. - if (stack_bang) { - generate_stack_overflow_check(framesize); + if (stack_bang_size > 0) { + generate_stack_overflow_check(stack_bang_size); // We always push rbp, so that on return to interpreter rbp, will be // restored correctly and we can correct the stack. --- old/src/cpu/sparc/vm/templateInterpreter_sparc.cpp 2014-03-06 11:59:49.257728767 +0100 +++ new/src/cpu/sparc/vm/templateInterpreter_sparc.cpp 2014-03-06 11:59:48.903664471 +0100 @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" +#include "ci/ciMethod.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterGenerator.hpp" @@ -1526,7 +1527,7 @@ // +---------------+ <--- Gargs // | | -static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { +template static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { // Figure out the size of an interpreter frame (in words) given that we have a fully allocated // expression stack, the callee will have callee_extra_locals (so we can account for @@ -1563,38 +1564,24 @@ // the method is synchronized int monitor_size = method->is_synchronized() ? 1*frame::interpreter_frame_monitor_size() : 0; - return size_activation_helper(method->max_locals(), method->max_stack(), - monitor_size) + call_stub_size; + return size_activation_helper(method->max_locals(), method->max_stack(), + monitor_size) + call_stub_size; } -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, - int popframe_extra_args, - int moncount, - int caller_actual_parameters, - int callee_param_count, - int callee_local_count, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { +template int AbstractInterpreter::size_activation(M* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { // Note: This calculation must exactly parallel the frame setup // in InterpreterGenerator::generate_fixed_frame. - // If f!=NULL, set up the following variables: - // - Lmethod - // - Llocals - // - Lmonitors (to the indicated number of monitors) - // - Lesp (to the indicated number of temps) - // The frame f (if not NULL) on entry is a description of the caller of the frame - // we are about to layout. We are guaranteed that we will be able to fill in a - // new interpreter frame as its callee (i.e. the stack space is allocated and - // the amount was determined by an earlier call to this method with f == NULL). - // On return f (if not NULL) while describe the interpreter frame we just layed out. - int monitor_size = moncount * frame::interpreter_frame_monitor_size(); - int rounded_vm_local_words = round_to(frame::interpreter_frame_vm_local_words,WordsPerLong); + int monitor_size = monitors * frame::interpreter_frame_monitor_size(); assert(monitor_size == round_to(monitor_size, WordsPerLong), "must align"); + // // Note: if you look closely this appears to be doing something much different // than generate_fixed_frame. What is happening is this. On sparc we have to do @@ -1619,146 +1606,188 @@ // there is no sense in messing working code. // - int rounded_cls = round_to((callee_local_count - callee_param_count), WordsPerLong); + int rounded_cls = round_to((callee_locals - callee_params), WordsPerLong); assert(rounded_cls == round_to(rounded_cls, WordsPerLong), "must align"); - int raw_frame_size = size_activation_helper(rounded_cls, method->max_stack(), - monitor_size); + int raw_frame_size = size_activation_helper(rounded_cls, method->max_stack(), + monitor_size); - if (interpreter_frame != NULL) { - // The skeleton frame must already look like an interpreter frame - // even if not fully filled out. - assert(interpreter_frame->is_interpreted_frame(), "Must be interpreted frame"); - - intptr_t* fp = interpreter_frame->fp(); - - JavaThread* thread = JavaThread::current(); - RegisterMap map(thread, false); - // More verification that skeleton frame is properly walkable - assert(fp == caller->sp(), "fp must match"); - - intptr_t* montop = fp - rounded_vm_local_words; - - // preallocate monitors (cf. __ add_monitor_to_stack) - intptr_t* monitors = montop - monitor_size; - - // preallocate stack space - intptr_t* esp = monitors - 1 - - (tempcount * Interpreter::stackElementWords) - - popframe_extra_args; - - int local_words = method->max_locals() * Interpreter::stackElementWords; - NEEDS_CLEANUP; - intptr_t* locals; - if (caller->is_interpreted_frame()) { - // Can force the locals area to end up properly overlapping the top of the expression stack. - intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1; - // Note that this computation means we replace size_of_parameters() values from the caller - // interpreter frame's expression stack with our argument locals - int parm_words = caller_actual_parameters * Interpreter::stackElementWords; - locals = Lesp_ptr + parm_words; - int delta = local_words - parm_words; - int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0; - *interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS; - if (!is_bottom_frame) { - // Llast_SP is set below for the current frame to SP (with the - // extra space for the callee's locals). Here we adjust - // Llast_SP for the caller's frame, removing the extra space - // for the current method's locals. - *caller->register_addr(Llast_SP) = *interpreter_frame->register_addr(I5_savedSP); - } else { - assert(*caller->register_addr(Llast_SP) >= *interpreter_frame->register_addr(I5_savedSP), "strange Llast_SP"); - } + return raw_frame_size; +} + +template int AbstractInterpreter::size_activation(Method* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +template int AbstractInterpreter::size_activation(ciMethod* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_local_count, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // Set up the following variables: + // - Lmethod + // - Llocals + // - Lmonitors (to the indicated number of monitors) + // - Lesp (to the indicated number of temps) + // The frame caller on entry is a description of the caller of the + // frame we are about to layout. We are guaranteed that we will be + // able to fill in a new interpreter frame as its callee (i.e. the + // stack space is allocated and the amount was determined by an + // earlier call to the size_activation() method). On return caller + // while describe the interpreter frame we just layed out. + + // The skeleton frame must already look like an interpreter frame + // even if not fully filled out. + assert(interpreter_frame->is_interpreted_frame(), "Must be interpreted frame"); + + int rounded_vm_local_words = round_to(frame::interpreter_frame_vm_local_words,WordsPerLong); + int monitor_size = moncount * frame::interpreter_frame_monitor_size(); + assert(monitor_size == round_to(monitor_size, WordsPerLong), "must align"); + + intptr_t* fp = interpreter_frame->fp(); + + JavaThread* thread = JavaThread::current(); + RegisterMap map(thread, false); + // More verification that skeleton frame is properly walkable + assert(fp == caller->sp(), "fp must match"); + + intptr_t* montop = fp - rounded_vm_local_words; + + // preallocate monitors (cf. __ add_monitor_to_stack) + intptr_t* monitors = montop - monitor_size; + + // preallocate stack space + intptr_t* esp = monitors - 1 - + (tempcount * Interpreter::stackElementWords) - + popframe_extra_args; + + int local_words = method->max_locals() * Interpreter::stackElementWords; + NEEDS_CLEANUP; + intptr_t* locals; + if (caller->is_interpreted_frame()) { + // Can force the locals area to end up properly overlapping the top of the expression stack. + intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1; + // Note that this computation means we replace size_of_parameters() values from the caller + // interpreter frame's expression stack with our argument locals + int parm_words = caller_actual_parameters * Interpreter::stackElementWords; + locals = Lesp_ptr + parm_words; + int delta = local_words - parm_words; + int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0; + *interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS; + if (!is_bottom_frame) { + // Llast_SP is set below for the current frame to SP (with the + // extra space for the callee's locals). Here we adjust + // Llast_SP for the caller's frame, removing the extra space + // for the current method's locals. + *caller->register_addr(Llast_SP) = *interpreter_frame->register_addr(I5_savedSP); } else { - assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases"); - // Don't have Lesp available; lay out locals block in the caller - // adjacent to the register window save area. - // - // Compiled frames do not allocate a varargs area which is why this if - // statement is needed. - // - if (caller->is_compiled_frame()) { - locals = fp + frame::register_save_words + local_words - 1; - } else { - locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1; - } - if (!caller->is_entry_frame()) { - // Caller wants his own SP back - int caller_frame_size = caller->cb()->frame_size(); - *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS; - } + assert(*caller->register_addr(Llast_SP) >= *interpreter_frame->register_addr(I5_savedSP), "strange Llast_SP"); } - if (TraceDeoptimization) { - if (caller->is_entry_frame()) { - // make sure I5_savedSP and the entry frames notion of saved SP - // agree. This assertion duplicate a check in entry frame code - // but catches the failure earlier. - assert(*caller->register_addr(Lscratch) == *interpreter_frame->register_addr(I5_savedSP), - "would change callers SP"); - } - if (caller->is_entry_frame()) { - tty->print("entry "); - } - if (caller->is_compiled_frame()) { - tty->print("compiled "); - if (caller->is_deoptimized_frame()) { - tty->print("(deopt) "); - } - } - if (caller->is_interpreted_frame()) { - tty->print("interpreted "); + } else { + assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases"); + // Don't have Lesp available; lay out locals block in the caller + // adjacent to the register window save area. + // + // Compiled frames do not allocate a varargs area which is why this if + // statement is needed. + // + if (caller->is_compiled_frame()) { + locals = fp + frame::register_save_words + local_words - 1; + } else { + locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1; + } + if (!caller->is_entry_frame()) { + // Caller wants his own SP back + int caller_frame_size = caller->cb()->frame_size(); + *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS; + } + } + if (TraceDeoptimization) { + if (caller->is_entry_frame()) { + // make sure I5_savedSP and the entry frames notion of saved SP + // agree. This assertion duplicate a check in entry frame code + // but catches the failure earlier. + assert(*caller->register_addr(Lscratch) == *interpreter_frame->register_addr(I5_savedSP), + "would change callers SP"); + } + if (caller->is_entry_frame()) { + tty->print("entry "); + } + if (caller->is_compiled_frame()) { + tty->print("compiled "); + if (caller->is_deoptimized_frame()) { + tty->print("(deopt) "); } - tty->print_cr("caller fp=0x%x sp=0x%x", caller->fp(), caller->sp()); - tty->print_cr("save area = 0x%x, 0x%x", caller->sp(), caller->sp() + 16); - tty->print_cr("save area = 0x%x, 0x%x", caller->fp(), caller->fp() + 16); - tty->print_cr("interpreter fp=0x%x sp=0x%x", interpreter_frame->fp(), interpreter_frame->sp()); - tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->sp(), interpreter_frame->sp() + 16); - tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->fp(), interpreter_frame->fp() + 16); - tty->print_cr("Llocals = 0x%x", locals); - tty->print_cr("Lesp = 0x%x", esp); - tty->print_cr("Lmonitors = 0x%x", monitors); } - - if (method->max_locals() > 0) { - assert(locals < caller->sp() || locals >= (caller->sp() + 16), "locals in save area"); - assert(locals < caller->fp() || locals > (caller->fp() + 16), "locals in save area"); - assert(locals < interpreter_frame->sp() || locals > (interpreter_frame->sp() + 16), "locals in save area"); - assert(locals < interpreter_frame->fp() || locals >= (interpreter_frame->fp() + 16), "locals in save area"); + if (caller->is_interpreted_frame()) { + tty->print("interpreted "); } + tty->print_cr("caller fp=0x%x sp=0x%x", caller->fp(), caller->sp()); + tty->print_cr("save area = 0x%x, 0x%x", caller->sp(), caller->sp() + 16); + tty->print_cr("save area = 0x%x, 0x%x", caller->fp(), caller->fp() + 16); + tty->print_cr("interpreter fp=0x%x sp=0x%x", interpreter_frame->fp(), interpreter_frame->sp()); + tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->sp(), interpreter_frame->sp() + 16); + tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->fp(), interpreter_frame->fp() + 16); + tty->print_cr("Llocals = 0x%x", locals); + tty->print_cr("Lesp = 0x%x", esp); + tty->print_cr("Lmonitors = 0x%x", monitors); + } + + if (method->max_locals() > 0) { + assert(locals < caller->sp() || locals >= (caller->sp() + 16), "locals in save area"); + assert(locals < caller->fp() || locals > (caller->fp() + 16), "locals in save area"); + assert(locals < interpreter_frame->sp() || locals > (interpreter_frame->sp() + 16), "locals in save area"); + assert(locals < interpreter_frame->fp() || locals >= (interpreter_frame->fp() + 16), "locals in save area"); + } #ifdef _LP64 - assert(*interpreter_frame->register_addr(I5_savedSP) & 1, "must be odd"); + assert(*interpreter_frame->register_addr(I5_savedSP) & 1, "must be odd"); #endif - *interpreter_frame->register_addr(Lmethod) = (intptr_t) method; - *interpreter_frame->register_addr(Llocals) = (intptr_t) locals; - *interpreter_frame->register_addr(Lmonitors) = (intptr_t) monitors; - *interpreter_frame->register_addr(Lesp) = (intptr_t) esp; - // Llast_SP will be same as SP as there is no adapter space - *interpreter_frame->register_addr(Llast_SP) = (intptr_t) interpreter_frame->sp() - STACK_BIAS; - *interpreter_frame->register_addr(LcpoolCache) = (intptr_t) method->constants()->cache(); + *interpreter_frame->register_addr(Lmethod) = (intptr_t) method; + *interpreter_frame->register_addr(Llocals) = (intptr_t) locals; + *interpreter_frame->register_addr(Lmonitors) = (intptr_t) monitors; + *interpreter_frame->register_addr(Lesp) = (intptr_t) esp; + // Llast_SP will be same as SP as there is no adapter space + *interpreter_frame->register_addr(Llast_SP) = (intptr_t) interpreter_frame->sp() - STACK_BIAS; + *interpreter_frame->register_addr(LcpoolCache) = (intptr_t) method->constants()->cache(); #ifdef FAST_DISPATCH - *interpreter_frame->register_addr(IdispatchTables) = (intptr_t) Interpreter::dispatch_table(); + *interpreter_frame->register_addr(IdispatchTables) = (intptr_t) Interpreter::dispatch_table(); #endif #ifdef ASSERT - BasicObjectLock* mp = (BasicObjectLock*)monitors; + BasicObjectLock* mp = (BasicObjectLock*)monitors; - assert(interpreter_frame->interpreter_frame_method() == method, "method matches"); - assert(interpreter_frame->interpreter_frame_local_at(9) == (intptr_t *)((intptr_t)locals - (9 * Interpreter::stackElementSize)), "locals match"); - assert(interpreter_frame->interpreter_frame_monitor_end() == mp, "monitor_end matches"); - assert(((intptr_t *)interpreter_frame->interpreter_frame_monitor_begin()) == ((intptr_t *)mp)+monitor_size, "monitor_begin matches"); - assert(interpreter_frame->interpreter_frame_tos_address()-1 == esp, "esp matches"); - - // check bounds - intptr_t* lo = interpreter_frame->sp() + (frame::memory_parameter_word_sp_offset - 1); - intptr_t* hi = interpreter_frame->fp() - rounded_vm_local_words; - assert(lo < monitors && montop <= hi, "monitors in bounds"); - assert(lo <= esp && esp < monitors, "esp in bounds"); + assert(interpreter_frame->interpreter_frame_method() == method, "method matches"); + assert(interpreter_frame->interpreter_frame_local_at(9) == (intptr_t *)((intptr_t)locals - (9 * Interpreter::stackElementSize)), "locals match"); + assert(interpreter_frame->interpreter_frame_monitor_end() == mp, "monitor_end matches"); + assert(((intptr_t *)interpreter_frame->interpreter_frame_monitor_begin()) == ((intptr_t *)mp)+monitor_size, "monitor_begin matches"); + assert(interpreter_frame->interpreter_frame_tos_address()-1 == esp, "esp matches"); + + // check bounds + intptr_t* lo = interpreter_frame->sp() + (frame::memory_parameter_word_sp_offset - 1); + intptr_t* hi = interpreter_frame->fp() - rounded_vm_local_words; + assert(lo < monitors && montop <= hi, "monitors in bounds"); + assert(lo <= esp && esp < monitors, "esp in bounds"); #endif // ASSERT - } - - return raw_frame_size; } //---------------------------------------------------------------------------------------------------- --- old/src/cpu/sparc/vm/cppInterpreter_sparc.cpp 2014-03-06 11:59:49.294804710 +0100 +++ new/src/cpu/sparc/vm/cppInterpreter_sparc.cpp 2014-03-06 11:59:48.954484838 +0100 @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "asm/assembler.hpp" +#include "ci/ciMethod.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/cppInterpreter.hpp" #include "interpreter/interpreter.hpp" @@ -2060,7 +2061,7 @@ } -static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { +template static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { // Figure out the size of an interpreter frame (in words) given that we have a fully allocated // expression stack, the callee will have callee_extra_locals (so we can account for @@ -2098,8 +2099,8 @@ // the method is synchronized int monitor_size = method->is_synchronized() ? 1*frame::interpreter_frame_monitor_size() : 0; - return size_activation_helper(method->max_locals(), method->max_stack(), - monitor_size) + call_stub_size; + return size_activation_helper(method->max_locals(), method->max_stack(), + monitor_size) + call_stub_size; } void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill, @@ -2183,31 +2184,31 @@ istate->_last_Java_pc = (intptr_t*) last_Java_pc; } +template static int frame_size_helper(M* method, + int moncount, + int callee_param_size, + int callee_locals_size, + bool is_top_frame, + int& monitor_size, + int& full_frame_words) { + int extra_locals_size = callee_locals_size - callee_param_size; + monitor_size = (sizeof(BasicObjectLock) * moncount) / wordSize; + full_frame_words = size_activation_helper(extra_locals_size, method->max_stack(), monitor_size); + int short_frame_words = size_activation_helper(extra_locals_size, method->max_stack(), monitor_size); + int frame_words = is_top_frame ? full_frame_words : short_frame_words; -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, // Number of slots on java expression stack in use - int popframe_extra_args, - int moncount, // Number of active monitors - int caller_actual_parameters, - int callee_param_size, - int callee_locals_size, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { + return frame_words; +} +template int AbstractInterpreter::size_activation(M* method, + int tempcount, + int popframe_extra_args, + int moncount, + int callee_param_size, + int callee_locals_size, + bool is_top_frame) { assert(popframe_extra_args == 0, "NEED TO FIX"); - // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state() - // does as far as allocating an interpreter frame. - // If interpreter_frame!=NULL, set up the method, locals, and monitors. - // The frame interpreter_frame, if not NULL, is guaranteed to be the right size, - // as determined by a previous call to this method. - // It is also guaranteed to be walkable even though it is in a skeletal state // NOTE: return size is in words not bytes - // NOTE: tempcount is the current size of the java expression stack. For top most - // frames we will allocate a full sized expression stack and not the curback - // version that non-top frames have. - // Calculate the amount our frame will be adjust by the callee. For top frame // this is zero. @@ -2216,87 +2217,125 @@ // to it. So it ignores last_frame_adjust value. Seems suspicious as far // as getting sender_sp correct. - int extra_locals_size = callee_locals_size - callee_param_size; - int monitor_size = (sizeof(BasicObjectLock) * moncount) / wordSize; - int full_frame_words = size_activation_helper(extra_locals_size, method->max_stack(), monitor_size); - int short_frame_words = size_activation_helper(extra_locals_size, method->max_stack(), monitor_size); - int frame_words = is_top_frame ? full_frame_words : short_frame_words; + int unused_monitor_size = 0; + int unused_full_frame_words = 0; + return frame_size_helper(method, moncount, callee_param_size, callee_locals_size, is_top_frame, + unused_monitor_size, unused_full_frame_words); +} + +template int AbstractInterpreter::size_activation(Method* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +template int AbstractInterpreter::size_activation(ciMethod* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, // Number of slots on java expression stack in use + int popframe_extra_args, + int moncount, // Number of active monitors + int caller_actual_parameters, + int callee_param_size, + int callee_locals_size, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + assert(popframe_extra_args == 0, "NEED TO FIX"); + // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state() + // does as far as allocating an interpreter frame. + // Set up the method, locals, and monitors. + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a skeletal state + // NOTE: tempcount is the current size of the java expression stack. For top most + // frames we will allocate a full sized expression stack and not the curback + // version that non-top frames have. + int monitor_size = 0; + int full_frame_words = 0; + int frame_words = frame_size_helper(method, moncount, callee_param_size, callee_locals_size, + is_top_frame, monitor_size, full_frame_words); /* - if we actually have a frame to layout we must now fill in all the pieces. This means both + We must now fill in all the pieces of the frame. This means both the interpreterState and the registers. */ - if (interpreter_frame != NULL) { - // MUCHO HACK + // MUCHO HACK - intptr_t* frame_bottom = interpreter_frame->sp() - (full_frame_words - frame_words); - // 'interpreter_frame->sp()' is unbiased while 'frame_bottom' must be a biased value in 64bit mode. - assert(((intptr_t)frame_bottom & 0xf) == 0, "SP biased in layout_activation"); - frame_bottom = (intptr_t*)((intptr_t)frame_bottom - STACK_BIAS); + intptr_t* frame_bottom = interpreter_frame->sp() - (full_frame_words - frame_words); + // 'interpreter_frame->sp()' is unbiased while 'frame_bottom' must be a biased value in 64bit mode. + assert(((intptr_t)frame_bottom & 0xf) == 0, "SP biased in layout_activation"); + frame_bottom = (intptr_t*)((intptr_t)frame_bottom - STACK_BIAS); - /* Now fillin the interpreterState object */ + /* Now fillin the interpreterState object */ - interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter)); + interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter)); - intptr_t* locals; + intptr_t* locals; - // Calculate the postion of locals[0]. This is painful because of - // stack alignment (same as ia64). The problem is that we can - // not compute the location of locals from fp(). fp() will account - // for the extra locals but it also accounts for aligning the stack - // and we can't determine if the locals[0] was misaligned but max_locals - // was enough to have the - // calculate postion of locals. fp already accounts for extra locals. - // +2 for the static long no_params() issue. + // Calculate the postion of locals[0]. This is painful because of + // stack alignment (same as ia64). The problem is that we can + // not compute the location of locals from fp(). fp() will account + // for the extra locals but it also accounts for aligning the stack + // and we can't determine if the locals[0] was misaligned but max_locals + // was enough to have the + // calculate postion of locals. fp already accounts for extra locals. + // +2 for the static long no_params() issue. - if (caller->is_interpreted_frame()) { - // locals must agree with the caller because it will be used to set the - // caller's tos when we return. - interpreterState prev = caller->get_interpreterState(); - // stack() is prepushed. - locals = prev->stack() + method->size_of_parameters(); - } else { - // Lay out locals block in the caller adjacent to the register window save area. - // - // Compiled frames do not allocate a varargs area which is why this if - // statement is needed. - // - intptr_t* fp = interpreter_frame->fp(); - int local_words = method->max_locals() * Interpreter::stackElementWords; - - if (caller->is_compiled_frame()) { - locals = fp + frame::register_save_words + local_words - 1; - } else { - locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1; - } + if (caller->is_interpreted_frame()) { + // locals must agree with the caller because it will be used to set the + // caller's tos when we return. + interpreterState prev = caller->get_interpreterState(); + // stack() is prepushed. + locals = prev->stack() + method->size_of_parameters(); + } else { + // Lay out locals block in the caller adjacent to the register window save area. + // + // Compiled frames do not allocate a varargs area which is why this if + // statement is needed. + // + intptr_t* fp = interpreter_frame->fp(); + int local_words = method->max_locals() * Interpreter::stackElementWords; + if (caller->is_compiled_frame()) { + locals = fp + frame::register_save_words + local_words - 1; + } else { + locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1; } - // END MUCHO HACK - intptr_t* monitor_base = (intptr_t*) cur_state; - intptr_t* stack_base = monitor_base - monitor_size; - /* +1 because stack is always prepushed */ - intptr_t* stack = stack_base - (tempcount + 1); - - - BytecodeInterpreter::layout_interpreterState(cur_state, - caller, - interpreter_frame, - method, - locals, - stack, - stack_base, - monitor_base, - frame_bottom, - is_top_frame); + } + // END MUCHO HACK - BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp()); + intptr_t* monitor_base = (intptr_t*) cur_state; + intptr_t* stack_base = monitor_base - monitor_size; + /* +1 because stack is always prepushed */ + intptr_t* stack = stack_base - (tempcount + 1); + + + BytecodeInterpreter::layout_interpreterState(cur_state, + caller, + interpreter_frame, + method, + locals, + stack, + stack_base, + monitor_base, + frame_bottom, + is_top_frame); - } - return frame_words; + BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp()); } #endif // CC_INTERP --- old/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp 2014-03-06 11:59:49.277463620 +0100 +++ new/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp 2014-03-06 11:59:48.927902334 +0100 @@ -349,13 +349,14 @@ } -void C1_MacroAssembler::build_frame(int frame_size_in_bytes) { +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) { + assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); // Make sure there is enough stack space for this method's activation. // Note that we do this before doing an enter(). This matches the // ordering of C2's stack overflow check / rsp decrement and allows // the SharedRuntime stack overflow handling to be consistent // between the two compilers. - generate_stack_overflow_check(frame_size_in_bytes); + generate_stack_overflow_check(bang_size_in_bytes); push(rbp); #ifdef TIERED --- old/src/cpu/x86/vm/cppInterpreter_x86.cpp 2014-03-06 11:59:49.315813729 +0100 +++ new/src/cpu/x86/vm/cppInterpreter_x86.cpp 2014-03-06 11:59:48.983256211 +0100 @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" +#include "ci/ciMethod.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/cppInterpreter.hpp" #include "interpreter/interpreter.hpp" @@ -2342,119 +2343,163 @@ "Stack top out of range"); } -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, // - int popframe_extra_args, - int moncount, - int caller_actual_parameters, - int callee_param_count, - int callee_locals, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { - - assert(popframe_extra_args == 0, "FIX ME"); - // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state() - // does as far as allocating an interpreter frame. - // If interpreter_frame!=NULL, set up the method, locals, and monitors. - // The frame interpreter_frame, if not NULL, is guaranteed to be the right size, - // as determined by a previous call to this method. - // It is also guaranteed to be walkable even though it is in a skeletal state - // NOTE: return size is in words not bytes - // NOTE: tempcount is the current size of the java expression stack. For top most - // frames we will allocate a full sized expression stack and not the curback - // version that non-top frames have. - - // Calculate the amount our frame will be adjust by the callee. For top frame - // this is zero. - - // NOTE: ia64 seems to do this wrong (or at least backwards) in that it - // calculates the extra locals based on itself. Not what the callee does - // to it. So it ignores last_frame_adjust value. Seems suspicious as far - // as getting sender_sp correct. +template static int frame_size_helper(M* method, + int tempcount, + int moncount, + int callee_param_count, + int callee_locals, + bool is_top_frame, + int& monitor_size, + int& full_frame_size) { int extra_locals_size = (callee_locals - callee_param_count) * BytesPerWord; - int monitor_size = sizeof(BasicObjectLock) * moncount; + monitor_size = sizeof(BasicObjectLock) * moncount; // First calculate the frame size without any java expression stack int short_frame_size = size_activation_helper(extra_locals_size, monitor_size); // Now with full size expression stack - int full_frame_size = short_frame_size + method->max_stack() * BytesPerWord; + full_frame_size = short_frame_size + method->max_stack() * BytesPerWord; // and now with only live portion of the expression stack short_frame_size = short_frame_size + tempcount * BytesPerWord; // the size the activation is right now. Only top frame is full size int frame_size = (is_top_frame ? full_frame_size : short_frame_size); + return frame_size; +} + +template int AbstractInterpreter::size_activation(M* method, + int tempcount, + int popframe_extra_args, + int moncount, + int callee_param_count, + int callee_locals, + bool is_top_frame) { + assert(popframe_extra_args == 0, "FIX ME"); + // NOTE: return size is in words not bytes + + // Calculate the amount our frame will be adjust by the callee. For top frame + // this is zero. + + // NOTE: ia64 seems to do this wrong (or at least backwards) in that it + // calculates the extra locals based on itself. Not what the callee does + // to it. So it ignores last_frame_adjust value. Seems suspicious as far + // as getting sender_sp correct. + + int unused_monitor_size = 0; + int unused_full_frame_size = 0; + return frame_size_helper(method, tempcount, moncount, callee_param_count, callee_locals, + is_top_frame, unused_monitor_size, unused_full_frame_size)/BytesPerWord; +} + +template int AbstractInterpreter::size_activation(Method* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +template int AbstractInterpreter::size_activation(ciMethod* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, // + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + + assert(popframe_extra_args == 0, "FIX ME"); + // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state() + // does as far as allocating an interpreter frame. + // Set up the method, locals, and monitors. + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a skeletal state + // NOTE: tempcount is the current size of the java expression stack. For top most + // frames we will allocate a full sized expression stack and not the curback + // version that non-top frames have. + + int monitor_size = 0; + int full_frame_size = 0; + int frame_size = frame_size_helper(method, tempcount, moncount, callee_param_count, callee_locals, + is_top_frame, monitor_size, full_frame_size); - if (interpreter_frame != NULL) { #ifdef ASSERT - assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); + assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); #endif - // MUCHO HACK + // MUCHO HACK - intptr_t* frame_bottom = (intptr_t*) ((intptr_t)interpreter_frame->sp() - (full_frame_size - frame_size)); + intptr_t* frame_bottom = (intptr_t*) ((intptr_t)interpreter_frame->sp() - (full_frame_size - frame_size)); - /* Now fillin the interpreterState object */ + /* Now fillin the interpreterState object */ - // The state object is the first thing on the frame and easily located + // The state object is the first thing on the frame and easily located - interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter)); + interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter)); - // Find the locals pointer. This is rather simple on x86 because there is no - // confusing rounding at the callee to account for. We can trivially locate - // our locals based on the current fp(). - // Note: the + 2 is for handling the "static long no_params() method" issue. - // (too bad I don't really remember that issue well...) + // Find the locals pointer. This is rather simple on x86 because there is no + // confusing rounding at the callee to account for. We can trivially locate + // our locals based on the current fp(). + // Note: the + 2 is for handling the "static long no_params() method" issue. + // (too bad I don't really remember that issue well...) - intptr_t* locals; - // If the caller is interpreted we need to make sure that locals points to the first - // argument that the caller passed and not in an area where the stack might have been extended. - // because the stack to stack to converter needs a proper locals value in order to remove the - // arguments from the caller and place the result in the proper location. Hmm maybe it'd be - // simpler if we simply stored the result in the BytecodeInterpreter object and let the c++ code - // adjust the stack?? HMMM QQQ - // - if (caller->is_interpreted_frame()) { - // locals must agree with the caller because it will be used to set the - // caller's tos when we return. - interpreterState prev = caller->get_interpreterState(); - // stack() is prepushed. - locals = prev->stack() + method->size_of_parameters(); - // locals = caller->unextended_sp() + (method->size_of_parameters() - 1); - if (locals != interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2) { - // os::breakpoint(); - } - } else { - // this is where a c2i would have placed locals (except for the +2) - locals = interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2; + intptr_t* locals; + // If the caller is interpreted we need to make sure that locals points to the first + // argument that the caller passed and not in an area where the stack might have been extended. + // because the stack to stack to converter needs a proper locals value in order to remove the + // arguments from the caller and place the result in the proper location. Hmm maybe it'd be + // simpler if we simply stored the result in the BytecodeInterpreter object and let the c++ code + // adjust the stack?? HMMM QQQ + // + if (caller->is_interpreted_frame()) { + // locals must agree with the caller because it will be used to set the + // caller's tos when we return. + interpreterState prev = caller->get_interpreterState(); + // stack() is prepushed. + locals = prev->stack() + method->size_of_parameters(); + // locals = caller->unextended_sp() + (method->size_of_parameters() - 1); + if (locals != interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2) { + // os::breakpoint(); } + } else { + // this is where a c2i would have placed locals (except for the +2) + locals = interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2; + } - intptr_t* monitor_base = (intptr_t*) cur_state; - intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size); - /* +1 because stack is always prepushed */ - intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (tempcount + 1) * BytesPerWord); - - - BytecodeInterpreter::layout_interpreterState(cur_state, - caller, - interpreter_frame, - method, - locals, - stack, - stack_base, - monitor_base, - frame_bottom, - is_top_frame); + intptr_t* monitor_base = (intptr_t*) cur_state; + intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size); + /* +1 because stack is always prepushed */ + intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (tempcount + 1) * BytesPerWord); + + + BytecodeInterpreter::layout_interpreterState(cur_state, + caller, + interpreter_frame, + method, + locals, + stack, + stack_base, + monitor_base, + frame_bottom, + is_top_frame); - // BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp()); - } - return frame_size/BytesPerWord; + // BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp()); } bool AbstractInterpreter::can_be_compiled(methodHandle m) { --- old/src/cpu/x86/vm/macroAssembler_x86.hpp 2014-03-06 11:59:49.370254789 +0100 +++ new/src/cpu/x86/vm/macroAssembler_x86.hpp 2014-03-06 11:59:49.038904196 +0100 @@ -1127,7 +1127,7 @@ void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); } // C2 compiled method's prolog code. - void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b); + void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b); // clear memory of size 'cnt' qwords, starting at 'base'. void clear_mem(Register base, Register cnt, Register rtmp); --- old/src/cpu/sparc/vm/sharedRuntime_sparc.cpp 2014-03-06 11:59:49.318651673 +0100 +++ new/src/cpu/sparc/vm/sharedRuntime_sparc.cpp 2014-03-06 11:59:49.014817486 +0100 @@ -3355,13 +3355,16 @@ Register O4array_size = O4; Label loop; - // Before we make new frames, check to see if stack is available. - // Do this after the caller's return address is on top of stack +#ifdef ASSERT + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. if (UseStackBanging) { // Get total frame size for interpreted frames __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4); __ bang_stack_size(O4, O3, G3_scratch); } +#endif __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size); __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs); @@ -3409,9 +3412,11 @@ ResourceMark rm; // setup code generation tools int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code +#ifdef ASSERT if (UseStackBanging) { pad += StackShadowPages*16 + 32; } +#endif #ifdef _LP64 CodeBuffer buffer("deopt_blob", 2100+pad, 512); #else @@ -3632,9 +3637,11 @@ ResourceMark rm; // setup code generation tools int pad = VerifyThread ? 512 : 0; +#ifdef ASSERT if (UseStackBanging) { pad += StackShadowPages*16 + 32; } +#endif #ifdef _LP64 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512); #else --- old/src/cpu/x86/vm/sharedRuntime_x86_32.cpp 2014-03-06 11:59:52.150410257 +0100 +++ new/src/cpu/x86/vm/sharedRuntime_x86_32.cpp 2014-03-06 11:59:51.960669134 +0100 @@ -3007,11 +3007,15 @@ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); - // Stack bang to make sure there's enough room for these interpreter frames. +#ifdef ASSERT + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. if (UseStackBanging) { __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ bang_stack_size(rbx, rcx); } +#endif // Load array of frame pcs into ECX __ movptr(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); @@ -3227,12 +3231,15 @@ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); - // Stack bang to make sure there's enough room for these interpreter frames. +#ifdef ASSERT + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. if (UseStackBanging) { __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ bang_stack_size(rbx, rcx); } - +#endif // Load array of frame pcs into ECX __ movl(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); --- old/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2014-03-06 11:59:53.357451446 +0100 +++ new/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2014-03-06 11:59:53.032102958 +0100 @@ -3477,11 +3477,15 @@ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); - // Stack bang to make sure there's enough room for these interpreter frames. +#ifdef ASSERT + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. if (UseStackBanging) { __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ bang_stack_size(rbx, rcx); } +#endif // Load address of array of frame pcs into rcx __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); @@ -3670,11 +3674,15 @@ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); - // Stack bang to make sure there's enough room for these interpreter frames. +#ifdef ASSERT + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. if (UseStackBanging) { __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ bang_stack_size(rbx, rcx); } +#endif // Load address of array of frame pcs into rcx (address*) __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); --- old/src/cpu/x86/vm/templateInterpreter_x86_32.cpp 2014-03-06 11:59:53.378325430 +0100 +++ new/src/cpu/x86/vm/templateInterpreter_x86_32.cpp 2014-03-06 11:59:53.120635289 +0100 @@ -1686,91 +1686,6 @@ return overhead_size + method_stack + stub_code; } -// asm based interpreter deoptimization helpers - -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, - int popframe_extra_args, - int moncount, - int caller_actual_parameters, - int callee_param_count, - int callee_locals, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { - // Note: This calculation must exactly parallel the frame setup - // in AbstractInterpreterGenerator::generate_method_entry. - // If interpreter_frame!=NULL, set up the method, locals, and monitors. - // The frame interpreter_frame, if not NULL, is guaranteed to be the right size, - // as determined by a previous call to this method. - // It is also guaranteed to be walkable even though it is in a skeletal state - // NOTE: return size is in words not bytes - - // fixed size of an interpreter frame: - int max_locals = method->max_locals() * Interpreter::stackElementWords; - int extra_locals = (method->max_locals() - method->size_of_parameters()) * - Interpreter::stackElementWords; - - int overhead = frame::sender_sp_offset - frame::interpreter_frame_initial_sp_offset; - - // Our locals were accounted for by the caller (or last_frame_adjust on the transistion) - // Since the callee parameters already account for the callee's params we only need to account for - // the extra locals. - - - int size = overhead + - ((callee_locals - callee_param_count)*Interpreter::stackElementWords) + - (moncount*frame::interpreter_frame_monitor_size()) + - tempcount*Interpreter::stackElementWords + popframe_extra_args; - - if (interpreter_frame != NULL) { -#ifdef ASSERT - if (!EnableInvokeDynamic) - // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? - // Probably, since deoptimization doesn't work yet. - assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); - assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); -#endif - - interpreter_frame->interpreter_frame_set_method(method); - // NOTE the difference in using sender_sp and interpreter_frame_sender_sp - // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) - // and sender_sp is fp+8 - intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; - -#ifdef ASSERT - if (caller->is_interpreted_frame()) { - assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); - } -#endif - - interpreter_frame->interpreter_frame_set_locals(locals); - BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); - BasicObjectLock* monbot = montop - moncount; - interpreter_frame->interpreter_frame_set_monitor_end(monbot); - - // Set last_sp - intptr_t* rsp = (intptr_t*) monbot - - tempcount*Interpreter::stackElementWords - - popframe_extra_args; - interpreter_frame->interpreter_frame_set_last_sp(rsp); - - // All frames but the initial (oldest) interpreter frame we fill in have a - // value for sender_sp that allows walking the stack but isn't - // truly correct. Correct the value here. - - if (extra_locals != 0 && - interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { - interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); - } - *interpreter_frame->interpreter_frame_cache_addr() = - method->constants()->cache(); - } - return size; -} - - //------------------------------------------------------------------------------------------------------------------------ // Exceptions --- old/src/cpu/x86/vm/templateInterpreter_x86_64.cpp 2014-03-06 11:59:53.396217851 +0100 +++ new/src/cpu/x86/vm/templateInterpreter_x86_64.cpp 2014-03-06 11:59:53.060558920 +0100 @@ -1695,87 +1695,6 @@ return (overhead_size + method_stack + stub_code); } -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, - int popframe_extra_args, - int moncount, - int caller_actual_parameters, - int callee_param_count, - int callee_locals, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { - // Note: This calculation must exactly parallel the frame setup - // in AbstractInterpreterGenerator::generate_method_entry. - // If interpreter_frame!=NULL, set up the method, locals, and monitors. - // The frame interpreter_frame, if not NULL, is guaranteed to be the - // right size, as determined by a previous call to this method. - // It is also guaranteed to be walkable even though it is in a skeletal state - - // fixed size of an interpreter frame: - int max_locals = method->max_locals() * Interpreter::stackElementWords; - int extra_locals = (method->max_locals() - method->size_of_parameters()) * - Interpreter::stackElementWords; - - int overhead = frame::sender_sp_offset - - frame::interpreter_frame_initial_sp_offset; - // Our locals were accounted for by the caller (or last_frame_adjust - // on the transistion) Since the callee parameters already account - // for the callee's params we only need to account for the extra - // locals. - int size = overhead + - (callee_locals - callee_param_count)*Interpreter::stackElementWords + - moncount * frame::interpreter_frame_monitor_size() + - tempcount* Interpreter::stackElementWords + popframe_extra_args; - if (interpreter_frame != NULL) { -#ifdef ASSERT - if (!EnableInvokeDynamic) - // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? - // Probably, since deoptimization doesn't work yet. - assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); - assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); -#endif - - interpreter_frame->interpreter_frame_set_method(method); - // NOTE the difference in using sender_sp and - // interpreter_frame_sender_sp interpreter_frame_sender_sp is - // the original sp of the caller (the unextended_sp) and - // sender_sp is fp+16 XXX - intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; - -#ifdef ASSERT - if (caller->is_interpreted_frame()) { - assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); - } -#endif - - interpreter_frame->interpreter_frame_set_locals(locals); - BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); - BasicObjectLock* monbot = montop - moncount; - interpreter_frame->interpreter_frame_set_monitor_end(monbot); - - // Set last_sp - intptr_t* esp = (intptr_t*) monbot - - tempcount*Interpreter::stackElementWords - - popframe_extra_args; - interpreter_frame->interpreter_frame_set_last_sp(esp); - - // All frames but the initial (oldest) interpreter frame we fill in have - // a value for sender_sp that allows walking the stack but isn't - // truly correct. Correct the value here. - if (extra_locals != 0 && - interpreter_frame->sender_sp() == - interpreter_frame->interpreter_frame_sender_sp()) { - interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + - extra_locals); - } - *interpreter_frame->interpreter_frame_cache_addr() = - method->constants()->cache(); - } - return size; -} - //----------------------------------------------------------------------------- // Exceptions --- old/src/cpu/zero/vm/cppInterpreter_zero.cpp 2014-03-06 11:59:53.396423209 +0100 +++ new/src/cpu/zero/vm/cppInterpreter_zero.cpp 2014-03-06 11:59:53.178341418 +0100 @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "asm/assembler.hpp" +#include "ci/ciMethod.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/cppInterpreter.hpp" #include "interpreter/interpreter.hpp" @@ -916,17 +917,47 @@ return (InterpreterFrame *) fp; } -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, - int popframe_extra_args, - int moncount, - int caller_actual_parameters, - int callee_param_count, - int callee_locals, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { +template int AbstractInterpreter::size_activation(M* method, + int tempcount, + int popframe_extra_args, + int moncount, + int callee_param_count, + int callee_locals, + bool is_top_frame) { + int header_words = InterpreterFrame::header_words; + int monitor_words = moncount * frame::interpreter_frame_monitor_size(); + int stack_words = is_top_frame ? method->max_stack() : tempcount; + int callee_extra_locals = callee_locals - callee_param_count; + + return header_words + monitor_words + stack_words + callee_extra_locals; +} + +template int AbstractInterpreter::size_activation(Method* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +template int AbstractInterpreter::size_activation(ciMethod* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { assert(popframe_extra_args == 0, "what to do?"); assert(!is_top_frame || (!callee_locals && !callee_param_count), "top frame should have no caller"); @@ -935,39 +966,31 @@ // does (the full InterpreterFrame::build, that is, not the // one that creates empty frames for the deoptimizer). // - // If interpreter_frame is not NULL then it will be filled in. - // It's size is determined by a previous call to this method, - // so it should be correct. + // interpreter_frame will be filled in. It's size is determined by + // a previous call to the size_activation() method, // // Note that tempcount is the current size of the expression // stack. For top most frames we will allocate a full sized // expression stack and not the trimmed version that non-top // frames have. - int header_words = InterpreterFrame::header_words; int monitor_words = moncount * frame::interpreter_frame_monitor_size(); - int stack_words = is_top_frame ? method->max_stack() : tempcount; - int callee_extra_locals = callee_locals - callee_param_count; - - if (interpreter_frame) { - intptr_t *locals = interpreter_frame->fp() + method->max_locals(); - interpreterState istate = interpreter_frame->get_interpreterState(); - intptr_t *monitor_base = (intptr_t*) istate; - intptr_t *stack_base = monitor_base - monitor_words; - intptr_t *stack = stack_base - tempcount - 1; - - BytecodeInterpreter::layout_interpreterState(istate, - caller, - NULL, - method, - locals, - stack, - stack_base, - monitor_base, - NULL, - is_top_frame); - } - return header_words + monitor_words + stack_words + callee_extra_locals; + intptr_t *locals = interpreter_frame->fp() + method->max_locals(); + interpreterState istate = interpreter_frame->get_interpreterState(); + intptr_t *monitor_base = (intptr_t*) istate; + intptr_t *stack_base = monitor_base - monitor_words; + intptr_t *stack = stack_base - tempcount - 1; + + BytecodeInterpreter::layout_interpreterState(istate, + caller, + NULL, + method, + locals, + stack, + stack_base, + monitor_base, + NULL, + is_top_frame); } void BytecodeInterpreter::layout_interpreterState(interpreterState istate, --- old/src/cpu/x86/vm/x86_32.ad 2014-03-06 11:59:53.437086406 +0100 +++ new/src/cpu/x86/vm/x86_32.ad 2014-03-06 11:59:53.152360602 +0100 @@ -512,14 +512,15 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { Compile* C = ra_->C; - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove wordSize for return addr which is already pushed. framesize -= wordSize; - if (C->need_stack_bang(framesize)) { + if (C->need_stack_bang(bangsize)) { framesize -= wordSize; - st->print("# stack bang"); + st->print("# stack bang (%d bytes)", bangsize); st->print("\n\t"); st->print("PUSH EBP\t# Save EBP"); if (framesize) { @@ -563,9 +564,10 @@ Compile* C = ra_->C; MacroAssembler _masm(&cbuf); - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); - __ verified_entry(framesize, C->need_stack_bang(framesize), C->in_24_bit_fp_mode()); + __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode()); C->set_frame_complete(cbuf.insts_size()); @@ -589,7 +591,7 @@ #ifndef PRODUCT void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { Compile *C = ra_->C; - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove two words for return addr and rbp, framesize -= 2*wordSize; @@ -629,7 +631,7 @@ masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); } - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove two words for return addr and rbp, framesize -= 2*wordSize; @@ -663,7 +665,7 @@ if (C->max_vector_size() > 16) size += 3; // vzeroupper if (do_polling() && C->is_method_compilation()) size += 6; - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove two words for return addr and rbp, framesize -= 2*wordSize; --- old/src/cpu/x86/vm/x86_64.ad 2014-03-06 11:59:53.431653526 +0100 +++ new/src/cpu/x86/vm/x86_64.ad 2014-03-06 11:59:53.092661941 +0100 @@ -713,14 +713,15 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { Compile* C = ra_->C; - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove wordSize for return addr which is already pushed. framesize -= wordSize; - if (C->need_stack_bang(framesize)) { + if (C->need_stack_bang(bangsize)) { framesize -= wordSize; - st->print("# stack bang"); + st->print("# stack bang (%d bytes)", bangsize); st->print("\n\t"); st->print("pushq rbp\t# Save rbp"); if (framesize) { @@ -751,9 +752,10 @@ Compile* C = ra_->C; MacroAssembler _masm(&cbuf); - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); - __ verified_entry(framesize, C->need_stack_bang(framesize), false); + __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, false); C->set_frame_complete(cbuf.insts_size()); @@ -786,7 +788,7 @@ st->cr(); st->print("\t"); } - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove word for return adr already pushed // and RBP @@ -822,7 +824,7 @@ __ vzeroupper(); } - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove word for return adr already pushed // and RBP --- old/src/share/vm/c1/c1_LIRAssembler.cpp 2014-03-06 11:59:53.556801054 +0100 +++ new/src/share/vm/c1/c1_LIRAssembler.cpp 2014-03-06 11:59:53.222681025 +0100 @@ -190,6 +190,13 @@ return _masm->pc(); } +// To bang the stack of this compiled method we use the stack size +// that the interpreter would need in case of a deoptimization. This +// removes the need to bang the stack in the deoptimization blob which +// in turn simplifies stack overflow handling. +int LIR_Assembler::bang_size_in_bytes() const { + return MAX2(initial_frame_size_in_bytes(), _compilation->interpreter_frame_size()); +} void LIR_Assembler::emit_exception_entries(ExceptionInfoList* info_list) { for (int i = 0; i < info_list->length(); i++) { @@ -797,7 +804,7 @@ void LIR_Assembler::build_frame() { - _masm->build_frame(initial_frame_size_in_bytes()); + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); } --- old/src/share/vm/asm/assembler.cpp 2014-03-06 11:59:53.557128601 +0100 +++ new/src/share/vm/asm/assembler.cpp 2014-03-06 11:59:53.198251619 +0100 @@ -134,7 +134,7 @@ // is greater than a page. const int page_size = os::vm_page_size(); - int bang_end = StackShadowPages*page_size; + int bang_end = (StackShadowPages+1)*page_size; // This is how far the previous frame's stack banging extended. const int bang_end_safe = bang_end; --- old/src/share/vm/c1/c1_IR.hpp 2014-03-06 11:59:53.573658396 +0100 +++ new/src/share/vm/c1/c1_IR.hpp 2014-03-06 11:59:53.269693282 +0100 @@ -284,6 +284,8 @@ bool is_method_handle_invoke() const { return _is_method_handle_invoke; } void set_is_method_handle_invoke(bool x) { _is_method_handle_invoke = x; } + + int interpreter_frame_size() const; }; --- old/src/share/vm/c1/c1_Compilation.cpp 2014-03-06 11:59:53.618190697 +0100 +++ new/src/share/vm/c1/c1_Compilation.cpp 2014-03-06 11:59:53.312143249 +0100 @@ -546,6 +546,7 @@ , _code(buffer_blob) , _has_access_indexed(false) , _current_instruction(NULL) +, _interpreter_frame_size(0) #ifndef PRODUCT , _last_instruction_printed(NULL) #endif // PRODUCT --- old/src/share/vm/c1/c1_IR.cpp 2014-03-06 11:59:53.641194980 +0100 +++ new/src/share/vm/c1/c1_IR.cpp 2014-03-06 11:59:53.251319504 +0100 @@ -227,8 +227,44 @@ _oop_map->set_oop(name); } +// Mirror the stack size calculation in the deopt code +// How much stack space would we need at this point in the program in +// case of deoptimization? +int CodeEmitInfo::interpreter_frame_size() const { + ValueStack* state = _stack; + int size = 0; + int callee_parameters = 0; + int callee_locals = 0; + int popframe_extra_args = 0; + if (JvmtiExport::can_pop_frame() && state->kind() != ValueStack::StateBefore) { + ciMethod* method = state->scope()->method(); + int bci = state->bci(); + popframe_extra_args = MAX2(-method->stack_effect_if_at_invoke(bci), 0); + } + while (state != NULL) { + int locks = state->locks_size(); + int temps = state->stack_size(); + bool is_top_frame = (state->caller_state() == NULL); + ciMethod* method = state->scope()->method(); + + int frame_size = BytesPerWord * Interpreter::size_activation(method, + temps + callee_parameters, + popframe_extra_args, + locks, + callee_parameters, + callee_locals, + is_top_frame); + size += frame_size; + + callee_parameters = method->size_of_parameters(); + callee_locals = method->max_locals(); + popframe_extra_args = 0; + state = state->caller_state(); + } + return size + Deoptimization::last_frame_adjust(0, callee_locals) * BytesPerWord; +} // Implementation of IR --- old/src/share/vm/c1/c1_Compilation.hpp 2014-03-06 11:59:53.659303158 +0100 +++ new/src/share/vm/c1/c1_Compilation.hpp 2014-03-06 11:59:53.294503075 +0100 @@ -88,6 +88,7 @@ CodeOffsets _offsets; CodeBuffer _code; bool _has_access_indexed; + int _interpreter_frame_size; // Stack space needed in case of a deoptimization // compilation helpers void initialize(); @@ -262,6 +263,18 @@ // Dump inlining replay data to the stream. void dump_inline_data(outputStream* out) { /* do nothing now */ } + + // How much stack space would the interpreter need in case of a + // deoptimization (worst case) + void update_interpreter_frame_size(int size) { + if (_interpreter_frame_size < size) { + _interpreter_frame_size = size; + } + } + + int interpreter_frame_size() const { + return _interpreter_frame_size; + } }; --- old/src/share/vm/c1/c1_LIRAssembler.hpp 2014-03-06 11:59:56.182810383 +0100 +++ new/src/share/vm/c1/c1_LIRAssembler.hpp 2014-03-06 11:59:55.826573395 +0100 @@ -132,7 +132,8 @@ int code_offset() const; address pc() const; - int initial_frame_size_in_bytes(); + int initial_frame_size_in_bytes() const; + int bang_size_in_bytes() const; // test for constants which can be encoded directly in instructions static bool is_small_constant(LIR_Opr opr); --- old/src/share/vm/opto/compile.hpp 2014-03-06 11:59:56.817793433 +0100 +++ new/src/share/vm/opto/compile.hpp 2014-03-06 11:59:56.483666996 +0100 @@ -486,6 +486,7 @@ RegMask _FIRST_STACK_mask; // All stack slots usable for spills (depends on frame layout) Arena* _indexSet_arena; // control IndexSet allocation within PhaseChaitin void* _indexSet_free_block_list; // free list of IndexSet bit blocks + int _interpreter_frame_size; uint _node_bundling_limit; Bundle* _node_bundling_base; // Information for instruction bundling @@ -939,6 +940,7 @@ PhaseRegAlloc* regalloc() { return _regalloc; } int frame_slots() const { return _frame_slots; } int frame_size_in_words() const; // frame_slots in units of the polymorphic 'words' + int frame_size_in_bytes() const { return _frame_slots << LogBytesPerInt; } RegMask& FIRST_STACK_mask() { return _FIRST_STACK_mask; } Arena* indexSet_arena() { return _indexSet_arena; } void* indexSet_free_block_list() { return _indexSet_free_block_list; } @@ -950,6 +952,13 @@ bool need_stack_bang(int frame_size_in_bytes) const; bool need_register_stack_bang() const; + void update_interpreter_frame_size(int size) { + if (_interpreter_frame_size < size) { + _interpreter_frame_size = size; + } + } + int bang_size_in_bytes() const; + void set_matcher(Matcher* m) { _matcher = m; } //void set_regalloc(PhaseRegAlloc* ra) { _regalloc = ra; } void set_indexSet_arena(Arena* a) { _indexSet_arena = a; } --- old/src/share/vm/opto/callnode.cpp 2014-03-06 11:59:56.901323835 +0100 +++ new/src/share/vm/opto/callnode.cpp 2014-03-06 11:59:56.595251285 +0100 @@ -607,6 +607,45 @@ } } +// Mirror the stack size calculation in the deopt code +// How much stack space would we need at this point in the program in +// case of deoptimization? +int JVMState::interpreter_frame_size() const { + const JVMState* jvms = this; + int size = 0; + int callee_parameters = 0; + int callee_locals = 0; + int popframe_extra_args = 0; + + if (JvmtiExport::can_pop_frame()) { + ciMethod* method = this->method(); + int bci = this->bci(); + popframe_extra_args = MAX2(-method->stack_effect_if_at_invoke(bci), 0); + } + + while (jvms != NULL) { + int locks = jvms->nof_monitors(); + int temps = jvms->stk_size(); + bool is_top_frame = (jvms->caller() == NULL); + ciMethod* method = jvms->method(); + + int frame_size = BytesPerWord * Interpreter::size_activation(method, + temps + callee_parameters, + popframe_extra_args, + locks, + callee_parameters, + callee_locals, + is_top_frame); + size += frame_size; + + callee_parameters = method->size_of_parameters(); + callee_locals = method->max_locals(); + popframe_extra_args = 0; + jvms = jvms->caller(); + } + return size + Deoptimization::last_frame_adjust(0, callee_locals) * BytesPerWord; +} + //============================================================================= uint CallNode::cmp( const Node &n ) const { return _tf == ((CallNode&)n)._tf && _jvms == ((CallNode&)n)._jvms; } --- old/src/share/vm/opto/callnode.hpp 2014-03-06 11:59:56.963665398 +0100 +++ new/src/share/vm/opto/callnode.hpp 2014-03-06 11:59:56.619624990 +0100 @@ -300,6 +300,7 @@ JVMState* clone_shallow(Compile* C) const; // retains uncloned caller void set_map_deep(SafePointNode *map);// reset map for all callers void adapt_position(int delta); // Adapt offsets in in-array after adding an edge. + int interpreter_frame_size() const; #ifndef PRODUCT void format(PhaseRegAlloc *regalloc, const Node *n, outputStream* st) const; --- old/src/share/vm/ci/ciMethod.hpp 2014-03-06 11:59:56.926136614 +0100 +++ new/src/share/vm/ci/ciMethod.hpp 2014-03-06 11:59:56.533398366 +0100 @@ -71,6 +71,7 @@ int _interpreter_invocation_count; int _interpreter_throwout_count; int _instructions_size; + int _size_of_parameters; bool _uses_monitors; bool _balanced_monitors; @@ -166,6 +167,7 @@ int exception_table_length() const { check_is_loaded(); return _handler_count; } int interpreter_invocation_count() const { check_is_loaded(); return _interpreter_invocation_count; } int interpreter_throwout_count() const { check_is_loaded(); return _interpreter_throwout_count; } + int size_of_parameters() const { check_is_loaded(); return _size_of_parameters; } // Code size for inlining decisions. int code_size_for_inlining(); @@ -241,7 +243,8 @@ ciField* get_field_at_bci( int bci, bool &will_link); ciMethod* get_method_at_bci(int bci, bool &will_link, ciSignature* *declared_signature); - + int get_stack_effect_at_invoke(int bci, Bytecodes::Code code, int& inputs); + int stack_effect_if_at_invoke(int bci); // Given a certain calling environment, find the monomorphic target // for the call. Return NULL if the call is not monomorphic in // its calling environment. --- old/src/share/vm/interpreter/abstractInterpreter.hpp 2014-03-06 11:59:56.944807496 +0100 +++ new/src/share/vm/interpreter/abstractInterpreter.hpp 2014-03-06 11:59:56.641703795 +0100 @@ -181,30 +181,17 @@ // Deoptimization should reexecute this bytecode static bool bytecode_should_reexecute(Bytecodes::Code code); - // share implementation of size_activation and layout_activation: - static int size_activation(Method* method, + // deoptimization support + template + static int size_activation(M* method, int temps, int popframe_args, int monitors, - int caller_actual_parameters, int callee_params, int callee_locals, - bool is_top_frame, - bool is_bottom_frame) { - return layout_activation(method, - temps, - popframe_args, - monitors, - caller_actual_parameters, - callee_params, - callee_locals, - (frame*)NULL, - (frame*)NULL, - is_top_frame, - is_bottom_frame); - } + bool is_top_frame); - static int layout_activation(Method* method, + static void layout_activation(Method* method, int temps, int popframe_args, int monitors, --- old/src/share/vm/c1/c1_MacroAssembler.hpp 2014-03-06 11:59:56.925912052 +0100 +++ new/src/share/vm/c1/c1_MacroAssembler.hpp 2014-03-06 11:59:56.560273749 +0100 @@ -39,7 +39,7 @@ void explicit_null_check(Register base); void inline_cache_check(Register receiver, Register iCache); - void build_frame(int frame_size_in_bytes); + void build_frame(int frame_size_in_bytes, int bang_size_in_bytes); void remove_frame(int frame_size_in_bytes); void unverified_entry(Register receiver, Register ic_klass); --- old/src/share/vm/c1/c1_LinearScan.cpp 2014-03-06 11:59:56.991475312 +0100 +++ new/src/share/vm/c1/c1_LinearScan.cpp 2014-03-06 11:59:56.696060863 +0100 @@ -2451,6 +2451,9 @@ CodeEmitInfo* info = visitor.info_at(i); OopMap* oop_map = first_oop_map; + // compute worst case interpreter size in case of a deoptimization + _compilation->update_interpreter_frame_size(info->interpreter_frame_size()); + if (info->stack()->locks_size() != first_info->stack()->locks_size()) { // this info has a different number of locks then the precomputed oop map // (possible for lock and unlock instructions) -> compute oop map with --- old/src/share/vm/ci/ciMethod.cpp 2014-03-06 11:59:56.986945235 +0100 +++ new/src/share/vm/ci/ciMethod.cpp 2014-03-06 11:59:56.670507566 +0100 @@ -80,6 +80,7 @@ _code_size = h_m()->code_size(); _intrinsic_id = h_m()->intrinsic_id(); _handler_count = h_m()->exception_table_length(); + _size_of_parameters = h_m()->size_of_parameters(); _uses_monitors = h_m()->access_flags().has_monitor_bytecodes(); _balanced_monitors = !_uses_monitors || h_m()->access_flags().is_monitor_matching(); _is_c1_compilable = !h_m()->is_not_c1_compilable(); @@ -877,6 +878,27 @@ return iter.get_method(will_link, declared_signature); } +// How much stack slots is this invoke going to add or remove? +int ciMethod::get_stack_effect_at_invoke(int bci, Bytecodes::Code code, int& inputs) { + assert(Bytecodes::is_invoke(code), "not an invoke"); + bool ignored_will_link; + ciSignature* declared_signature = NULL; + ciMethod* ignored_callee = get_method_at_bci(bci, ignored_will_link, &declared_signature); + assert(declared_signature != NULL, "cannot be null"); + inputs = declared_signature->arg_size_for_bc(code); + int size = declared_signature->return_type()->size(); + return size - inputs; +} + +int ciMethod::stack_effect_if_at_invoke(int bci) { + Bytecodes::Code code = java_code_at_bci(bci); + if (Bytecodes::is_invoke(code)) { + int ignored = 0; + return get_stack_effect_at_invoke(bci, code, ignored); + } + return 0; +} + // ------------------------------------------------------------------ // Adjust a CounterData count to be commensurate with // interpreter_invocation_count. If the MDO exists for --- old/src/share/vm/opto/compile.cpp 2014-03-06 11:59:56.992410498 +0100 +++ new/src/share/vm/opto/compile.cpp 2014-03-06 11:59:56.722799714 +0100 @@ -439,6 +439,16 @@ return words; } +// To bang the stack of this compiled method we use the stack size +// that the interpreter would need in case of a deoptimization. This +// removes the need to bang the stack in the deoptimization blob which +// in turn simplifies stack overflow handling. +int Compile::bang_size_in_bytes() const { + int callee_locals = method() != NULL ? method()->max_locals() : 0; + int interpreter_frame_size = _interpreter_frame_size; + return MAX2(interpreter_frame_size, frame_size_in_bytes()); +} + // ============================================================================ //------------------------------CompileWrapper--------------------------------- class CompileWrapper : public StackObj { @@ -662,7 +672,8 @@ _inlining_incrementally(false), _print_inlining_list(NULL), _print_inlining_idx(0), - _preserve_jvm_state(0) { + _preserve_jvm_state(0), + _interpreter_frame_size(0) { C = this; CompileWrapper cw(this); @@ -966,7 +977,8 @@ _print_inlining_list(NULL), _print_inlining_idx(0), _preserve_jvm_state(0), - _allowed_reasons(0) { + _allowed_reasons(0), + _interpreter_frame_size(0) { C = this; #ifndef PRODUCT @@ -3056,8 +3068,12 @@ Node* m = n->in(i); ++i; if (m != NULL && !frc._visited.test_set(m->_idx)) { - if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) + if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) { + // compute worst case interpreter size in case of a deoptimization + update_interpreter_frame_size(m->as_SafePoint()->jvms()->interpreter_frame_size()); + sfpt.push(m); + } cnt = m->req(); nstack.push(n, i); // put on stack parent and next input's index n = m; --- old/src/share/vm/opto/graphKit.cpp 2014-03-06 11:59:59.079834194 +0100 +++ new/src/share/vm/opto/graphKit.cpp 2014-03-06 11:59:58.826957162 +0100 @@ -1049,13 +1049,8 @@ case Bytecodes::_invokedynamic: case Bytecodes::_invokeinterface: { - bool ignored_will_link; - ciSignature* declared_signature = NULL; - ciMethod* ignored_callee = method()->get_method_at_bci(bci(), ignored_will_link, &declared_signature); - assert(declared_signature != NULL, "cannot be null"); - inputs = declared_signature->arg_size_for_bc(code); - int size = declared_signature->return_type()->size(); - depth = size - inputs; + int size = method()->get_stack_effect_at_invoke(bci(), code, inputs); + depth = size; } break; --- old/src/share/vm/runtime/deoptimization.cpp 2014-03-06 11:59:59.179446361 +0100 +++ new/src/share/vm/runtime/deoptimization.cpp 2014-03-06 11:59:58.892271126 +0100 @@ -420,14 +420,8 @@ // frame[number_of_frames - 1 ] = on_stack_size(youngest) // frame[number_of_frames - 2 ] = on_stack_size(sender(youngest)) // frame[number_of_frames - 3 ] = on_stack_size(sender(sender(youngest))) - int caller_parms = callee_parameters; - if ((index == array->frames() - 1) && caller_was_method_handle) { - caller_parms = 0; - } - frame_sizes[number_of_frames - 1 - index] = BytesPerWord * array->element(index)->on_stack_size(caller_parms, - callee_parameters, + frame_sizes[number_of_frames - 1 - index] = BytesPerWord * array->element(index)->on_stack_size(callee_parameters, callee_locals, - index == 0, index == array->frames() - 1, popframe_extra_args); // This pc doesn't have to be perfect just good enough to identify the frame --- old/src/share/vm/opto/output.cpp 2014-03-06 11:59:59.180702046 +0100 +++ new/src/share/vm/opto/output.cpp 2014-03-06 11:59:58.762084924 +0100 @@ -171,8 +171,13 @@ // Determine if we need to generate a stack overflow check. // Do it if the method is not a stub function and // has java calls or has frame size > vm_page_size/8. + // The debug VM checks that deoptimization doesn't trigger an + // unexpected stack overflow (compiled method stack banging should + // guarantee it doesn't happen) so we always need the stack bang in + // a debug VM. return (UseStackBanging && stub_function() == NULL && - (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3)); + (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3 + NOT_DEBUG(|| true))); } bool Compile::need_register_stack_bang() const { --- old/src/share/vm/runtime/sharedRuntime.cpp 2014-03-06 11:59:59.464536891 +0100 +++ new/src/share/vm/runtime/sharedRuntime.cpp 2014-03-06 11:59:59.236407574 +0100 @@ -775,10 +775,13 @@ // going to be unwound. Dispatch to a shared runtime stub // which will cause the StackOverflowError to be fabricated // and processed. - // For stack overflow in deoptimization blob, cleanup thread. - if (thread->deopt_mark() != NULL) { - Deoptimization::cleanup_deopt_info(thread, NULL); - } + // Stack overflow should never occur during deoptimization: + // the compiled method bang the stack by as much as the + // interpreter would need in case of a deoptimization. The + // deoptimization blob and uncommon trap blob bang the stack + // in a debug VM to verify the correctness of the compiled + // method stack banging. + assert(thread->deopt_mark() == NULL, "no stack overflow from deopt blob/uncommon trap"); Events::log_exception(thread, "StackOverflowError at " INTPTR_FORMAT, pc); return StubRoutines::throw_StackOverflowError_entry(); } --- /dev/null 2013-11-25 16:34:39.360817680 +0100 +++ new/src/cpu/x86/vm/templateInterpreter_x86.cpp 2014-03-06 11:59:59.562276420 +0100 @@ -0,0 +1,145 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "ci/ciMethod.hpp" +#include "interpreter/interpreter.hpp" +#include "runtime/frame.inline.hpp" + +#ifndef CC_INTERP + +// asm based interpreter deoptimization helpers +template static int extra_locals_helper(M* method) { + return (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; +} + +template int AbstractInterpreter::size_activation(M* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + + // fixed size of an interpreter frame: + int extra_locals = extra_locals_helper(method); + + int overhead = frame::sender_sp_offset - + frame::interpreter_frame_initial_sp_offset; + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transistion) Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals. + int size = overhead + + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + popframe_args; + + return size; +} + +template int AbstractInterpreter::size_activation(Method* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +template int AbstractInterpreter::size_activation(ciMethod* method, + int temps, + int popframe_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame); + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = extra_locals_helper(method); + +#ifdef ASSERT + if (!EnableInvokeDynamic) + // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? + // Probably, since deoptimization doesn't work yet. + assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and + // interpreter_frame_sender_sp interpreter_frame_sender_sp is + // the original sp of the caller (the unextended_sp) and + // sender_sp is fp+8/16 (32bit/64bit) XXX + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(monbot); + + // Set last_sp + intptr_t* esp = (intptr_t*) monbot - + tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(esp); + + // All frames but the initial (oldest) interpreter frame we fill in have + // a value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. + if (extra_locals != 0 && + interpreter_frame->sender_sp() == + interpreter_frame->interpreter_frame_sender_sp()) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); +} + +#endif // CC_INTERP --- old/src/share/vm/runtime/vframeArray.hpp 2014-03-06 11:59:59.818659491 +0100 +++ new/src/share/vm/runtime/vframeArray.hpp 2014-03-06 11:59:59.501426642 +0100 @@ -85,10 +85,8 @@ // Returns the on stack word size for this frame // callee_parameters is the number of callee locals residing inside this frame - int on_stack_size(int caller_actual_parameters, - int callee_parameters, + int on_stack_size(int callee_parameters, int callee_locals, - bool is_bottom_frame, bool is_top_frame, int popframe_extra_stack_expression_els) const; --- /dev/null 2013-11-25 16:34:39.360817680 +0100 +++ new/test/compiler/uncommontrap/TestStackBangMonitorOwned.java 2014-03-06 11:59:59.650552920 +0100 @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8032410 + * @summary Stack overflow at deoptimization doesn't release owned monitors + * @run main/othervm -XX:-BackgroundCompilation -XX:CompileCommand=dontinline,TestStackBangMonitorOwned::m1 -XX:CompileCommand=exclude,TestStackBangMonitorOwned::m2 -Xss256K -XX:-UseOnStackReplacement TestStackBangMonitorOwned + * + */ +public class TestStackBangMonitorOwned { + + static class UnloadedClass1 { + volatile int field; + } + + static Object m1(boolean deopt) { + long l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, + l13, l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, + l25, l26, l27, l28, l29, l30, l31, l32, l33, l34, l35, l36, + l37, l38, l39, l40, l41, l42, l43, l44, l45, l46, l47, l48, + l49, l50, l51, l52, l53, l54, l55, l56, l57, l58, l59, l60, + l61, l62, l63, l64, l65, l66, l67, l68, l69, l70, l71, l72, + l73, l74, l75, l76, l77, l78, l79, l80, l81, l82, l83, l84, + l85, l86, l87, l88, l89, l90, l91, l92, l93, l94, l95, l96, + l97, l98, l99, l100, l101, l102, l103, l104, l105, l106, l107, + l108, l109, l110, l111, l112, l113, l114, l115, l116, l117, + l118, l119, l120, l121, l122, l123, l124, l125, l126, l127, + l128, l129, l130, l131, l132, l133, l134, l135, l136, l137, + l138, l139, l140, l141, l142, l143, l144, l145, l146, l147, + l148, l149, l150, l151, l152, l153, l154, l155, l156, l157, + l158, l159, l160, l161, l162, l163, l164, l165, l166, l167, + l168, l169, l170, l171, l172, l173, l174, l175, l176, l177, + l178, l179, l180, l181, l182, l183, l184, l185, l186, l187, + l188, l189, l190, l191, l192, l193, l194, l195, l196, l197, + l198, l199, l200, l201, l202, l203, l204, l205, l206, l207, + l208, l209, l210, l211, l212, l213, l214, l215, l216, l217, + l218, l219, l220, l221, l222, l223, l224, l225, l226, l227, + l228, l229, l230, l231, l232, l233, l234, l235, l236, l237, + l238, l239, l240, l241, l242, l243, l244, l245, l246, l247, + l248, l249, l250, l251, l252, l253, l254, l255, l256, l257, + l258, l259, l260, l261, l262, l263, l264, l265, l266, l267, + l268, l269, l270, l271, l272, l273, l274, l275, l276, l277, + l278, l279, l280, l281, l282, l283, l284, l285, l286, l287, + l288, l289, l290, l291, l292, l293, l294, l295, l296, l297, + l298, l299, l300, l301, l302, l303, l304, l305, l306, l307, + l308, l309, l310, l311, l312, l313, l314, l315, l316, l317, + l318, l319, l320, l321, l322, l323, l324, l325, l326, l327, + l328, l329, l330, l331, l332, l333, l334, l335, l336, l337, + l338, l339, l340, l341, l342, l343, l344, l345, l346, l347, + l348, l349, l350, l351, l352, l353, l354, l355, l356, l357, + l358, l359, l360, l361, l362, l363, l364, l365, l366, l367, + l368, l369, l370, l371, l372, l373, l374, l375, l376, l377, + l378, l379, l380, l381, l382, l383, l384, l385, l386, l387, + l388, l389, l390, l391, l392, l393, l394, l395, l396, l397, + l398, l399, l400, l401, l402, l403, l404, l405, l406, l407, + l408, l409, l410, l411, l412, l413, l414, l415, l416, l417, + l418, l419, l420, l421, l422, l423, l424, l425, l426, l427, + l428, l429, l430, l431, l432, l433, l434, l435, l436, l437, + l438, l439, l440, l441, l442, l443, l444, l445, l446, l447, + l448, l449, l450, l451, l452, l453, l454, l455, l456, l457, + l458, l459, l460, l461, l462, l463, l464, l465, l466, l467, + l468, l469, l470, l471, l472, l473, l474, l475, l476, l477, + l478, l479, l480, l481, l482, l483, l484, l485, l486, l487, + l488, l489, l490, l491, l492, l493, l494, l495, l496, l497, + l498, l499, l500, l501, l502, l503, l504, l505, l506, l507, + l508, l509, l510, l511; + + long ll0, ll1, ll2, ll3, ll4, ll5, ll6, ll7, ll8, ll9, ll10, ll11, ll12, + ll13, ll14, ll15, ll16, ll17, ll18, ll19, ll20, ll21, ll22, ll23, ll24, + ll25, ll26, ll27, ll28, ll29, ll30, ll31, ll32, ll33, ll34, ll35, ll36, + ll37, ll38, ll39, ll40, ll41, ll42, ll43, ll44, ll45, ll46, ll47, ll48, + ll49, ll50, ll51, ll52, ll53, ll54, ll55, ll56, ll57, ll58, ll59, ll60, + ll61, ll62, ll63, ll64, ll65, ll66, ll67, ll68, ll69, ll70, ll71, ll72, + ll73, ll74, ll75, ll76, ll77, ll78, ll79, ll80, ll81, ll82, ll83, ll84, + ll85, ll86, ll87, ll88, ll89, ll90, ll91, ll92, ll93, ll94, ll95, ll96, + ll97, ll98, ll99, ll100, ll101, ll102, ll103, ll104, ll105, ll106, ll107, + ll108, ll109, ll110, ll111, ll112, ll113, ll114, ll115, ll116, ll117, + ll118, ll119, ll120, ll121, ll122, ll123, ll124, ll125, ll126, ll127, + ll128, ll129, ll130, ll131, ll132, ll133, ll134, ll135, ll136, ll137, + ll138, ll139, ll140, ll141, ll142, ll143, ll144, ll145, ll146, ll147, + ll148, ll149, ll150, ll151, ll152, ll153, ll154, ll155, ll156, ll157, + ll158, ll159, ll160, ll161, ll162, ll163, ll164, ll165, ll166, ll167, + ll168, ll169, ll170, ll171, ll172, ll173, ll174, ll175, ll176, ll177, + ll178, ll179, ll180, ll181, ll182, ll183, ll184, ll185, ll186, ll187, + ll188, ll189, ll190, ll191, ll192, ll193, ll194, ll195, ll196, ll197, + ll198, ll199, ll200, ll201, ll202, ll203, ll204, ll205, ll206, ll207, + ll208, ll209, ll210, ll211, ll212, ll213, ll214, ll215, ll216, ll217, + ll218, ll219, ll220, ll221, ll222, ll223, ll224, ll225, ll226, ll227, + ll228, ll229, ll230, ll231, ll232, ll233, ll234, ll235, ll236, ll237, + ll238, ll239, ll240, ll241, ll242, ll243, ll244, ll245, ll246, ll247, + ll248, ll249, ll250, ll251, ll252, ll253, ll254, ll255, ll256, ll257, + ll258, ll259, ll260, ll261, ll262, ll263, ll264, ll265, ll266, ll267, + ll268, ll269, ll270, ll271, ll272, ll273, ll274, ll275, ll276, ll277, + ll278, ll279, ll280, ll281, ll282, ll283, ll284, ll285, ll286, ll287, + ll288, ll289, ll290, ll291, ll292, ll293, ll294, ll295, ll296, ll297, + ll298, ll299, ll300, ll301, ll302, ll303, ll304, ll305, ll306, ll307, + ll308, ll309, ll310, ll311, ll312, ll313, ll314, ll315, ll316, ll317, + ll318, ll319, ll320, ll321, ll322, ll323, ll324, ll325, ll326, ll327, + ll328, ll329, ll330, ll331, ll332, ll333, ll334, ll335, ll336, ll337, + ll338, ll339, ll340, ll341, ll342, ll343, ll344, ll345, ll346, ll347, + ll348, ll349, ll350, ll351, ll352, ll353, ll354, ll355, ll356, ll357, + ll358, ll359, ll360, ll361, ll362, ll363, ll364, ll365, ll366, ll367, + ll368, ll369, ll370, ll371, ll372, ll373, ll374, ll375, ll376, ll377, + ll378, ll379, ll380, ll381, ll382, ll383, ll384, ll385, ll386, ll387, + ll388, ll389, ll390, ll391, ll392, ll393, ll394, ll395, ll396, ll397, + ll398, ll399, ll400, ll401, ll402, ll403, ll404, ll405, ll406, ll407, + ll408, ll409, ll410, ll411, ll412, ll413, ll414, ll415, ll416, ll417, + ll418, ll419, ll420, ll421, ll422, ll423, ll424, ll425, ll426, ll427, + ll428, ll429, ll430, ll431, ll432, ll433, ll434, ll435, ll436, ll437, + ll438, ll439, ll440, ll441, ll442, ll443, ll444, ll445, ll446, ll447, + ll448, ll449, ll450, ll451, ll452, ll453, ll454, ll455, ll456, ll457, + ll458, ll459, ll460, ll461, ll462, ll463, ll464, ll465, ll466, ll467, + ll468, ll469, ll470, ll471, ll472, ll473, ll474, ll475, ll476, ll477, + ll478, ll479, ll480, ll481, ll482, ll483, ll484, ll485, ll486, ll487, + ll488, ll489, ll490, ll491, ll492, ll493, ll494, ll495, ll496, ll497, + ll498, ll499, ll500, ll501, ll502, ll503, ll504, ll505, ll506, ll507, + ll508, ll509, ll510, ll511; + + if (deopt) { + method_entered = true; + synchronized(monitor) { + do_monitor_acquire = true; + UnloadedClass1 res = new UnloadedClass1(); // forces deopt with c2 + res.field = 0; //forced deopt with c1 + return res; + } + } + return null; + } + + static boolean m2(boolean deopt) { + long l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, + l13, l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, + l25, l26, l27, l28, l29, l30, l31, l32, l33, l34, l35, l36, + l37, l38, l39, l40, l41, l42, l43, l44, l45, l46, l47, l48, + l49, l50, l51, l52, l53, l54, l55, l56, l57, l58, l59, l60, + l61, l62, l63, l64, l65, l66, l67, l68, l69, l70, l71, l72, + l73, l74, l75, l76, l77, l78, l79, l80, l81, l82, l83, l84, + l85, l86, l87, l88, l89, l90, l91, l92, l93, l94, l95, l96, + l97, l98, l99, l100, l101, l102, l103, l104, l105, l106, l107, + l108, l109, l110, l111, l112, l113, l114, l115, l116, l117, + l118, l119, l120, l121, l122, l123, l124, l125, l126, l127, + l128, l129, l130, l131, l132, l133, l134, l135, l136, l137, + l138, l139, l140, l141, l142, l143, l144, l145, l146, l147, + l148, l149, l150, l151, l152, l153, l154, l155, l156, l157, + l158, l159, l160, l161, l162, l163, l164, l165, l166, l167, + l168, l169, l170, l171, l172, l173, l174, l175, l176, l177, + l178, l179, l180, l181, l182, l183, l184, l185, l186, l187, + l188, l189, l190, l191, l192, l193, l194, l195, l196, l197, + l198, l199, l200, l201, l202, l203, l204, l205, l206, l207, + l208, l209, l210, l211, l212, l213, l214, l215, l216, l217, + l218, l219, l220, l221, l222, l223, l224, l225, l226, l227, + l228, l229, l230, l231, l232, l233, l234, l235, l236, l237, + l238, l239, l240, l241, l242, l243, l244, l245, l246, l247, + l248, l249, l250, l251, l252, l253, l254, l255, l256, l257, + l258, l259, l260, l261, l262, l263, l264, l265, l266, l267, + l268, l269, l270, l271, l272, l273, l274, l275, l276, l277, + l278, l279, l280, l281, l282, l283, l284, l285, l286, l287, + l288, l289, l290, l291, l292, l293, l294, l295, l296, l297, + l298, l299, l300, l301, l302, l303, l304, l305, l306, l307, + l308, l309, l310, l311, l312, l313, l314, l315, l316, l317, + l318, l319, l320, l321, l322, l323, l324, l325, l326, l327, + l328, l329, l330, l331, l332, l333, l334, l335, l336, l337, + l338, l339, l340, l341, l342, l343, l344, l345, l346, l347, + l348, l349, l350, l351, l352, l353, l354, l355, l356, l357, + l358, l359, l360, l361, l362, l363, l364, l365, l366, l367, + l368, l369, l370, l371, l372, l373, l374, l375, l376, l377, + l378, l379, l380, l381, l382, l383, l384, l385, l386, l387, + l388, l389, l390, l391, l392, l393, l394, l395, l396, l397, + l398, l399, l400, l401, l402, l403, l404, l405, l406, l407, + l408, l409, l410, l411, l412, l413, l414, l415, l416, l417, + l418, l419, l420, l421, l422, l423, l424, l425, l426, l427, + l428, l429, l430, l431, l432, l433, l434, l435, l436, l437, + l438, l439, l440, l441, l442, l443, l444, l445, l446, l447, + l448, l449, l450, l451, l452, l453, l454, l455, l456, l457, + l458, l459, l460, l461, l462, l463, l464, l465, l466, l467, + l468, l469, l470, l471, l472, l473, l474, l475, l476, l477, + l478, l479, l480, l481, l482, l483, l484, l485, l486, l487, + l488, l489, l490, l491, l492, l493, l494, l495, l496, l497, + l498, l499, l500, l501, l502, l503, l504, l505, l506, l507, + l508, l509, l510, l511; + + boolean do_m3 = false; + try { + do_m3 = m2(deopt); + } catch (StackOverflowError e) { + return true; + } + if (do_m3) { + try { + m1(deopt); + } catch (StackOverflowError e) {} + } + return false; + } + + // Used for synchronization betwen threads + static volatile boolean thread_started = false; + static volatile boolean do_monitor_acquire = false; + static volatile boolean monitor_acquired = false; + static volatile boolean method_entered = false; + + static Object monitor = new Object(); + + static public void main(String[] args) { + // get m1 compiled + for (int i = 0; i < 20000; i++) { + m1(false); + } + + Thread thread = new Thread() { + public void run() { + thread_started = true; + while(!do_monitor_acquire); + System.out.println("Ok to try to acquire the lock"); + synchronized(monitor) { + monitor_acquired = true; + } + } + }; + + thread.setDaemon(true); + thread.start(); + + while(!thread_started); + + m2(true); + + if (!method_entered) { + System.out.println("TEST PASSED"); + return; + } + + for (int i = 0; i < 10; i++) { + System.out.println("Is lock acquired?"); + if (monitor_acquired) { + System.out.println("TEST PASSED"); + return; + } + try { + Thread.sleep(10000); + } catch(InterruptedException ie) { + } + } + System.out.println("TEST FAILED"); + } +} --- old/src/share/vm/runtime/vframeArray.cpp 2014-03-06 11:59:59.924290762 +0100 +++ new/src/share/vm/runtime/vframeArray.cpp 2014-03-06 11:59:59.582841457 +0100 @@ -418,24 +418,20 @@ } -int vframeArrayElement::on_stack_size(int caller_actual_parameters, - int callee_parameters, +int vframeArrayElement::on_stack_size(int callee_parameters, int callee_locals, bool is_top_frame, - bool is_bottom_frame, int popframe_extra_stack_expression_els) const { assert(method()->max_locals() == locals()->size(), "just checking"); int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors(); int temps = expressions()->size(); - return Interpreter::size_activation(method(), - temps + callee_parameters, - popframe_extra_stack_expression_els, - locks, - caller_actual_parameters, - callee_parameters, - callee_locals, - is_top_frame, - is_bottom_frame); + return Interpreter::size_activation(method(), + temps + callee_parameters, + popframe_extra_stack_expression_els, + locks, + callee_parameters, + callee_locals, + is_top_frame); }