--- old/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp 2011-12-15 13:30:15.592346784 +0100 +++ new/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp 2011-12-15 13:30:15.402530239 +0100 @@ -401,6 +401,18 @@ __ delayed()->nop(); } +void InvalidateProfileInliningStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + assert(_klass->is_loaded(), "class should be loaded"); + __ set_oop_constant(_klass->constant_encoding(), G5); + + __ call(Runtime1::entry_for(Runtime1::invalidate_profile_inlining_id), relocInfo::runtime_call_type); + __ delayed()->mov(_obj->as_register(), G4); + + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} /////////////////////////////////////////////////////////////////////////////////// #ifndef SERIALGC --- old/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp 2011-12-15 13:30:17.078810715 +0100 +++ new/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp 2011-12-15 13:30:16.869208970 +0100 @@ -756,6 +756,19 @@ RelocationHolder rspec = virtual_call_Relocation::spec(pc()); __ set_oop((jobject)Universe::non_oop_word(), G5_inline_cache_reg); __ relocate(rspec); + + if (op->info()->is_profiled_call()) { + // The static call stub is not used for standard ic calls (a + // transition stub is allocated instead for calls to the + // interpreter). We emit the static call stub for profiled call + // sites anyway because the runtime locates the profile call stub + // by first looking up the static call stub and then walking over + // it to the profile call stub. + emit_static_call_stub(); + // Emit the profile call stub right behind the static call stub + emit_profile_call_stub(op->info()->method(), op->info()->stack()->bci(), SharedRuntime::get_resolve_profile_call_stub()); + } + __ call(op->addr(), relocInfo::none); // The peephole pass fills the delay slot, add_call_info is done in // LIR_Assembler::emit_delay. @@ -1589,6 +1602,74 @@ __ end_a_stub(); } +void LIR_Assembler::emit_profile_call_stub(ciMethod* method, int bci, address dest) { + ciMethodData* md = method->method_data(); + if (md == NULL) { + bailout("out of memory building methodDataOop"); + return; + } + address call_pc = __ pc(); + address stub = __ start_a_stub(profile_call_stub_size); + if (stub == NULL) { + bailout("profile call stub overflow"); + return; + } + + int start = __ offset(); + address off_addr = __ pc(); + + // The runtime needs the starting address of the profile call stub + // (to make the call site jump to the stub) and the location of the + // first jump in the stub (to make it branch to the callee). The + // starting address is found by first looking up the static call + // stub and then finding the profile call stub right behind + // it. Finding the jump is tricky because the code emitted before it + // depends on runtime conditions. Here, we first emit an integer (0) + // that we change to contain the offset of the jump within the stub + // when the jump is emitted and the offset is known. Locating the + // jump can then be done from the runtime by reading this offset and + // adding it to the address of the start of the stub. 
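// [Editorial sketch, not part of the patch] Based on the code below, the emitted
// SPARC profile call stub looks roughly like this (the exact sequence varies with
// whether the MDO slot offset fits in a simm13):
//   .word  0                       // patched with the offset of the callee jump
//   set    <MDO>, G4               // jobject2reg of md->constant_encoding()
//   lduw   [G4 + count_offset], G1 // per-call-site counter
//   add    G1, counter_increment, G1
//   cmp    G1, C1ProfileCompileThreshold   // threshold held in G3
//   bgu    L                       // counter crossed the threshold
//   delayed: stw G1, [G4 + count_offset]
//   jump_to <callee>               // offset of this jump stored in the .word above
//   L:
//   jump_to <dest>                 // resolve_profile_call runtime stub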
+ __ a_long(0); + + ciProfileData* data = md->bci_to_data(bci); + assert(data->is_CounterData(), "need CounterData for calls"); + Register mdo = G4; + jobject2reg(md->constant_encoding(), mdo); + + int mdo_offset_bias = 0; + if (!Assembler::is_simm13(md->byte_offset_of_slot(data, CounterData::count_offset()) + + data->size_in_bytes())) { + // The offset is large so bias the mdo by the base of the slot so + // that the ld can use simm13s to reference the slots of the data + mdo_offset_bias = md->byte_offset_of_slot(data, CounterData::count_offset()); + __ set(mdo_offset_bias, G1); + __ add(mdo, G1, mdo); + } + Address flags_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias); + + __ lduw(flags_addr, G1); + + __ add(G1, DataLayout::counter_increment, G1); + __ set(C1ProfileCompileThreshold, G3); + __ cmp(G1, G3); + + Label L; + __ br(Assembler::greaterUnsigned, false, Assembler::pn, L); + __ delayed()->stw(G1, flags_addr); + + *(jint*)off_addr = __ offset() - start; + + AddressLiteral addrlit(-1); + __ jump_to(addrlit, G3); + __ delayed()->nop(); + + __ bind(L); + __ jump_to(AddressLiteral(dest), G3); + __ delayed()->nop(); + + assert(__ offset() - start <= profile_call_stub_size, "stub too big"); + __ end_a_stub(); +} void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { if (opr1->is_single_fpu()) { --- old/src/cpu/sparc/vm/c1_LIRAssembler_sparc.hpp 2011-12-15 13:30:18.697608369 +0100 +++ new/src/cpu/sparc/vm/c1_LIRAssembler_sparc.hpp 2011-12-15 13:30:18.514963746 +0100 @@ -66,8 +66,10 @@ enum { #ifdef _LP64 call_stub_size = 68, + profile_call_stub_size = 148, #else call_stub_size = 20, + profile_call_stub_size = 84, #endif // _LP64 exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(10*4), deopt_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(10*4) }; --- old/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp 2011-12-15 13:30:20.135781845 +0100 +++ new/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp 2011-12-15 13:30:19.927212933 +0100 @@ -1034,19 +1034,24 @@ obj.load_item(); LIR_Opr out_reg = rlock_result(x); CodeStub* stub; - CodeEmitInfo* info_for_exception = state_for(x); if (x->is_incompatible_class_change_check()) { assert(patching_info == NULL, "can't patch this"); + CodeEmitInfo* info_for_exception = state_for(x); stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception); + } else if (x->is_profile_inlining()) { + assert(patching_info == NULL, "can't patch this"); + CodeEmitInfo* info = state_for(x, x->state_before()); + stub = new InvalidateProfileInliningStub(obj.result(), x->klass(), info); } else { + CodeEmitInfo* info_for_exception = state_for(x); stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); } LIR_Opr tmp1 = FrameMap::G1_oop_opr; LIR_Opr tmp2 = FrameMap::G3_oop_opr; LIR_Opr tmp3 = FrameMap::G4_oop_opr; __ checkcast(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3, - x->direct_compare(), info_for_exception, patching_info, stub, + x->direct_compare(), patching_info, stub, x->profiled_method(), x->profiled_bci()); } --- old/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp 2011-12-15 13:30:21.593189229 +0100 +++ new/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp 2011-12-15 13:30:21.393922787 +0100 @@ -701,6 +701,27 @@ } break; + case invalidate_profile_inlining_id: + { + OopMap* oop_map = save_live_registers(sasm); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
invalidate_profile_inlining), G4, G5); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + restore_live_registers(sasm); + __ restore(); + // Invalid inlining is detected before the body of the + // inlined method is executed so we need deoptimization to + // trigger "re-execution" of the method call. + __ br(Assembler::always, false, Assembler::pt, deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); + __ delayed()->nop(); + } + break; + case slow_subtype_check_id: { // Support for uint StubRoutine::partial_subtype_check( Klass sub, Klass super ); // Arguments : --- old/src/cpu/sparc/vm/c1_globals_sparc.hpp 2011-12-15 13:30:23.041062757 +0100 +++ new/src/cpu/sparc/vm/c1_globals_sparc.hpp 2011-12-15 13:30:22.847567031 +0100 @@ -68,4 +68,7 @@ define_pd_global(intx, SafepointPollOffset, 0 ); +define_pd_global(intx, C1ProfileHotFrequency, 5000 ); +define_pd_global(intx, C1ProfileWarmFrequency, 500 ); + #endif // CPU_SPARC_VM_C1_GLOBALS_SPARC_HPP --- old/src/cpu/x86/vm/c1_CodeStubs_x86.cpp 2011-12-15 13:30:24.459569395 +0100 +++ new/src/cpu/x86/vm/c1_CodeStubs_x86.cpp 2011-12-15 13:30:24.253723383 +0100 @@ -401,6 +401,17 @@ debug_only(__ should_not_reach_here()); } +void InvalidateProfileInliningStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + ce->store_parameter(_obj->as_register(), 0); + assert(_klass->is_loaded(), "class should be loaded"); + ce->store_parameter(_klass->constant_encoding(), 1); + + __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::invalidate_profile_inlining_id))); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); --- old/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp 2011-12-15 13:30:25.913554517 +0100 +++ new/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp 2011-12-15 13:30:25.702863551 +0100 @@ -2851,6 +2851,19 @@ assert(!os::is_MP() || (__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0, "must be aligned"); + + if (op->info()->is_profiled_call()) { + // The static call stub is not used for standard ic calls (a + // transition stub is allocated instead for calls to the + // interpreter). We emit the static call stub for profiled call + // sites anyway because the runtime locates the profile call stub + // by first looking up the static call stub and then walking over + // it to the profile call stub.
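// [Editorial sketch, not part of the patch] Resulting stub-area layout for a
// profiled call site, and how CompiledProfile::find_profile_stub() (later in
// this patch) reaches the profile stub:
//   static call stub:   NativeMovConstReg (method holder) ; NativeJump
//   profile call stub:  int32 offset-of-callee-jump ; counter update ; jumps
// find_profile_stub() takes compiledStaticCall_at(call)->find_stub(), then uses
// nativeMovConstReg_at / nativeJump_at to step over the static stub; the address
// right after it (plus an extra word on ARM) is the start of the profile call stub.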
+ emit_static_call_stub(); + // Emit the profile call stub right behind the static call stub + emit_profile_call_stub(op->info()->method(), op->info()->stack()->bci(), SharedRuntime::get_resolve_profile_call_stub()); + } + __ call(AddressLiteral(op->addr(), rh)); add_call_info(code_offset(), op->info()); } @@ -2889,6 +2902,56 @@ __ end_a_stub(); } +void LIR_Assembler::emit_profile_call_stub(ciMethod* method, int bci, address dest) { + ciMethodData* md = method->method_data(); + if (md == NULL) { + bailout("out of memory building methodDataOop"); + return; + } + address call_pc = __ pc(); + address stub = __ start_a_stub(profile_call_stub_size); + if (stub == NULL) { + bailout("profile call stub overflow"); + return; + } + + int start = __ offset(); + address off_addr = __ pc(); + + // The runtime needs the starting address of the profile call stub + // (to make the call site jump to the stub) and the location of the + // first jump in the stub (to make it branch to the callee). The + // starting address is found by first looking up the static call + // stub and then finding the profile call stub right behind + // it. Finding the jump is tricky because the code emitted before it + // depends on runtime conditions. Here, we first emit an integer (0) + // that we change to contain the offset of the jump within the stub + // when the jump is emitted and the offset is known. Locating the + // jump can then be done from the runtime by reading this offset and + // adding it to the address of the start of the stub. + __ a_long(0); + + ciProfileData* data = md->bci_to_data(bci); + assert(data->is_CounterData(), "need CounterData for calls"); + + Register tmp = NOT_LP64(rdi) LP64_ONLY(r12); + + __ movoop(tmp, md->constant_encoding()); + Address counter_addr(tmp, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ addl(counter_addr, DataLayout::counter_increment); + __ cmpl(counter_addr, C1ProfileCompileThreshold); + Label L; + __ jcc(Assembler::greater, L); + + *(jint*)off_addr = __ offset() - start; + __ jump(RuntimeAddress(__ pc())); + + __ bind(L); + __ jump(RuntimeAddress(dest)); + + assert(__ offset() - start <= profile_call_stub_size, "stub too big"); + __ end_a_stub(); +} void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { assert(exceptionOop->as_register() == rax, "must match"); --- old/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp 2011-12-15 13:30:27.487159577 +0100 +++ new/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp 2011-12-15 13:30:27.286949700 +0100 @@ -54,6 +54,7 @@ void store_parameter(jobject c, int offset_from_esp_in_words); enum { call_stub_size = NOT_LP64(15) LP64_ONLY(28), + profile_call_stub_size = NOT_LP64(42) LP64_ONLY(51), exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), deopt_handler_size = NOT_LP64(10) LP64_ONLY(17) }; --- old/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp 2011-12-15 13:30:28.872000612 +0100 +++ new/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp 2011-12-15 13:30:28.665669083 +0100 @@ -1147,14 +1147,20 @@ } obj.load_item(); - // info for exceptions - CodeEmitInfo* info_for_exception = state_for(x); - CodeStub* stub; + if (x->is_incompatible_class_change_check()) { assert(patching_info == NULL, "can't patch this"); + // info for exceptions + CodeEmitInfo* info_for_exception = state_for(x); stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception); + } else if (x->is_profile_inlining()) { + assert(patching_info == NULL, "can't patch this"); + CodeEmitInfo* 
info = state_for(x, x->state_before()); + stub = new InvalidateProfileInliningStub(obj.result(), x->klass(), info); } else { + // info for exceptions + CodeEmitInfo* info_for_exception = state_for(x); stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); } LIR_Opr reg = rlock_result(x); @@ -1164,7 +1170,7 @@ } __ checkcast(reg, obj.result(), x->klass(), new_register(objectType), new_register(objectType), tmp3, - x->direct_compare(), info_for_exception, patching_info, stub, + x->direct_compare(), patching_info, stub, x->profiled_method(), x->profiled_bci()); } --- old/src/cpu/x86/vm/c1_Runtime1_x86.cpp 2011-12-15 13:30:30.455356648 +0100 +++ new/src/cpu/x86/vm/c1_Runtime1_x86.cpp 2011-12-15 13:30:30.260293926 +0100 @@ -1353,6 +1353,27 @@ } break; + case invalidate_profile_inlining_id: + { StubFrame f(sasm, "invalidate_profile_inlining", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, 3); + + f.load_argument(1, rax); + f.load_argument(0, rbx); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, invalidate_profile_inlining), rbx, rax); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + // Invalid inlining is detected before the body of the + // inlined method is executed so we need deoptimization to + // trigger "re-execution" of the method call. + __ jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + case slow_subtype_check_id: { // Typical calling sequence: --- old/src/cpu/x86/vm/c1_globals_x86.hpp 2011-12-15 13:30:31.968338985 +0100 +++ new/src/cpu/x86/vm/c1_globals_x86.hpp 2011-12-15 13:30:31.787838249 +0100 @@ -67,4 +67,7 @@ define_pd_global(intx, SafepointPollOffset, 256 ); +define_pd_global(intx, C1ProfileHotFrequency, 10000); +define_pd_global(intx, C1ProfileWarmFrequency, 1000 ); + #endif // CPU_X86_VM_C1_GLOBALS_X86_HPP --- old/src/share/vm/c1/c1_Canonicalizer.cpp 2011-12-15 13:30:33.404974955 +0100 +++ new/src/share/vm/c1/c1_Canonicalizer.cpp 2011-12-15 13:30:33.216438781 +0100 @@ -546,7 +546,7 @@ void Canonicalizer::do_NewObjectArray (NewObjectArray* x) {} void Canonicalizer::do_NewMultiArray (NewMultiArray* x) {} void Canonicalizer::do_CheckCast (CheckCast* x) { - if (x->klass()->is_loaded()) { + if (!x->is_profile_inlining() && x->klass()->is_loaded()) { Value obj = x->obj(); ciType* klass = obj->exact_type(); if (klass == NULL) klass = obj->declared_type(); --- old/src/share/vm/c1/c1_CodeStubs.hpp 2011-12-15 13:30:34.851983693 +0100 +++ new/src/share/vm/c1/c1_CodeStubs.hpp 2011-12-15 13:30:34.660070563 +0100 @@ -474,6 +474,28 @@ #endif // PRODUCT }; +class InvalidateProfileInliningStub: public CodeStub { + private: + CodeEmitInfo* _info; + LIR_Opr _obj; + ciKlass* _klass; + + public: + InvalidateProfileInliningStub(LIR_Opr obj, ciKlass* k, CodeEmitInfo* info): + _obj(obj), _klass(k), _info(info) + { + } + + virtual void emit_code(LIR_Assembler* e); + virtual CodeEmitInfo* info() const { return _info; } + virtual void visit(LIR_OpVisitState* visitor) { + visitor->do_input(_obj); + visitor->do_slow_case(_info); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("InvalidateProfileInliningStub"); } +#endif // PRODUCT +}; class ArrayStoreExceptionStub: public SimpleExceptionStub { --- old/src/share/vm/c1/c1_Compilation.cpp
2011-12-15 13:30:36.320753564 +0100 +++ new/src/share/vm/c1/c1_Compilation.cpp 2011-12-15 13:30:36.128349064 +0100 @@ -253,7 +253,7 @@ locs_buffer_size / sizeof(relocInfo)); code->initialize_consts_size(Compilation::desired_max_constant_size()); // Call stubs + two deopt handlers (regular and MH) + exception handler - int stub_size = (call_stub_estimate * LIR_Assembler::call_stub_size) + + int stub_size = (call_stub_estimate * (LIR_Assembler::call_stub_size + (C1ProfileInlining ? LIR_Assembler::profile_call_stub_size : 0))) + LIR_Assembler::exception_handler_size + (2 * LIR_Assembler::deopt_handler_size); if (stub_size >= code->insts_capacity()) return false; --- old/src/share/vm/c1/c1_GraphBuilder.cpp 2011-12-15 13:30:37.828668050 +0100 +++ new/src/share/vm/c1/c1_GraphBuilder.cpp 2011-12-15 13:30:37.596662741 +0100 @@ -1691,7 +1691,58 @@ } code = Bytecodes::_invokespecial; } + + Value dynamic_receiver = NULL; + + if (C1TypeProfileInlining && + code != Bytecodes::_invokestatic && + code != Bytecodes::_invokespecial && + code != Bytecodes::_invokedynamic && + (code != Bytecodes::_invokevirtual || !target->is_loaded() || !target->is_final_method())) { + + ciInstanceKlass* k = method()->profile_single_hot_receiver(bci()); + + if (k != NULL) { + // Profiling reveals a single klass at the call. Attempt inlining (with a guard). + ValueStack* state_before = copy_state_before(); + + klass = k; + + ciMethod* receiver_method = target->resolve_invoke(calling_klass, k); + + assert(klass->is_loaded() && target->is_loaded(), "should be"); + assert(!target->is_static(), "should be"); + + int index = state()->stack_size() - (target->arg_size_no_receiver() + 1); + Value receiver = state()->stack_at(index); + // Insert a guard (specialized checkcast instruction) + CheckCast* c = new CheckCast(k, receiver, state_before); + c->set_profile_inlining(); + c->set_direct_compare(true); + dynamic_receiver = append_split(c); + + cha_monomorphic_target = receiver_method; + code = Bytecodes::_invokespecial; + + if (TraceC1ProfileInlining) { + ttyLocker ttyl; + tty->print("C1ProfileInlining: virtual method inlining "); + _compilation->method()->print_short_name(tty); + tty->print(" in "); + method()->print_short_name(tty); + tty->print(" at bci = %d", bci()); + tty->print(" to "); + receiver_method->print_short_name(tty); + tty->print(" expects class "); + k->print_name(); + tty->cr(); + } + } + } + // check if we could do inlining + bool do_profiling = false; + bool inlining_attempted = false; if (!PatchALot && Inline && klass->is_loaded() && (klass->is_initialized() || klass->is_interface() && target->holder()->is_initialized()) && target->will_link(klass, callee_holder, code)) { @@ -1709,7 +1760,8 @@ } if (!success) { // static binding => check if callee is ok - success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL)); + inlining_attempted = true; + success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), C1ProfileInlining ? 
&do_profiling : NULL, dynamic_receiver); } CHECK_BAILOUT(); @@ -1732,6 +1784,28 @@ } } } + + if (C1TypeProfileInlining && (code == Bytecodes::_invokevirtual || code == Bytecodes::_invokeinterface) && !inlining_attempted) { + if (method()->method_data()) { + ciVirtualCallData* call = (ciVirtualCallData*)method()->method_data()->bci_to_data(bci())->as_VirtualCallData(); + + if (!call->is_hot() && !call->is_warm() && call->receiver(1) == NULL) { + do_profiling = true; + + if (TraceC1ProfileInlining) { + ttyLocker ttyl; + tty->print("C1ProfileInlining: virtual method profiling "); + _compilation->method()->print_short_name(tty); + tty->print(" in "); + method()->print_short_name(tty); + tty->print_cr(" at bci = %d", bci()); + } + } + } + } + + assert(!do_profiling || C1ProfileInlining, "profiling only if C1ProfileInlining"); + // If we attempted an inline which did not succeed because of a // bailout during construction of the callee graph, the entire // compilation has to be aborted. This is fairly rare and currently @@ -1800,8 +1874,8 @@ profile_call(recv, target_klass); } } - - Invoke* result = new Invoke(code, result_type, recv, args, vtable_index, target, state_before); + + Invoke* result = new Invoke(code, result_type, recv, args, vtable_index, target, state_before, do_profiling); // push result append_split(result); @@ -3034,7 +3108,7 @@ } -bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known) { +bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known, bool* do_profiling, Value dynamic_receiver) { // Clear out any existing inline bailout condition clear_inline_bailout(); @@ -3056,7 +3130,7 @@ } else if (callee->is_abstract()) { INLINE_BAILOUT("abstract") } else { - return try_inline_full(callee, holder_known); + return try_inline_full(callee, holder_known, do_profiling, dynamic_receiver); } } @@ -3405,7 +3479,7 @@ } -bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, BlockBegin* cont_block) { +bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, bool* do_profiling, Value dynamic_receiver, BlockBegin* cont_block) { assert(!callee->is_native(), "callee must not be native"); if (CompilationPolicy::policy()->should_not_inline(compilation()->env(), callee)) { INLINE_BAILOUT("inlining prohibited by policy"); @@ -3438,7 +3512,32 @@ } else { if (inline_level() > MaxInlineLevel ) INLINE_BAILOUT("too-deep inlining"); if (recursive_inline_level(callee) > MaxRecursiveInlineLevel) INLINE_BAILOUT("too-deep recursive inlining"); - if (callee->code_size_for_inlining() > max_inline_size() ) INLINE_BAILOUT("callee is too large"); + + if (callee->code_size_for_inlining() > max_inline_size()) { + + if (do_profiling != NULL && callee->code_size() <= C1ProfileInlineSize) { + bool warm = false; + bool do_inlining = method()->profile_is_hot(bci(), warm); + + if (!do_inlining) { + *do_profiling = !warm; + INLINE_BAILOUT("callee is too large and too infrequent"); + } else if (TraceC1ProfileInlining) { + ttyLocker ttyl; + tty->print("C1ProfileInlining: method inlining of "); + callee->print_short_name(tty); + tty->print(" from "); + method()->print_short_name(tty); + tty->print(" at bci = %d", bci()); + tty->print(" in "); + _compilation->method()->print_short_name(tty); + tty->cr(); + } + + } else { + INLINE_BAILOUT("callee is too large"); + } + } // don't inline throwable methods unless the inlining tree is rooted in a throwable class if (callee->name() == ciSymbol::object_initializer_name() && @@ -3536,9 +3635,18 @@ ValueStack* callee_state =
state(); ValueStack* caller_state = state()->caller_state(); { int i = args_base; + bool do_replace = dynamic_receiver != NULL; while (i < caller_state->stack_size()) { const int par_no = i - args_base; Value arg = caller_state->stack_at_inc(i); + if (do_replace) { + // Profiling data drove us to inline with a single expected + // receiver class. Offer more accurate type information in + // inlinee by replacing receiver with result of guard + // instruction (= expected receiver class). + arg = dynamic_receiver; + do_replace = false; + } // NOTE: take base() of arg->type() to avoid problems storing // constants store_local(callee_state, arg, arg->type()->base(), par_no); @@ -3673,7 +3781,7 @@ // Get an adapter for the MethodHandle. ciMethod* method_handle_adapter = method_handle->get_method_handle_adapter(); if (method_handle_adapter != NULL) { - return try_inline(method_handle_adapter, /*holder_known=*/ true); + return try_inline(method_handle_adapter, /*holder_known=*/ true, NULL, NULL); } } else if (receiver->as_CheckCast()) { // Match MethodHandle.selectAlternative idiom @@ -3716,7 +3824,7 @@ // Parse first adapter _last = _block = one; - if (!try_inline_full(mh1_adapter, /*holder_known=*/ true, end)) { + if (!try_inline_full(mh1_adapter, /*holder_known=*/ true, NULL, NULL, end)) { restore_inline_cleanup_info(); block()->clear_end(); // remove appended iff return false; @@ -3725,7 +3833,7 @@ // Parse second adapter _last = _block = two; _state = state_before; - if (!try_inline_full(mh2_adapter, /*holder_known=*/ true, end)) { + if (!try_inline_full(mh2_adapter, /*holder_known=*/ true, NULL, NULL, end)) { restore_inline_cleanup_info(); block()->clear_end(); // remove appended iff return false; @@ -3753,7 +3861,7 @@ // Get an adapter for the MethodHandle. ciMethod* method_handle_adapter = method_handle->get_invokedynamic_adapter(); if (method_handle_adapter != NULL) { - if (try_inline(method_handle_adapter, /*holder_known=*/ true)) { + if (try_inline(method_handle_adapter, /*holder_known=*/ true, NULL, NULL)) { // Add a dependence for invalidation of the optimization. if (!call_site->is_constant_call_site()) { dependency_recorder()->assert_call_site_target_value(call_site, method_handle); --- old/src/share/vm/c1/c1_GraphBuilder.hpp 2011-12-15 13:30:39.455390483 +0100 +++ new/src/share/vm/c1/c1_GraphBuilder.hpp 2011-12-15 13:30:39.264432071 +0100 @@ -337,9 +337,9 @@ void fill_sync_handler(Value lock, BlockBegin* sync_handler, bool default_handler = false); // inliners - bool try_inline( ciMethod* callee, bool holder_known); + bool try_inline( ciMethod* callee, bool holder_known, bool* do_profiling, Value dynamic_receiver); bool try_inline_intrinsics(ciMethod* callee); - bool try_inline_full( ciMethod* callee, bool holder_known, BlockBegin* cont_block = NULL); + bool try_inline_full( ciMethod* callee, bool holder_known, bool* do_profiling, Value dynamic_receiver, BlockBegin* cont_block = NULL); bool try_inline_jsr(int jsr_dest_bci); // JSR 292 support --- old/src/share/vm/c1/c1_IR.cpp 2011-12-15 13:30:40.853832860 +0100 +++ new/src/share/vm/c1/c1_IR.cpp 2011-12-15 13:30:40.660151623 +0100 @@ -187,7 +187,8 @@ , _oop_map(NULL) , _stack(stack) , _exception_handlers(exception_handlers) - , _is_method_handle_invoke(false) { + , _is_method_handle_invoke(false) + , _is_profiled_call(false) { assert(_stack != NULL, "must be non null"); } @@ -198,7 +199,8 @@ , _scope_debug_info(NULL) , _oop_map(NULL) , _stack(stack == NULL ? 
info->_stack : stack) - , _is_method_handle_invoke(info->_is_method_handle_invoke) { + , _is_method_handle_invoke(info->_is_method_handle_invoke) + , _is_profiled_call(false) { // deep copy of exception handlers if (info->_exception_handlers != NULL) { @@ -210,7 +212,7 @@ void CodeEmitInfo::record_debug_info(DebugInformationRecorder* recorder, int pc_offset) { // record the safepoint before recording the debug info for enclosing scopes recorder->add_safepoint(pc_offset, _oop_map->deep_copy()); - _scope_debug_info->record_debug_info(recorder, pc_offset, true/*topmost*/, _is_method_handle_invoke); + _scope_debug_info->record_debug_info(recorder, pc_offset, true/*topmost*/, _is_method_handle_invoke, is_profiled_call()); recorder->end_safepoint(pc_offset); } --- old/src/share/vm/c1/c1_IR.hpp 2011-12-15 13:30:42.336249928 +0100 +++ new/src/share/vm/c1/c1_IR.hpp 2011-12-15 13:30:42.132843555 +0100 @@ -226,7 +226,7 @@ //Whether we should reexecute this bytecode for deopt bool should_reexecute(); - void record_debug_info(DebugInformationRecorder* recorder, int pc_offset, bool topmost, bool is_method_handle_invoke = false) { + void record_debug_info(DebugInformationRecorder* recorder, int pc_offset, bool topmost, bool is_method_handle_invoke = false, bool is_profiled_call = false) { if (caller() != NULL) { // Order is significant: Must record caller first. caller()->record_debug_info(recorder, pc_offset, false/*topmost*/); @@ -237,7 +237,7 @@ // reexecute allowed only for the topmost frame bool reexecute = topmost ? should_reexecute() : false; bool return_oop = false; // This flag will be ignored since it used only for C2 with escape analysis. - recorder->describe_scope(pc_offset, scope()->method(), bci(), reexecute, is_method_handle_invoke, return_oop, locvals, expvals, monvals); + recorder->describe_scope(pc_offset, scope()->method(), bci(), reexecute, is_method_handle_invoke, is_profiled_call, return_oop, locvals, expvals, monvals); } }; @@ -251,6 +251,7 @@ OopMap* _oop_map; ValueStack* _stack; // used by deoptimization (contains also monitors bool _is_method_handle_invoke; // true if the associated call site is a MethodHandle call site. 
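// [Editorial note] _is_profiled_call: true if this call site is instrumented for
// C1 profile based inlining; set from Invoke::is_profiled() in the LIRGenerator
// invoke handling (see the c1_LIRGenerator.cpp hunk in this patch) and checked by
// the LIR assembler to decide whether to emit the profile call stub.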
+ bool _is_profiled_call; FrameMap* frame_map() const { return scope()->compilation()->frame_map(); } Compilation* compilation() const { return scope()->compilation(); } @@ -275,6 +276,9 @@ bool is_method_handle_invoke() const { return _is_method_handle_invoke; } void set_is_method_handle_invoke(bool x) { _is_method_handle_invoke = x; } + + bool is_profiled_call() const { return _is_profiled_call; } + void set_profiled_call(bool v) { _is_profiled_call = v; } }; --- old/src/share/vm/c1/c1_Instruction.cpp 2011-12-15 13:30:43.797580851 +0100 +++ new/src/share/vm/c1/c1_Instruction.cpp 2011-12-15 13:30:43.575495578 +0100 @@ -237,12 +237,10 @@ } ciType* CheckCast::exact_type() const { - if (klass()->is_instance_klass()) { - ciInstanceKlass* ik = (ciInstanceKlass*)klass(); - if (ik->is_loaded() && ik->is_final()) { - return ik; - } - } + if (direct_compare()) return klass(); + + assert(!klass()->is_instance_klass() || !klass()->is_loaded() || !((ciInstanceKlass*)klass())->is_final(), "should be a direct compare"); + return NULL; } @@ -341,13 +339,15 @@ Invoke::Invoke(Bytecodes::Code code, ValueType* result_type, Value recv, Values* args, - int vtable_index, ciMethod* target, ValueStack* state_before) + int vtable_index, ciMethod* target, ValueStack* state_before, + bool is_profiled) : StateSplit(result_type, state_before) , _code(code) , _recv(recv) , _args(args) , _vtable_index(vtable_index) , _target(target) + , _is_profiled(is_profiled) { set_flag(TargetIsLoadedFlag, target->is_loaded()); set_flag(TargetIsFinalFlag, target_is_loaded() && target->is_final_method()); --- old/src/share/vm/c1/c1_Instruction.hpp 2011-12-15 13:30:45.244697853 +0100 +++ new/src/share/vm/c1/c1_Instruction.hpp 2011-12-15 13:30:45.032076895 +0100 @@ -335,6 +335,7 @@ UnorderedIsTrueFlag, NeedsPatchingFlag, ThrowIncompatibleClassChangeErrorFlag, + ProfileInliningFlag, ProfileMDOFlag, IsLinkedInBlockFlag, InstructionLastFlag @@ -1137,10 +1138,13 @@ int _vtable_index; ciMethod* _target; + bool _is_profiled; + public: // creation Invoke(Bytecodes::Code code, ValueType* result_type, Value recv, Values* args, - int vtable_index, ciMethod* target, ValueStack* state_before); + int vtable_index, ciMethod* target, ValueStack* state_before, + bool is_profiled); // accessors Bytecodes::Code code() const { return _code; } @@ -1154,6 +1158,8 @@ ciType* declared_type() const; + bool is_profiled() const { return _is_profiled; } + // Returns false if target is not loaded bool target_is_final() const { return check_flag(TargetIsFinalFlag); } bool target_is_loaded() const { return check_flag(TargetIsLoadedFlag); } @@ -1339,8 +1345,17 @@ return check_flag(ThrowIncompatibleClassChangeErrorFlag); } + void set_profile_inlining() { + set_flag(ProfileInliningFlag, true); + } + bool is_profile_inlining() const { + return check_flag(ProfileInliningFlag); + } + ciType* declared_type() const; ciType* exact_type() const; + + virtual bool can_trap() const { return !is_profile_inlining(); } }; --- old/src/share/vm/c1/c1_LIR.cpp 2011-12-15 13:30:46.733178223 +0100 +++ new/src/share/vm/c1/c1_LIR.cpp 2011-12-15 13:30:46.543865947 +0100 @@ -351,7 +351,7 @@ LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, - bool fast_check, CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, + bool fast_check, CodeEmitInfo* info_for_patch, CodeStub* stub) : LIR_Op(code, result, NULL) @@ -364,18 +364,12 @@ , _fast_check(fast_check) , _stub(stub) , 
_info_for_patch(info_for_patch) - , _info_for_exception(info_for_exception) + , _info_for_exception(NULL) , _profiled_method(NULL) , _profiled_bci(-1) , _should_profile(false) { - if (code == lir_checkcast) { - assert(info_for_exception != NULL, "checkcast throws exceptions"); - } else if (code == lir_instanceof) { - assert(info_for_exception == NULL, "instanceof throws no exceptions"); - } else { - ShouldNotReachHere(); - } + assert (code == lir_checkcast ||code == lir_instanceof, "expects checkcast or instanceof only"); } @@ -1371,10 +1365,10 @@ void LIR_List::checkcast (LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, - CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, CodeStub* stub, + CodeEmitInfo* info_for_patch, CodeStub* stub, ciMethod* profiled_method, int profiled_bci) { LIR_OpTypeCheck* c = new LIR_OpTypeCheck(lir_checkcast, result, object, klass, - tmp1, tmp2, tmp3, fast_check, info_for_exception, info_for_patch, stub); + tmp1, tmp2, tmp3, fast_check, info_for_patch, stub); if (profiled_method != NULL) { c->set_profiled_method(profiled_method); c->set_profiled_bci(profiled_bci); @@ -1384,7 +1378,7 @@ } void LIR_List::instanceof(LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_patch, ciMethod* profiled_method, int profiled_bci) { - LIR_OpTypeCheck* c = new LIR_OpTypeCheck(lir_instanceof, result, object, klass, tmp1, tmp2, tmp3, fast_check, NULL, info_for_patch, NULL); + LIR_OpTypeCheck* c = new LIR_OpTypeCheck(lir_instanceof, result, object, klass, tmp1, tmp2, tmp3, fast_check, info_for_patch, NULL); if (profiled_method != NULL) { c->set_profiled_method(profiled_method); c->set_profiled_bci(profiled_bci); --- old/src/share/vm/c1/c1_LIR.hpp 2011-12-15 13:30:48.280465543 +0100 +++ new/src/share/vm/c1/c1_LIR.hpp 2011-12-15 13:30:48.024136885 +0100 @@ -1517,7 +1517,7 @@ public: LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, - CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, CodeStub* stub); + CodeEmitInfo* info_for_patch, CodeStub* stub); LIR_OpTypeCheck(LIR_Code code, LIR_Opr object, LIR_Opr array, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, CodeEmitInfo* info_for_exception); @@ -2104,7 +2104,7 @@ void checkcast (LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, - CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, CodeStub* stub, + CodeEmitInfo* info_for_patch, CodeStub* stub, ciMethod* profiled_method, int profiled_bci); // methodDataOop profiling void profile_call(ciMethod* method, int bci, LIR_Opr mdo, LIR_Opr recv, LIR_Opr t1, ciKlass* cha_klass) { --- old/src/share/vm/c1/c1_LIRAssembler.cpp 2011-12-15 13:30:49.817560770 +0100 +++ new/src/share/vm/c1/c1_LIRAssembler.cpp 2011-12-15 13:30:49.608582608 +0100 @@ -443,8 +443,19 @@ align_call(op->code()); } - // emit the static call stub stuff out of line - emit_static_call_stub(); + if (op->code() != lir_icvirtual_call) { + // emit the static call stub stuff out of line + emit_static_call_stub(); + } + + address profile_dest = NULL; + // emit the profile call stub right behind the static call stub + if (op->info()->is_profiled_call() && (op->code() == lir_static_call || op->code() == lir_optvirtual_call)) { + profile_dest = op->code() == lir_static_call ? 
SharedRuntime::get_resolve_static_profile_call_stub() : SharedRuntime::get_resolve_profile_call_stub(); + } + if (profile_dest) { + emit_profile_call_stub(op->info()->method(), op->info()->stack()->bci(), profile_dest); + } switch (op->code()) { case lir_static_call: --- old/src/share/vm/c1/c1_LIRAssembler.hpp 2011-12-15 13:30:51.297733499 +0100 +++ new/src/share/vm/c1/c1_LIRAssembler.hpp 2011-12-15 13:30:51.100658627 +0100 @@ -138,6 +138,7 @@ // stubs void emit_slow_case_stubs(); void emit_static_call_stub(); + void emit_profile_call_stub(ciMethod* method, int bci, address dest); void emit_code_stub(CodeStub* op); void add_call_info_here(CodeEmitInfo* info) { add_call_info(code_offset(), info); } --- old/src/share/vm/c1/c1_LIRGenerator.cpp 2011-12-15 13:30:52.711387404 +0100 +++ new/src/share/vm/c1/c1_LIRGenerator.cpp 2011-12-15 13:30:52.504868777 +0100 @@ -2743,6 +2743,8 @@ __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr()); } + info->set_profiled_call(x->is_profiled()); + switch (x->code()) { case Bytecodes::_invokestatic: __ call_static(target, result_register, --- old/src/share/vm/c1/c1_Runtime1.cpp 2011-12-15 13:30:54.288817762 +0100 +++ new/src/share/vm/c1/c1_Runtime1.cpp 2011-12-15 13:30:54.087869651 +0100 @@ -640,6 +640,52 @@ SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_IncompatibleClassChangeError()); JRT_END +JRT_ENTRY(void, Runtime1::invalidate_profile_inlining(JavaThread* thread, oopDesc* obj, klassOopDesc* klass)) +{ + // The compiler inlines a frequent virtual method if profile data + // suggests a single receiver klass. Correctness is enforced with a + // compiled guard. We get here if the compiled code detects an + // invalid inlining decision. We record the new class in the + // profiling data to prevent the same inlining decision, make the + // nmethod not entrant and then deoptimize the caller. + + ResourceMark rm; + + Handle receiver(obj); + KlassHandle kh(klass); + + assert(receiver->klass() != kh(), "should be different klasses"); + + vframeStream vfst(thread); + methodHandle m = methodHandle(vfst.method()); + int bci = vfst.bci(); + + VirtualCallData* profile = m->method_data()->bci_to_data(bci)->as_VirtualCallData(); + + profile->new_receiver(receiver); + + RegisterMap reg_map(thread, false); + frame runtime_frame = thread->last_frame(); + frame caller_frame = runtime_frame.sender(®_map); + nmethod* nm = CodeCache::find_nmethod(caller_frame.pc()); + + if (nm != NULL) { + nm->make_not_entrant(); + } + + Deoptimization::deoptimize_frame(thread, caller_frame.id()); + + if (TraceC1ProfileInlining) { + ttyLocker ttyl; + tty->print("C1ProfileInlining: invalidate profile deopt"); + nm->method()->print_short_name(tty); + tty->print(" because of "); + m->print_short_name(tty); + tty->print(" at bci = %d.", bci); + tty->print_cr(" class is %s. 
Expected %s", Klass::cast(receiver->klass())->internal_name(), kh->internal_name()); + } +} +JRT_END JRT_ENTRY_NO_ASYNC(void, Runtime1::monitorenter(JavaThread* thread, oopDesc* obj, BasicObjectLock* lock)) NOT_PRODUCT(_monitorenter_slowcase_cnt++;) --- old/src/share/vm/c1/c1_Runtime1.hpp 2011-12-15 13:30:55.754894038 +0100 +++ new/src/share/vm/c1/c1_Runtime1.hpp 2011-12-15 13:30:55.564747517 +0100 @@ -70,6 +70,7 @@ stub(g1_post_barrier_slow) \ stub(fpu2long_stub) \ stub(counter_overflow) \ + stub(invalidate_profile_inlining) \ last_entry(number_of_ids) #define DECLARE_STUB_ID(x) x ## _id , @@ -150,6 +151,8 @@ static void throw_incompatible_class_change_error(JavaThread* thread); static void throw_array_store_exception(JavaThread* thread, oopDesc* object); + static void invalidate_profile_inlining(JavaThread* thread, oopDesc* obj, klassOopDesc* klass); + static void monitorenter(JavaThread* thread, oopDesc* obj, BasicObjectLock* lock); static void monitorexit (JavaThread* thread, BasicObjectLock* lock); --- old/src/share/vm/c1/c1_globals.hpp 2011-12-15 13:30:57.146847611 +0100 +++ new/src/share/vm/c1/c1_globals.hpp 2011-12-15 13:30:56.958619540 +0100 @@ -329,6 +329,27 @@ develop(bool, PrintCFGToFile, false, \ "print control flow graph to a separate file during compilation") \ \ + product(bool, C1ProfileInlining, true, \ + "Inline large methods when frequently called") \ + \ + develop(bool, TraceC1ProfileInlining, false, \ + "Trace inlining of large frequently called methods") \ + \ + product(bool, C1TypeProfileInlining, true, \ + "type profile based inlining") \ + \ + product_pd(intx, C1ProfileHotFrequency, \ + "call count per second for hot call sites") \ + \ + product_pd(intx, C1ProfileWarmFrequency, \ + "call count per second for warm call sites") \ + \ + product(intx, C1ProfileCompileThreshold, 10000, \ + "threshold at which recompilation is triggered") \ + \ + product(intx, C1ProfileInlineSize, 200, \ + "maximum bytecode size of a method to be inlined") \ + // Read default values for c1 globals --- old/src/share/vm/ci/ciEnv.cpp 2011-12-15 13:30:58.616281324 +0100 +++ new/src/share/vm/ci/ciEnv.cpp 2011-12-15 13:30:58.417337999 +0100 @@ -1062,7 +1062,9 @@ if (task() != NULL) task()->set_code(nm); if (entry_bci == InvocationEntryBci) { - if (TieredCompilation) { + bool replace_old = TieredCompilation; + COMPILER1_PRESENT(replace_old = replace_old || C1ProfileInlining); + if (replace_old) { // If there is an old version we're done with it nmethod* old = method->code(); if (TraceMethodReplacement && old != NULL) { --- old/src/share/vm/ci/ciMethod.cpp 2011-12-15 13:31:00.085042690 +0100 +++ new/src/share/vm/ci/ciMethod.cpp 2011-12-15 13:30:59.893705135 +0100 @@ -53,6 +53,9 @@ #include "ci/ciTypeFlow.hpp" #include "oops/methodOop.hpp" #endif +#ifdef COMPILER1 +#include "runtime/compilationPolicy.hpp" +#endif #ifdef SHARK #include "ci/ciTypeFlow.hpp" #include "oops/methodOop.hpp" @@ -851,6 +854,12 @@ Thread* my_thread = JavaThread::current(); methodHandle h_m(my_thread, get_methodOop()); +#ifdef COMPILER1 + if (C1ProfileInlining) { + ensure_method_data(); + } +#endif + if (h_m()->method_data() != NULL) { _method_data = CURRENT_ENV->get_object(h_m()->method_data())->as_method_data(); _method_data->load_data(); @@ -1231,3 +1240,62 @@ st->print(" loaded=false"); } } + +#ifdef COMPILER1 +// The compiler works on a copy of the MDO profile data. If we +// identify a call site as hot, we want this state to stick across +// compilations so we have to mark the methodOop's MDO. 
+void ciMethod::profile_force_mark_hot(int bci) { + VM_ENTRY_MARK; + CounterData* p = get_methodOop()->method_data()->bci_to_data(bci)->as_CounterData(); + p->set_hot(); +} + +bool ciMethod::profile_is_hot_helper(ciProfileData* data, int bci, bool &warm, bool full) { + CounterData* profile = data->as_CounterData(); + bool is_hot = profile->is_hot(); + if (!is_hot) { + int site_count = profile->count(); + if (full && site_count < C1ProfileCompileThreshold) { + warm = false; + return false; + } + is_hot = SimpleProfiledCompPolicy::is_hot(site_count, profile->timestamp()); + if (is_hot) { + profile->set_hot(); + profile_force_mark_hot(bci); + } else { + warm = profile->is_warm(); + } + } + return is_hot; +} + +bool ciMethod::profile_is_hot(int bci, bool& warm) { + if (method_data() == NULL) { + return false; + } + ciProfileData* data = method_data()->bci_to_data(bci); + if (data == NULL) { + return false; + } + return profile_is_hot_helper(data, bci, warm, false); +} + +ciInstanceKlass* ciMethod::profile_single_hot_receiver(int bci) { + if (method_data() == NULL) { + return NULL; + } + ciProfileData* data = method_data()->bci_to_data(bci); + if (data == NULL) { + return NULL; + } + ciVirtualCallData* call = (ciVirtualCallData*)data->as_VirtualCallData(); + bool dummy; + CounterData* profile = data->as_CounterData(); + if (call->receiver(0) != NULL && call->receiver(1) == NULL && profile_is_hot_helper(data, bci, dummy, true) && call->receiver(0)->is_instance_klass()) { + return (ciInstanceKlass*)call->receiver(0); + } + return NULL; +} +#endif --- old/src/share/vm/ci/ciMethod.hpp 2011-12-15 13:31:01.613369123 +0100 +++ new/src/share/vm/ci/ciMethod.hpp 2011-12-15 13:31:01.425269925 +0100 @@ -38,7 +38,7 @@ class BitMap; class Arena; class BCEscapeAnalyzer; - +class ProfileData; // ciMethod // @@ -115,6 +115,12 @@ *bcp = code; } +#ifdef COMPILER1 + // C1 profile based inlining support + void profile_force_mark_hot(int bci); + bool profile_is_hot_helper(ProfileData* data, int bci, bool& warm, bool full); +#endif + public: // Basic method information. ciFlags flags() const { check_is_loaded(); return _flags; } @@ -295,6 +301,17 @@ // Print the name of this method in various incarnations. void print_name(outputStream* st = tty); void print_short_name(outputStream* st = tty); + +#ifdef COMPILER1 + // C1 profile based inlining support + + // return true if either the call site was already marked hot or + // from profile data gathered so far, we find it is hot + bool profile_is_hot(int bci, bool& warm); + // return klass if call site is hot and only a single receiver class + // was seen for this call site. 
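// [Editorial sketch, not part of the patch] Typical use, taken from the
// c1_GraphBuilder.cpp change in this patch: guard the inlined body with a
// specialized CheckCast on the single profiled receiver class:
//   ciInstanceKlass* k = method()->profile_single_hot_receiver(bci());
//   if (k != NULL) {
//     CheckCast* c = new CheckCast(k, receiver, state_before);
//     c->set_profile_inlining();   // mismatch goes through InvalidateProfileInliningStub
//     c->set_direct_compare(true);
//     dynamic_receiver = append_split(c);
//     // then inline receiver_method as an invokespecial
//   }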
+ ciInstanceKlass* profile_single_hot_receiver(int bci); +#endif }; #endif // SHARE_VM_CI_CIMETHOD_HPP --- old/src/share/vm/ci/ciMethodData.hpp 2011-12-15 13:31:03.072713138 +0100 +++ new/src/share/vm/ci/ciMethodData.hpp 2011-12-15 13:31:02.873837426 +0100 @@ -65,13 +65,13 @@ void set_receiver(uint row, ciKlass* recv) { assert((uint)row < row_limit(), "oob"); - set_intptr_at(receiver0_offset + row * receiver_type_row_cell_count, + set_intptr_at(receiver_cell_index(row), (intptr_t) recv); } ciKlass* receiver(uint row) { assert((uint)row < row_limit(), "oob"); - ciObject* recv = (ciObject*)intptr_at(receiver0_offset + row * receiver_type_row_cell_count); + ciObject* recv = (ciObject*)intptr_at(receiver_cell_index(row)); assert(recv == NULL || recv->is_klass(), "wrong type"); return (ciKlass*)recv; } --- old/src/share/vm/code/compiledIC.cpp 2011-12-15 13:31:04.512640662 +0100 +++ new/src/share/vm/code/compiledIC.cpp 2011-12-15 13:31:04.299827667 +0100 @@ -91,7 +91,7 @@ } -void CompiledIC::set_ic_destination(address entry_point) { +void CompiledIC::set_ic_destination(address entry_point, bool set_profiled) { assert(entry_point != NULL, "must set legal entry point"); assert(CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), ""); if (TraceCompiledIC) { @@ -104,14 +104,29 @@ CodeBlob* cb = CodeCache::find_blob_unsafe(_ic_call); assert(cb != NULL && cb->is_nmethod(), "must be nmethod"); #endif - _ic_call->set_destination_mt_safe(entry_point); + if (set_profiled) { + if (is_profiled()) { + assert(SafepointSynchronize::is_at_safepoint(), "unsafe if not at safepoint"); + int off = *(int*)(_ic_call->destination() - 4); // first word of profile call stub is offset + NativeJump* jump = nativeJump_at(_ic_call->destination() + off - 4); + jump->set_jump_destination(entry_point); + } else { + set_up_profiling(entry_point); + } + } else { + _ic_call->set_destination_mt_safe(entry_point); + } } address CompiledIC::ic_destination() const { assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), ""); if (!is_in_transition_state()) { - return _ic_call->destination(); + if (is_profiled()) { + return profile_target(); + } else { + return _ic_call->destination(); + } } else { return InlineCacheBuffer::ic_destination_for((CompiledIC *)this); } @@ -120,6 +135,9 @@ bool CompiledIC::is_in_transition_state() const { assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), ""); + if (is_profiled()) { + return InlineCacheBuffer::contains(profile_target()); + } return InlineCacheBuffer::contains(_ic_call->destination()); } @@ -128,10 +146,109 @@ // the InlineCacheBuffer when it needs to find the stub. address CompiledIC::stub_address() const { assert(is_in_transition_state(), "should only be called when we are in a transition state"); + if (is_profiled()) { + return profile_target(); + } return _ic_call->destination(); } +bool CompiledProfile::is_profiled() const { + return is_profiled(call_instr()); +} +bool CompiledProfile::is_profiled(NativeCall* call) { +#ifdef COMPILER2 + return false; +#endif +#ifdef COMPILER1 + if (!C1ProfileInlining) return false; + + CodeBlob* cb = CodeCache::find_blob(call->instruction_address()); + if (cb == NULL || !cb->is_nmethod() || !((nmethod*)cb)->stub_contains(call->destination())) { + return false; + } + // The call points to this nmethod's stub area so it either branches + // to a static call stub or a profile call stub. Both start with a + // NativeMovConstReg. 
If this instruction loads an MDO then the + // target is a profile call stub. + oop obj = NULL; +#if defined(SPARC) || defined(PPC) || defined(ARM) + if (SafepointSynchronize::is_at_safepoint()) { + // When called at a safepoint the reloc info may have been updated + // if the oop that the instruction loads was moved by the GC but + // the code itself is not necessarily up to date. + address addr = call->destination(); + RelocIterator oops((nmethod*)cb, addr, addr + 1); + while (oops.next()) { + if (oops.type() == relocInfo::oop_type) { + oop_Relocation* r = oops.oop_reloc(); + obj = r->oop_value(); + break; + } + } +#ifdef ARM + // The first word of a static call stub has no reloc info. So if + // we don't find a reloc info we know, it's a static call stub. + return obj != NULL; +#endif + } else +#endif + { + NativeMovConstReg* mov = nativeMovConstReg_at(call->destination()); + + intptr_t data = mov->data(); + obj = (oop)data; + } + + return obj->is_methodData(); +#endif +} + +address CompiledProfile::profile_target() const { + return profile_target(call_instr()); +} + +address CompiledProfile::profile_target(NativeCall* call) { + assert(is_profiled(call), "only for profiled call sites"); + int off = *(int*)(call->destination() - 4); // first word of profile call stub is offset + NativeJump* jump = nativeJump_at(call->destination() + off - 4); + return jump->jump_destination(); +} + +address CompiledProfile::find_profile_stub() const { + return find_profile_stub(call_instr()); +} + +address CompiledProfile::find_profile_stub(NativeCall* call) { + // Profile stub is right after the static stub. So find the static + // stub, walk through it and we are on the profile stub. + CompiledStaticCall* csc = compiledStaticCall_at(call->instruction_address()); + address static_stub = csc->find_stub(); + + NativeMovConstReg* method_holder = nativeMovConstReg_at(static_stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // on arm, an extra word is emitted after the branch + return jump->next_instruction_address() ARM_ONLY( + 4 ); +} + +void CompiledProfile::set_up_profiling(address entry_point) { + address stub = find_profile_stub(); + set_up_profiling(call_instr(), stub, entry_point); +} + +void CompiledProfile::set_up_profiling(NativeCall* call, address stub, address entry_point) { + assert(Patching_lock->is_locked(), "should be protected by Patching_lock"); + + address jmp_addr = stub + *(int*)stub; + NativeJump* jump = nativeJump_at(jmp_addr); + jump->set_jump_destination(entry_point); + call->set_destination_mt_safe(stub+4); // first word of profile call stub is offset +} + +bool CompiledProfile::is_call_to_stub(NativeCall* call, address stub) { + return call->destination() == stub + 4; // first word of profile call stub is offset +} //----------------------------------------------------------------------------- // High-level access to an inline cache. Guaranteed to be MT-safe. @@ -269,7 +386,7 @@ ICStub* old_stub = ICStub_from_destination_address(stub_address()); old_stub->clear(); } - set_ic_destination(entry); + set_ic_destination(entry, false); } else { // Unsafe transition - create stub. 
InlineCacheBuffer::create_transition_stub(this, NULL, entry); @@ -353,9 +470,9 @@ (!is_in_transition_state() && (info.is_optimized() || static_bound || is_clean())); if (!safe) { - InlineCacheBuffer::create_transition_stub(this, info.cached_oop()(), info.entry()); + InlineCacheBuffer::create_transition_stub(this, info.cached_oop()(), info.entry(), info.is_profiled()); } else { - set_ic_destination(info.entry()); + set_ic_destination(info.entry(), info.is_profiled()); if (!is_optimized()) set_cached_oop(info.cached_oop()()); } @@ -386,8 +503,10 @@ bool is_optimized, bool static_bound, CompiledICInfo& info, + bool is_profiled, TRAPS) { info._is_optimized = is_optimized; + info._is_profiled = is_profiled; nmethod* method_code = method->code(); address entry = NULL; @@ -479,6 +598,14 @@ } +void CompiledIC::drop_profiling() { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + + if (is_profiled()) { + set_ic_destination(profile_target(), false); + } +} + // ---------------------------------------------------------------------------- void CompiledStaticCall::set_to_clean() { @@ -496,6 +623,12 @@ // both the call and its stub. } +address CompiledStaticCall::destination() const { + if (CompiledProfile::is_profiled(call_instr())) { + return CompiledProfile::profile_target(call_instr()); + } + return NativeCall::destination(); +} bool CompiledStaticCall::is_clean() const { return destination() == SharedRuntime::get_resolve_static_call_stub(); @@ -561,16 +694,22 @@ } // Call to compiled code assert (CodeCache::contains(info.entry()), "wrong entry point"); - set_destination_mt_safe(info.entry()); + if (info.is_profiled()) { + address stub = CompiledProfile::find_profile_stub(call_instr()); + CompiledProfile::set_up_profiling(call_instr(), stub, info.entry()); + } else { + set_destination_mt_safe(info.entry()); + } } } // Compute settings for a CompiledStaticCall. Since we might have to set // the stub when calling to the interpreter, we need to return arguments. -void CompiledStaticCall::compute_entry(methodHandle m, StaticCallInfo& info) { +void CompiledStaticCall::compute_entry(methodHandle m, StaticCallInfo& info, bool is_profiled) { nmethod* m_code = m->code(); info._callee = m; + info._is_profiled = is_profiled; if (m_code != NULL) { info._to_interpreter = false; info._entry = m_code->verified_entry_point(); @@ -607,6 +746,8 @@ // from the CompiledIC implementation case relocInfo::opt_virtual_call_type: return iter.opt_virtual_call_reloc()->static_stub(); + case relocInfo::virtual_call_type: + return iter.virtual_call_reloc()->static_stub(); case relocInfo::poll_type: case relocInfo::poll_return_type: // A safepoint can't overlap a call. 
default: @@ -617,6 +758,15 @@ return NULL; } +void CompiledStaticCall::drop_profiling() { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + + MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag); + + if (CompiledProfile::is_profiled(call_instr())) { + set_destination_mt_safe(CompiledProfile::profile_target(call_instr())); + } +} //----------------------------------------------------------------------------- // Non-product mode code --- old/src/share/vm/code/compiledIC.hpp 2011-12-15 13:31:06.107544274 +0100 +++ new/src/share/vm/code/compiledIC.hpp 2011-12-15 13:31:05.879554480 +0100 @@ -83,13 +83,62 @@ Handle _cached_oop; // Value of cached_oop (either in stub or inline cache) bool _is_optimized; // it is an optimized virtual call (i.e., can be statically bound) bool _to_interpreter; // Call it to interpreter + bool _is_profiled; public: address entry() const { return _entry; } Handle cached_oop() const { return _cached_oop; } bool is_optimized() const { return _is_optimized; } + bool is_profiled() const { return _is_profiled; } }; -class CompiledIC: public ResourceObj { +// Support for c1 profile based inlining. Some call sites are +// instrumented to gather profiling data used to drive further +// inlining through recompilation. The instrumentation code consists +// in incrementing a per call site counter stored in the MDO, testing +// whether it crosses a threshold, branching to the runtime if it is +// the case, jumping to the callee otherwise. +// +// The compiler identifies the candidate call sites and generates a +// stub similar to the static call stub in the nmethod's stub +// area. The profile call stub performs the following step: +// 1- load mdo pointer in register +// 2- increment counter for call site +// 3- branch to runtime if counter crosses threshold +// 4- jump to callee +// +// On call site resolution, for a call to a compiled method, the jump +// (4- above) is patched with the resolve call site info (to continue +// to callee's code or transition stub) then the call site is patched +// to point to the profile call stub. Profiling can be later fully +// disabled for the call site (if the call site is polymorphic or if +// the compilation policy finds it's better to not profile the call +// site anymore) by reresolving the call. +// +class CompiledProfile { + + private: + + friend class CompiledStaticCall; + static bool is_profiled(NativeCall* call); + static address profile_target(NativeCall* call); + + protected: + + address profile_target() const; + address find_profile_stub() const; + void set_up_profiling(address entry_point); + + virtual NativeCall* call_instr() const = 0; + + public: + bool is_profiled() const; // Use InlineCacheBuffer + + static address find_profile_stub(NativeCall* call); + static void set_up_profiling(NativeCall* call, address stub, address entry_point); + static bool is_call_to_stub(NativeCall* call, address stub); +}; + +class CompiledIC: public ResourceObj, CompiledProfile { friend class InlineCacheBuffer; friend class ICStub; @@ -106,12 +155,19 @@ // low-level inline-cache manipulation. Cannot be accessed directly, since it might not be MT-safe // to change an inline-cache. These changes the underlying inline-cache directly. They *newer* make // changes to a transition stub. 
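// [Editorial note] The new set_profiled argument (see compiledIC.cpp in this patch):
// when true and the site is already profiled, only the callee jump inside the profile
// call stub is repatched; when true on an unprofiled site, set_up_profiling() routes
// the call through the stub; when false, the call instruction is patched directly as before.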
- void set_ic_destination(address entry_point); + public: + void set_ic_destination(address entry_point, bool set_profiled); + private: void set_cached_oop(oop cache); // Reads the location of the transition stub. This will fail with an assertion, if no transition stub is // associated with the inline cache. address stub_address() const; + + protected: + NativeCall* call_instr() const { return _ic_call; } + + public: bool is_in_transition_state() const; // Use InlineCacheBuffer public: @@ -148,7 +204,7 @@ void set_to_megamorphic(CallInfo* call_info, Bytecodes::Code bytecode, TRAPS); static void compute_monomorphic_entry(methodHandle method, KlassHandle receiver_klass, - bool is_optimized, bool static_bound, CompiledICInfo& info, TRAPS); + bool is_optimized, bool static_bound, CompiledICInfo& info, bool is_profiled, TRAPS); // Location address instruction_address() const { return _ic_call->instruction_address(); } @@ -157,6 +213,8 @@ void print() PRODUCT_RETURN; void print_compiled_ic() PRODUCT_RETURN; void verify() PRODUCT_RETURN; + + void drop_profiling(); }; inline CompiledIC* CompiledIC_before(address return_addr) { @@ -201,11 +259,13 @@ address _entry; // Entrypoint methodHandle _callee; // Callee (used when calling interpreter) bool _to_interpreter; // call to interpreted method (otherwise compiled) + bool _is_profiled; friend class CompiledStaticCall; public: address entry() const { return _entry; } methodHandle callee() const { return _callee; } + bool is_profiled() const { return _is_profiled; } }; @@ -216,6 +276,10 @@ void set_to_interpreted(methodHandle callee, address entry); bool is_optimized_virtual(); + private: + + NativeCall* call_instr() const { return (NativeCall*)this; } + public: friend CompiledStaticCall* compiledStaticCall_before(address return_addr); friend CompiledStaticCall* compiledStaticCall_at(address native_call); @@ -235,7 +299,7 @@ void set(const StaticCallInfo& info); // Compute entry point given a method - static void compute_entry(methodHandle m, StaticCallInfo& info); + static void compute_entry(methodHandle m, StaticCallInfo& info, bool is_profiled = false); // Stub support address find_stub(); @@ -244,6 +308,10 @@ // Misc. 
void print() PRODUCT_RETURN; void verify() PRODUCT_RETURN; + + void drop_profiling(); + + address destination() const; }; --- old/src/share/vm/code/debugInfoRec.cpp 2011-12-15 13:31:07.502018346 +0100 +++ new/src/share/vm/code/debugInfoRec.cpp 2011-12-15 13:31:07.307153687 +0100 @@ -284,6 +284,7 @@ int bci, bool reexecute, bool is_method_handle_invoke, + bool is_profiled_call, bool return_oop, DebugToken* locals, DebugToken* expressions, @@ -300,6 +301,7 @@ last_pd->set_should_reexecute(reexecute); last_pd->set_is_method_handle_invoke(is_method_handle_invoke); last_pd->set_return_oop(return_oop); + last_pd->set_profiled_call(is_profiled_call); // serialize sender stream offest stream()->write_int(sender_stream_offset); --- old/src/share/vm/code/debugInfoRec.hpp 2011-12-15 13:31:09.021773634 +0100 +++ new/src/share/vm/code/debugInfoRec.hpp 2011-12-15 13:31:08.821451014 +0100 @@ -102,6 +102,7 @@ int bci, bool reexecute, bool is_method_handle_invoke = false, + bool is_profiled_call = false, bool return_oop = false, DebugToken* locals = NULL, DebugToken* expressions = NULL, --- old/src/share/vm/code/icBuffer.cpp 2011-12-15 13:31:10.373197270 +0100 +++ new/src/share/vm/code/icBuffer.cpp 2011-12-15 13:31:10.192688495 +0100 @@ -68,7 +68,7 @@ assert(this == ICStub_from_destination_address(ic->stub_address()), "wrong owner of ic buffer"); ic->set_cached_oop(cached_oop()); - ic->set_ic_destination(destination()); + ic->set_ic_destination(destination(), ic->is_profiled()); } } @@ -179,7 +179,7 @@ } -void InlineCacheBuffer::create_transition_stub(CompiledIC *ic, oop cached_oop, address entry) { +void InlineCacheBuffer::create_transition_stub(CompiledIC *ic, oop cached_oop, address entry, bool set_profiled) { assert(!SafepointSynchronize::is_at_safepoint(), "should not be called during a safepoint"); assert (CompiledIC_lock->is_locked(), ""); assert(cached_oop == NULL || cached_oop->is_perm(), "must belong to perm. 
space"); @@ -196,7 +196,7 @@ ic_stub->set_stub(ic, cached_oop, entry); // Update inline cache in nmethod to point to new "out-of-line" allocated inline cache - ic->set_ic_destination(ic_stub->code_begin()); + ic->set_ic_destination(ic_stub->code_begin(), set_profiled); set_next_stub(new_ic_stub()); // can cause safepoint synchronization } --- old/src/share/vm/code/icBuffer.hpp 2011-12-15 13:31:11.773091292 +0100 +++ new/src/share/vm/code/icBuffer.hpp 2011-12-15 13:31:11.585261991 +0100 @@ -129,7 +129,7 @@ // New interface - static void create_transition_stub(CompiledIC *ic, oop cached_oop, address entry); + static void create_transition_stub(CompiledIC *ic, oop cached_oop, address entry, bool set_profiled = false); static address ic_destination_for(CompiledIC *ic); static oop cached_oop_for(CompiledIC *ic); }; --- old/src/share/vm/code/nmethod.cpp 2011-12-15 13:31:13.182047299 +0100 +++ new/src/share/vm/code/nmethod.cpp 2011-12-15 13:31:12.961529755 +0100 @@ -670,6 +670,9 @@ _osr_entry_point = NULL; _exception_cache = NULL; _pc_desc_cache.reset_to(NULL); +#ifdef COMPILER1 + _needs_recomp = false; +#endif code_buffer->copy_oops_to(this); if (ScavengeRootsInCode && detect_scavenge_root_oops()) { @@ -751,6 +754,9 @@ _osr_entry_point = NULL; _exception_cache = NULL; _pc_desc_cache.reset_to(NULL); +#ifdef COMPILER1 + _needs_recomp = false; +#endif code_buffer->copy_oops_to(this); debug_only(verify_scavenge_root_oops()); @@ -872,6 +878,10 @@ CodeCache::commit(this); +#ifdef COMPILER1 + _needs_recomp = false; +#endif + // Copy contents of ExceptionHandlerTable to nmethod handler_table->copy_to(this); nul_chk_table->copy_to(this); @@ -1902,10 +1912,19 @@ #endif //ASSERT // Search for MethodHandle invokes and tag the nmethod. + bool searching_method_handle = true; + bool searching_profiled_call = false; + COMPILER1_PRESENT(searching_profiled_call = searching_profiled_call || C1ProfileInlining); for (int i = 0; i < count; i++) { if (pcs[i].is_method_handle_invoke()) { set_has_method_handle_invokes(true); - break; + searching_method_handle = false; + if (!searching_profiled_call) break; + } + if (pcs[i].is_profiled_call()) { + set_has_profiled_calls(true); + searching_profiled_call =false; + if (!searching_method_handle) break; } } assert(has_method_handle_invokes() == (_deoptimize_mh_offset != -1), "must have deopt mh handler"); @@ -2203,6 +2222,14 @@ return pd->is_method_handle_invoke(); } +bool nmethod::is_profiled_call(address call_pc) { + if (!has_profiled_calls()) return false; + PcDesc* pd = pc_desc_at(call_pc); + if (pd == NULL) + return false; + return pd->is_profiled_call(); +} + // ----------------------------------------------------------------------------- // Verification --- old/src/share/vm/code/nmethod.hpp 2011-12-15 13:31:15.205259602 +0100 +++ new/src/share/vm/code/nmethod.hpp 2011-12-15 13:31:14.997961114 +0100 @@ -169,6 +169,8 @@ bool _marked_for_reclamation; // Used by NMethodSweeper (set only by sweeper) bool _marked_for_deoptimization; // Used for stack deoptimization + bool _has_profiled_calls; // Any c1 profile call sites? + // used by jvmti to track if an unload event has been posted for this nmethod. 
bool _unload_reported; @@ -222,6 +224,10 @@ ByteSize _native_receiver_sp_offset; ByteSize _native_basic_lock_sp_offset; +#ifdef COMPILER1 + bool _needs_recomp; +#endif + friend class nmethodLocker; // For native wrappers @@ -432,6 +438,9 @@ bool is_speculatively_disconnected() const { return _speculatively_disconnected; } void set_speculatively_disconnected(bool z) { _speculatively_disconnected = z; } + bool has_profiled_calls() const { return _has_profiled_calls; } + void set_has_profiled_calls(bool z) { _has_profiled_calls = z; } + int comp_level() const { return _comp_level; } // Support for oops in scopes and relocs: @@ -512,6 +521,12 @@ return (addr >= code_begin() && addr < verified_entry_point()); } +#ifdef COMPILER1 + // recompilation with c1 + bool needs_recomp() const { return _needs_recomp; } + void set_needs_recomp(bool v) { _needs_recomp = v; } +#endif + // unlink and deallocate this nmethod // Only NMethodSweeper class is expected to use this. NMethodSweeper is not // expected to use any other private methods/data in this class. @@ -594,6 +609,8 @@ // MethodHandle bool is_method_handle_return(address return_pc); + bool is_profiled_call(address call_pc); + // jvmti support: void post_compiled_method_load_event(); jmethodID get_and_cache_jmethod_id(); --- old/src/share/vm/code/pcDesc.hpp 2011-12-15 13:31:16.691104322 +0100 +++ new/src/share/vm/code/pcDesc.hpp 2011-12-15 13:31:16.483373519 +0100 @@ -42,7 +42,8 @@ enum { PCDESC_reexecute = 1 << 0, PCDESC_is_method_handle_invoke = 1 << 1, - PCDESC_return_oop = 1 << 2 + PCDESC_return_oop = 1 << 2, + PCDESC_profiled_call = 1 << 3 }; int _flags; @@ -73,6 +74,8 @@ // Flags bool should_reexecute() const { return (_flags & PCDESC_reexecute) != 0; } void set_should_reexecute(bool z) { set_flag(PCDESC_reexecute, z); } + bool is_profiled_call() const { return (_flags & PCDESC_profiled_call) != 0; } + void set_profiled_call(bool z) { set_flag(PCDESC_profiled_call, z); } // Does pd refer to the same information as pd? bool is_same_info(const PcDesc* pd) { --- old/src/share/vm/code/relocInfo.cpp 2011-12-15 13:31:18.215090606 +0100 +++ new/src/share/vm/code/relocInfo.cpp 2011-12-15 13:31:17.961962731 +0100 @@ -911,7 +911,7 @@ } -address opt_virtual_call_Relocation::static_stub() { +address virtual_call_Relocation::static_stub() { // search for the static stub who points back to this static call address static_call_addr = addr(); RelocIterator iter(code()); @@ -925,6 +925,19 @@ return NULL; } +address opt_virtual_call_Relocation::static_stub() { + // search for the static stub who points back to this static call + address static_call_addr = addr(); + RelocIterator iter(code()); + while (iter.next()) { + if (iter.type() == relocInfo::static_stub_type) { + if (iter.static_stub_reloc()->static_call() == static_call_addr) { + return iter.addr(); + } + } + } + return NULL; +} void static_call_Relocation::clear_inline_cache() { // Safe call site info --- old/src/share/vm/code/relocInfo.hpp 2011-12-15 13:31:19.668792922 +0100 +++ new/src/share/vm/code/relocInfo.hpp 2011-12-15 13:31:19.456337648 +0100 @@ -1011,6 +1011,9 @@ void clear_inline_cache(); + // find the matching static_stub + address static_stub(); + // Figure out where an ic_call is hiding, given a set-oop or call. // Either ic_call or first_oop must be non-null; the other is deduced. // Code if non-NULL must be the nmethod, else it is deduced. 
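As context for the PCDESC_profiled_call bit added to pcDesc.hpp above: each PcDesc records its boolean properties as individual bits of a small _flags word, and nmethod::is_profiled_call simply maps a call pc to its PcDesc and tests that bit. The snippet below is a minimal standalone sketch of that flag encoding; PcDescSketch and its members are illustrative names, not VM code.

#include <cassert>

struct PcDescSketch {
  enum {
    REEXECUTE     = 1 << 0,
    MH_INVOKE     = 1 << 1,
    RETURN_OOP    = 1 << 2,
    PROFILED_CALL = 1 << 3   // counterpart of the PCDESC_profiled_call bit added above
  };
  int flags = 0;

  void set_flag(int mask, bool z) { flags = z ? (flags | mask) : (flags & ~mask); }
  bool has_flag(int mask) const   { return (flags & mask) != 0; }

  void set_profiled_call(bool z)  { set_flag(PROFILED_CALL, z); }
  bool is_profiled_call() const   { return has_flag(PROFILED_CALL); }
};

int main() {
  PcDescSketch pd;
  pd.set_profiled_call(true);
  pd.set_flag(PcDescSketch::RETURN_OOP, true);
  assert(pd.is_profiled_call());                 // bits are independent: both can be set
  assert(pd.has_flag(PcDescSketch::RETURN_OOP));
  pd.set_profiled_call(false);                   // clearing one bit leaves the other intact
  assert(!pd.is_profiled_call() && pd.has_flag(PcDescSketch::RETURN_OOP));
  return 0;
}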
--- old/src/share/vm/compiler/compileBroker.cpp 2011-12-15 13:31:21.170552526 +0100 +++ new/src/share/vm/compiler/compileBroker.cpp 2011-12-15 13:31:20.956154263 +0100 @@ -1162,7 +1162,7 @@ #ifndef TIERED // seems like an assert of dubious value assert(comp_level == CompLevel_highest_tier, - "all OSR compiles are assumed to be at a single compilation lavel"); + "all OSR compiles are assumed to be at a single compilation level"); #endif // TIERED // We accept a higher level osr method nmethod* nm = method->lookup_osr_nmethod_for(osr_bci, comp_level, false); @@ -1260,6 +1260,11 @@ } else { nmethod* result = method->code(); if (result == NULL) return false; +#ifdef COMPILER1 + if (C1ProfileInlining && result->needs_recomp()) { + return false; + } +#endif return comp_level == result->comp_level(); } } --- old/src/share/vm/oops/instanceKlass.cpp 2011-12-15 13:31:22.734031063 +0100 +++ new/src/share/vm/oops/instanceKlass.cpp 2011-12-15 13:31:22.542396808 +0100 @@ -2180,6 +2180,15 @@ void instanceKlass::add_osr_nmethod(nmethod* n) { // only one compilation can be active NEEDS_CLEANUP +#ifdef COMPILER1 + // Get rid of the osr methods for the same bci + if (C1ProfileInlining) { + nmethod *inv = lookup_osr_nmethod(n->method(), n->osr_entry_bci(), n->comp_level(), true); + if (inv != NULL && inv->is_in_use()) { + inv->make_not_entrant(); + } + } +#endif // This is a short non-blocking critical region, so the no safepoint check is ok. OsrList_lock->lock_without_safepoint_check(); assert(n->is_osr_method(), "wrong kind of nmethod"); --- old/src/share/vm/oops/methodDataOop.cpp 2011-12-15 13:31:24.278924111 +0100 +++ new/src/share/vm/oops/methodDataOop.cpp 2011-12-15 13:31:24.080861623 +0100 @@ -491,6 +491,19 @@ // a method. int methodDataOopDesc::bytecode_cell_count(Bytecodes::Code code) { +#ifdef COMPILER1 + if (C1ProfileInlining) { + switch (code) { + case Bytecodes::_invokespecial: + case Bytecodes::_invokestatic: + return CounterData::static_cell_count(); + case Bytecodes::_invokevirtual: + case Bytecodes::_invokeinterface: + return VirtualCallData::static_cell_count(); + } + return no_profile_data; + } +#endif switch (code) { case Bytecodes::_checkcast: case Bytecodes::_instanceof: @@ -612,6 +625,22 @@ int tag = DataLayout::no_tag; DataLayout* data_layout = data_layout_at(data_index); Bytecodes::Code c = stream->code(); +#ifdef COMPILER1 + if (C1ProfileInlining) { + switch (c) { + case Bytecodes::_invokespecial: + case Bytecodes::_invokestatic: + cell_count = CounterData::static_cell_count(); + tag = DataLayout::counter_data_tag; + break; + case Bytecodes::_invokevirtual: + case Bytecodes::_invokeinterface: + cell_count = VirtualCallData::static_cell_count(); + tag = DataLayout::virtual_call_data_tag; + break; + } + } else { +#endif switch (c) { case Bytecodes::_checkcast: case Bytecodes::_instanceof: @@ -675,6 +704,9 @@ tag = DataLayout::multi_branch_data_tag; break; } +#ifdef COMPILER1 + } +#endif assert(tag == DataLayout::multi_branch_data_tag || cell_count == bytecode_cell_count(c), "cell counts must agree"); if (cell_count >= 0) { --- old/src/share/vm/oops/methodDataOop.hpp 2011-12-15 13:31:25.841727085 +0100 +++ new/src/share/vm/oops/methodDataOop.hpp 2011-12-15 13:31:25.644913793 +0100 @@ -172,6 +172,11 @@ _header._struct._flags = (new_state << trap_shift) | old_flags; } +#ifdef COMPILER1 + void set_profile_state(int state) { assert(C1ProfileInlining, "c1 profiling only"); _header._struct._flags = state; } + int profile_state() { assert(C1ProfileInlining, "c1 profiling only"); return 
_header._struct._flags; } +#endif + u1 flags() { return _header._struct._flags; } @@ -364,6 +369,12 @@ _data = data; } +#ifdef COMPILER1 + protected: + void set_profile_state(int state) { data()->set_profile_state(state); } + int profile_state() { return data()->profile_state(); } +#endif + public: // Constructor for invalid ProfileData. ProfileData(); @@ -533,7 +544,7 @@ virtual bool is_CounterData() { return true; } static int static_cell_count() { - return counter_cell_count; + return counter_cell_count + ( COMPILER1_PRESENT(C1ProfileInlining ? BytesPerLong/BytesPerWord :) 0 ); } virtual int cell_count() { @@ -550,7 +561,7 @@ return cell_offset(count_off); } static ByteSize counter_data_size() { - return cell_offset(counter_cell_count); + return cell_offset(counter_cell_count + ( COMPILER1_PRESENT(C1ProfileInlining ? BytesPerLong/BytesPerWord :) 0 )); } void set_count(uint count) { @@ -560,6 +571,67 @@ #ifndef PRODUCT void print_data_on(outputStream* st); #endif + +#ifdef COMPILER1 + // C1 implements profile based inlining. To detect a hot call site, + // a count of the number of times the call is taken and a timestamp + // of the first time the call is taken are needed. Together, they + // are used to compute a call site frequency = + // count / (current time - timestamp) + // that can be compared to thresholds to identify hot, warm, + // or cold call sites. The state of a call site is kept here as + // well. This way it persists during the execution: a hot call site + // can be marked as such once and for all, and subsequent compilations + // will attempt inlining. + private: + union stamp { + jlong l; + uint u[BytesPerLong/BytesPerWord]; + }; + + enum state { + unseen_yet = 0, + seen, + hot, + warm, + cold + }; + + public: + bool is_unseen_yet() { return profile_state() == unseen_yet; } + bool is_seen() { return profile_state() == seen; } + bool is_hot() { return profile_state() == hot; } + bool is_warm() { return profile_state() == warm; } + bool is_cold() { return profile_state() == cold; } + void set_seen() { set_profile_state(seen); } + void set_hot() { set_profile_state(hot); } + void set_warm() { set_profile_state(warm); } + void set_cold() { set_profile_state(cold); } + + jlong timestamp() { + union stamp ts; + for(int i = 0; i < BytesPerLong/BytesPerWord; i++) { + ts.u[i] = uint_at(counter_cell_count + i); + } + return ts.l; + } + + void set_timestamp(jlong stamp) { + union stamp ts; + ts.l = stamp; + for(int i = 0; i < BytesPerLong/BytesPerWord; i++) { + set_uint_at(counter_cell_count + i, ts.u[i]); + } + } + + void init_if_first_seen() { + if (is_unseen_yet()) { + set_seen(); + jlong now = os::javaTimeNanos(); + set_timestamp(now); + } + } +#endif }; // JumpData @@ -644,7 +716,7 @@ class ReceiverTypeData : public CounterData { protected: enum { - receiver0_offset = counter_cell_count, + receiver0_offset, count0_offset, receiver_type_row_cell_count = (count0_offset + 1) - receiver0_offset }; @@ -658,7 +730,7 @@ virtual bool is_ReceiverTypeData() { return true; } static int static_cell_count() { - return counter_cell_count + (uint) TypeProfileWidth * receiver_type_row_cell_count; + return CounterData::static_cell_count() + (uint) TypeProfileWidth * receiver_type_row_cell_count; } virtual int cell_count() { @@ -670,10 +742,10 @@ return TypeProfileWidth; } static int receiver_cell_index(uint row) { - return receiver0_offset + row * receiver_type_row_cell_count; + return CounterData::static_cell_count() + receiver0_offset + row * receiver_type_row_cell_count; } static int
receiver_count_cell_index(uint row) { - return count0_offset + row * receiver_type_row_cell_count; + return CounterData::static_cell_count() + count0_offset + row * receiver_type_row_cell_count; } // Get the receiver at row. The 'unchecked' version is needed by parallel old @@ -791,6 +863,18 @@ return cell_offset(static_cell_count()); } + void new_receiver(Handle r) { + assert(TypeProfileWidth >= 2, "should be"); + + if (receiver(0) == NULL || receiver(0) == r->klass()) { + set_receiver(0, r->klass()); + set_receiver_count(0, DataLayout::counter_increment); + } else if (row_limit() > 1 && (receiver(1) == NULL || receiver(1) == r->klass())) { + set_receiver(1, r->klass()); + set_receiver_count(1, DataLayout::counter_increment); + } + } + #ifndef PRODUCT void print_data_on(outputStream* st); #endif --- old/src/share/vm/oops/methodKlass.cpp 2011-12-15 13:31:27.354414115 +0100 +++ new/src/share/vm/oops/methodKlass.cpp 2011-12-15 13:31:27.134223569 +0100 @@ -141,11 +141,9 @@ // know that Universe::methodKlassObj never moves. PSParallelCompact::mark_and_push(cm, m->adr_constMethod()); PSParallelCompact::mark_and_push(cm, m->adr_constants()); -#ifdef COMPILER2 if (m->method_data() != NULL) { PSParallelCompact::mark_and_push(cm, m->adr_method_data()); } -#endif // COMPILER2 } #endif // SERIALGC @@ -213,11 +211,9 @@ methodOop m = methodOop(obj); PSParallelCompact::adjust_pointer(m->adr_constMethod()); PSParallelCompact::adjust_pointer(m->adr_constants()); -#ifdef COMPILER2 if (m->method_data() != NULL) { PSParallelCompact::adjust_pointer(m->adr_method_data()); } -#endif // COMPILER2 return m->object_size(); } #endif // SERIALGC --- old/src/share/vm/opto/output.cpp 2011-12-15 13:31:28.807395694 +0100 +++ new/src/share/vm/opto/output.cpp 2011-12-15 13:31:28.605851215 +0100 @@ -944,7 +944,7 @@ assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI"); assert(!jvms->should_reexecute() || depth == max_depth, "reexecute allowed only for the youngest"); // Now we can describe the scope. - debug_info()->describe_scope(safepoint_pc_offset, scope_method, jvms->bci(), jvms->should_reexecute(), is_method_handle_invoke, return_oop, locvals, expvals, monvals); + debug_info()->describe_scope(safepoint_pc_offset, scope_method, jvms->bci(), jvms->should_reexecute(), is_method_handle_invoke, false, return_oop, locvals, expvals, monvals); } // End jvms loop // Mark the end of the scope set. --- old/src/share/vm/runtime/arguments.cpp 2011-12-15 13:31:30.510029843 +0100 +++ new/src/share/vm/runtime/arguments.cpp 2011-12-15 13:31:30.289919576 +0100 @@ -3080,6 +3080,19 @@ // Set object alignment values. 
set_object_alignment(); +#ifdef COMPILER1 + if (!UseInlineCaches) { + // keeping track of types is done at inline cache state changes + C1TypeProfileInlining = false; + } +#ifdef TIERED + C1ProfileInlining = false; +#endif + if (!C1ProfileInlining) { + C1TypeProfileInlining = false; + } +#endif + #ifdef SERIALGC force_serial_gc(); #endif // SERIALGC --- old/src/share/vm/runtime/compilationPolicy.cpp 2011-12-15 13:31:32.051845970 +0100 +++ new/src/share/vm/runtime/compilationPolicy.cpp 2011-12-15 13:31:31.848924548 +0100 @@ -56,7 +56,15 @@ switch(CompilationPolicyChoice) { case 0: +#ifdef COMPILER1 + if (C1ProfileInlining) { + CompilationPolicy::set_policy(new SimpleProfiledCompPolicy()); + } else { +#endif CompilationPolicy::set_policy(new SimpleCompPolicy()); +#ifdef COMPILER1 + } +#endif break; case 1: @@ -420,6 +428,65 @@ NOT_PRODUCT(trace_osr_completion(m->lookup_osr_nmethod_for(bci, CompLevel_highest_tier, true));) } } + +#ifdef COMPILER1 +bool SimpleProfiledCompPolicy::profile_overflow_event(JavaThread *thread, TRAPS) { + RegisterMap map(thread, false); + frame fr = thread->last_frame().sender(&map); + nmethod* nm = (nmethod*) fr.cb(); + assert(nm != NULL && nm->is_nmethod(), "what?"); + ResourceMark rm; + + vframeStream vfst(thread); + methodHandle mh = methodHandle(vfst.method()); + int bci = vfst.bci(); + + bool fix_call = false; + { + nmethodLocker nml(nm); + if (!nm->needs_recomp()) { + CounterData* profile = mh->method_data()->bci_to_data(bci)->as_CounterData(); + bool warm; + + if (is_hot(C1ProfileCompileThreshold, profile->timestamp(), &warm)) { + + assert(mh->method_data() != NULL, "should have a mdo if we get here"); + nm->set_needs_recomp(true); + methodHandle mh_recomp(nm->method()); + + if (TraceC1ProfileInlining) { + ttyLocker ttyl; + tty->print("C1ProfileInlining: recompiling: "); + mh_recomp->print_short_name(tty); + tty->print(" because of "); + mh->print_short_name(tty); + tty->print_cr(" at bci = %d", bci); + } + + const char* comment = "tier1 overflow"; + profile->set_hot(); + CompileBroker::compile_method(mh_recomp, InvocationEntryBci, CompLevel_highest_tier, + mh_recomp, C1ProfileCompileThreshold, comment, CHECK_(true)); + fix_call = true; + } else if (warm) { + fix_call = true; + profile->set_warm(); + } else { + profile->set_cold(); + } + if (!fix_call) { + profile->set_count(0); + profile->set_timestamp(os::javaTimeNanos()); + } + } else { + fix_call = true; + } + } + return fix_call; +} +#endif + + // StackWalkCompPolicy - walk up stack to find a suitable method to compile #ifdef COMPILER2 --- old/src/share/vm/runtime/compilationPolicy.hpp 2011-12-15 13:31:33.517916433 +0100 +++ new/src/share/vm/runtime/compilationPolicy.hpp 2011-12-15 13:31:33.333850517 +0100 @@ -116,6 +116,68 @@ virtual void method_back_branch_event(methodHandle m, int bci, TRAPS); }; +#ifdef COMPILER1 +class SimpleProfiledCompPolicy : public SimpleCompPolicy { + + // C1 implements profile based inlining. Execution of a method + // starts interpreted as usual. A method transitions from + // interpreted to compiled in the usual way as well. When the method + // is compiled, the compiler identifies a number of call sites that + // are candidates for profiling and further inlining. At those call + // sites, a per call site counter is incremented and tested for + // overflow. On first call site resolution, a timestamp is also + // recorded. The count and timestamp are used to compute a + // frequency. A frequency higher than a high water mark detects a + // hot call site.
A hot call site triggers a recompilation of the + // caller method in which the callee is inlined. A frequency higher + // than a low water mark detects a warm call site. Otherwise the + // call site is cold. Recompiling with the extra inlining won't + // bring a performance advantage for a warm or cold call site. But + // keeping the profiling on at a warm call site is detrimental. At a + // cold call site profiling can be kept enabled to trigger later + // recompilation if the call site becomes hot. + + public: + static inline bool is_hot(int count, jlong ts, bool* warm = NULL) { + if (warm != NULL) { + *warm = false; + } + jlong now; + // Make sure we've gone through this call site many times. + // If time is going backward there's not much we can do. + if (count < C1ProfileCompileThreshold / 2 || (now = os::javaTimeNanos()) <= ts) { + return false; + } + + jlong elapsed = (now - ts) / 1000; + jlong c_l = (elapsed * C1ProfileHotFrequency) / 1000 / 1000; + int c = (int)c_l; + + if ((jlong)c == c_l && count >= c) { + return true; + } + + if (warm == NULL) { + return false; + } + + c_l = (elapsed * C1ProfileWarmFrequency) / 1000 / 1000; + c = (int)c_l; + if ((jlong)c == c_l && count >= c) { + *warm = true; + } + + return false; + } + + static int max_inline_size() { + return C1ProfileInlineSize; + } + + static bool profile_overflow_event(JavaThread *thread, TRAPS); +}; +#endif + // StackWalkCompPolicy - existing C2 policy #ifdef COMPILER2 --- old/src/share/vm/runtime/sharedRuntime.cpp 2011-12-15 13:31:34.973264114 +0100 +++ new/src/share/vm/runtime/sharedRuntime.cpp 2011-12-15 13:31:34.752794838 +0100 @@ -78,6 +78,7 @@ #endif #ifdef COMPILER1 #include "c1/c1_Runtime1.hpp" +#include "runtime/compilationPolicy.hpp" #endif // Shared stub locations @@ -86,6 +87,10 @@ RuntimeStub* SharedRuntime::_resolve_opt_virtual_call_blob; RuntimeStub* SharedRuntime::_resolve_virtual_call_blob; RuntimeStub* SharedRuntime::_resolve_static_call_blob; +#ifdef COMPILER1 +RuntimeStub* SharedRuntime::_resolve_profile_call_blob; +RuntimeStub* SharedRuntime::_resolve_static_profile_call_blob; +#endif DeoptimizationBlob* SharedRuntime::_deopt_blob; RicochetBlob* SharedRuntime::_ricochet_blob; @@ -105,6 +110,10 @@ _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C), "resolve_opt_virtual_call"); _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C), "resolve_virtual_call"); _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C), "resolve_static_call"); +#ifdef COMPILER1 + _resolve_profile_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_profile_call_C), "resolve_profile_call"); + _resolve_static_profile_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_profile_call_C),"resolve_static_profile_call"); +#endif _polling_page_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), false); _polling_page_return_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), true); @@ -1186,6 +1195,27 @@ assert((!is_virtual && invoke_code == Bytecodes::_invokestatic) || ( is_virtual && invoke_code != Bytecodes::_invokestatic), "inconsistent bytecode"); + bool is_profiled = false; + +#ifdef COMPILER1 + is_profiled = 
caller_nm->is_profiled_call(caller_frame.pc()); + if (is_profiled) { + if (callee_method->code_size() > SimpleProfiledCompPolicy::max_inline_size()) { + is_profiled = false; + } + if (is_profiled) { + vframeStream vfst(thread); + methodOop m = vfst.method(); + int bci = vfst.bci(); + + CounterData* profile = m->method_data()->bci_to_data(bci)->as_CounterData(); + + profile->init_if_first_seen(); + } + } +#endif + + #ifndef PRODUCT // tracing/debugging/statistics int *addr = (is_optimized) ? (&_resolve_opt_virtual_ctr) : @@ -1234,11 +1264,11 @@ bool static_bound = call_info.resolved_method()->can_be_statically_bound(); KlassHandle h_klass(THREAD, receiver->klass()); CompiledIC::compute_monomorphic_entry(callee_method, h_klass, - is_optimized, static_bound, virtual_call_info, - CHECK_(methodHandle())); + is_optimized, static_bound, virtual_call_info, is_profiled, + CHECK_(methodHandle())); } else { // static call - CompiledStaticCall::compute_entry(callee_method, static_call_info); + CompiledStaticCall::compute_entry(callee_method, static_call_info, is_profiled); } // grab lock, check for deoptimization and potentially patch caller @@ -1260,6 +1290,18 @@ CompiledIC* inline_cache = CompiledIC_before(caller_frame.pc()); if (inline_cache->is_clean()) { inline_cache->set_to_monomorphic(virtual_call_info); +#ifdef COMPILER1 + if (is_profiled && !is_optimized) { + assert(invoke_code == Bytecodes::_invokevirtual || invoke_code == Bytecodes::_invokeinterface, "unexpected call"); + vframeStream vfst(thread); + methodOop m = vfst.method(); + int bci = vfst.bci(); + + VirtualCallData* profile = m->method_data()->bci_to_data(bci)->as_VirtualCallData(); + + profile->new_receiver(receiver); + } +#endif } } else { CompiledStaticCall* ssc = compiledStaticCall_before(caller_frame.pc()); @@ -1391,9 +1433,75 @@ return callee_method->verified_code_entry(); JRT_END +#ifdef COMPILER1 +methodHandle SharedRuntime::resolve_profile_helper(JavaThread *thread, TRAPS) { + ResourceMark rm(thread); + RegisterMap cbl_map(thread, false); + frame caller_frame = thread->last_frame().sender(&cbl_map); + + CodeBlob* cb = caller_frame.cb(); + guarantee(cb != NULL && cb->is_nmethod(), "must be called from nmethod"); + nmethodLocker caller_lock((nmethod*)cb); + CallInfo call_info; + Bytecodes::Code invoke_code = Bytecodes::_illegal; + Handle receiver = find_callee_info(thread, invoke_code, + call_info, CHECK_(methodHandle())); + + return call_info.selected_method(); +} + +JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_profile_call_C(JavaThread *thread )) + methodHandle callee_method; + JRT_BLOCK + { + callee_method = resolve_profile_helper(thread, CHECK_NULL); + + bool fix_call = SimpleProfiledCompPolicy::profile_overflow_event(thread, CHECK_NULL); + + if (fix_call) { + ResourceMark rm; + RegisterMap map(thread, false); + frame fr = thread->last_frame().sender(&map); + + MutexLocker ml_patch(CompiledIC_lock); + + CompiledIC* inline_cache = CompiledIC_before(fr.pc()); + inline_cache->drop_profiling(); + } + thread->set_vm_result(callee_method()); + } + JRT_BLOCK_END + // return compiled code entry point after potential safepoints + assert(callee_method->verified_code_entry() != NULL, " Jump to zero!"); + return callee_method->verified_code_entry(); +JRT_END +JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_static_profile_call_C(JavaThread *thread )) + methodHandle callee_method; + JRT_BLOCK + { + callee_method = resolve_profile_helper(thread, CHECK_NULL); + bool fix_call = SimpleProfiledCompPolicy::profile_overflow_event(thread, 
CHECK_NULL); + + if (fix_call) { + + RegisterMap map(thread, false); + frame fr = thread->last_frame().sender(&map); + + MutexLocker ml_patch(CompiledIC_lock); + CompiledStaticCall* ssc = compiledStaticCall_before(fr.pc()); + ssc->drop_profiling(); + } + thread->set_vm_result(callee_method()); + } + JRT_BLOCK_END + // return compiled code entry point after potential safepoints + assert(callee_method->verified_code_entry() != NULL, " Jump to zero!"); + return callee_method->verified_code_entry(); +JRT_END +#endif methodHandle SharedRuntime::handle_ic_miss_helper(JavaThread *thread, TRAPS) { ResourceMark rm(thread); @@ -1500,6 +1608,10 @@ if (should_be_mono) { + bool is_profiled = false; +#ifdef COMPILER1 + is_profiled = ((nmethod*)cb)->is_profiled_call(caller_frame.pc()); +#endif // We have a path that was monomorphic but was going interpreted // and now we have (or had) a compiled entry. We correct the IC // by using a new icBuffer. @@ -1509,11 +1621,35 @@ receiver_klass, inline_cache->is_optimized(), false, - info, CHECK_(methodHandle())); + info, is_profiled, CHECK_(methodHandle())); +#ifdef COMPILER1 + if (is_profiled) { + vframeStream vfst(thread); + methodOop m = vfst.method(); + int bci = vfst.bci(); + + CounterData* profile = m->method_data()->bci_to_data(bci)->as_CounterData(); + + profile->init_if_first_seen(); + } +#endif + inline_cache->set_to_monomorphic(info); } else if (!inline_cache->is_megamorphic() && !inline_cache->is_clean()) { // Change to megamorphic inline_cache->set_to_megamorphic(&call_info, bc, CHECK_(methodHandle())); +#ifdef COMPILER1 + if (((nmethod*)cb)->is_profiled_call(caller_frame.pc())) { + vframeStream vfst(thread); + methodOop m = vfst.method(); + int bci = vfst.bci(); + + VirtualCallData* profile = m->method_data()->bci_to_data(bci)->as_VirtualCallData(); + + profile->new_receiver(receiver); + } +#endif + } else { // Either clean or megamorphic } @@ -1681,6 +1817,11 @@ return; } + bool is_profiled = false; +#ifdef COMPILER1 + is_profiled = nm->is_profiled_call(caller_pc+frame::pc_return_offset); +#endif + // There is a benign race here. We could be attempting to patch to a compiled // entry point at the same time the callee is being deoptimized. If that is // the case then entry_point may in fact point to a c2i and we'd patch the @@ -1718,7 +1859,11 @@ return; } address destination = call->destination(); - if (destination != entry_point) { + address stub = NULL; + if (is_profiled) { + stub = CompiledProfile::find_profile_stub(call); + } + if (destination != entry_point && !CompiledProfile::is_call_to_stub(call, stub)) { CodeBlob* callee = CodeCache::find_blob(destination); // callee == cb seems weird. It means calling interpreter thru stub. 
if (callee == cb || callee->is_adapter_blob()) { @@ -1728,7 +1873,11 @@ moop->print_short_name(tty); tty->print_cr(" to " INTPTR_FORMAT, entry_point); } - call->set_destination_mt_safe(entry_point); + if (is_profiled) { + CompiledProfile::set_up_profiling(call, stub, entry_point); + } else { + call->set_destination_mt_safe(entry_point); + } } else { if (TraceCallFixup) { tty->print("failed to fixup callsite at " INTPTR_FORMAT " to compiled code for", caller_pc); --- old/src/share/vm/runtime/sharedRuntime.hpp 2011-12-15 13:31:36.617736573 +0100 +++ new/src/share/vm/runtime/sharedRuntime.hpp 2011-12-15 13:31:36.422152517 +0100 @@ -52,6 +52,8 @@ bool is_virtual, bool is_optimized, TRAPS); + static methodHandle resolve_profile_helper(JavaThread *thread, TRAPS); + // Shared stub locations static RuntimeStub* _wrong_method_blob; @@ -59,6 +61,10 @@ static RuntimeStub* _resolve_opt_virtual_call_blob; static RuntimeStub* _resolve_virtual_call_blob; static RuntimeStub* _resolve_static_call_blob; +#ifdef COMPILER1 + static RuntimeStub* _resolve_profile_call_blob; + static RuntimeStub* _resolve_static_profile_call_blob; +#endif static DeoptimizationBlob* _deopt_blob; static RicochetBlob* _ricochet_blob; @@ -199,6 +205,16 @@ assert(_ic_miss_blob!= NULL, "oops"); return _ic_miss_blob->entry_point(); } +#ifdef COMPILER1 + static address get_resolve_profile_call_stub() { + assert(_resolve_profile_call_blob != NULL, "oops"); + return _resolve_profile_call_blob->entry_point(); + } + static address get_resolve_static_profile_call_stub() { + assert(_resolve_static_profile_call_blob != NULL, "oops"); + return _resolve_static_profile_call_blob->entry_point(); + } +#endif static address get_handle_wrong_method_stub() { assert(_wrong_method_blob!= NULL, "oops"); @@ -487,6 +503,10 @@ static address resolve_static_call_C (JavaThread *thread); static address resolve_virtual_call_C (JavaThread *thread); static address resolve_opt_virtual_call_C(JavaThread *thread); +#ifdef COMPILER1 + static address resolve_profile_call_C (JavaThread *thread); + static address resolve_static_profile_call_C(JavaThread *thread); +#endif // arraycopy, the non-leaf version. (See StubRoutines for all the leaf calls.) static void slow_arraycopy_C(oopDesc* src, jint src_pos,
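The hot/warm test in SimpleProfiledCompPolicy::is_hot above boils down to comparing the call site's observed call frequency (the counter divided by the time elapsed since its timestamp was recorded) against a hot rate and a warm rate. The following is a minimal standalone sketch of that computation; the constant values are made-up stand-ins for C1ProfileCompileThreshold, C1ProfileHotFrequency and C1ProfileWarmFrequency, classify is an illustrative name, and the 32-bit overflow guard of the original is omitted.

#include <cstdint>
#include <cstdio>

static const int     kCompileThreshold = 10000;   // assumed stand-in
static const int64_t kHotFrequency     = 100000;  // calls per second, assumed
static const int64_t kWarmFrequency    = 10000;   // calls per second, assumed

// count: per call site counter; ts/now: nanosecond timestamps.
static bool classify(int count, int64_t ts, int64_t now, bool* warm) {
  *warm = false;
  // Skip call sites that have not been taken often enough, or a clock
  // that appears to have gone backward.
  if (count < kCompileThreshold / 2 || now <= ts) return false;

  int64_t elapsed_us  = (now - ts) / 1000;
  int64_t hot_cutoff  = elapsed_us * kHotFrequency / 1000 / 1000;   // calls expected at the hot rate
  if (count >= hot_cutoff) return true;

  int64_t warm_cutoff = elapsed_us * kWarmFrequency / 1000 / 1000;  // calls expected at the warm rate
  if (count >= warm_cutoff) *warm = true;
  return false;
}

int main() {
  bool warm = false;
  // 8000 calls in 50 ms => 160000 calls/s, above the assumed hot rate of 100000/s.
  bool hot = classify(8000, 0, 50LL * 1000 * 1000, &warm);
  printf("hot=%d warm=%d\n", hot, warm);
  return 0;
}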