src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp

rev 2893: 7121756: Improve C1 inlining policy by using profiling at call sites
Summary: profile-based recompilation of methods by C1 with more inlining.
Reviewed-by:

*** 754,763 **** --- 754,776 ----
  void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
    RelocationHolder rspec = virtual_call_Relocation::spec(pc());
    __ set_oop((jobject)Universe::non_oop_word(), G5_inline_cache_reg);
    __ relocate(rspec);
+
+   if (op->info()->is_profiled_call()) {
+     // The static call stub is not used for standard ic calls (a
+     // transition stub is allocated instead for calls to the
+     // interpreter). We emit the static call stub for profiled call
+     // sites anyway because the runtime locates the profile call stub
+     // by first looking up the static call stub and then walking over
+     // it to the profile call stub.
+     emit_static_call_stub();
+     // Emit the profile call stub right behind the static call stub
+     emit_profile_call_stub(op->info()->method(), op->info()->stack()->bci(), SharedRuntime::get_resolve_profile_call_stub());
+   }
+
    __ call(op->addr(), relocInfo::none);
    // The peephole pass fills the delay slot, add_call_info is done in
    // LIR_Assembler::emit_delay.
  }
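The comment in ic_call() above relies on a layout invariant: the profile call stub is emitted immediately behind the static call stub, so the runtime can reach it by a fixed-size walk from the static stub's address. A minimal standalone sketch of that invariant (not HotSpot code; the buffer and the stub sizes below are made up for illustration, and the step of resolving the static call stub from the call site's relocation info is elided):

  #include <cassert>
  #include <cstdint>

  typedef uint8_t* address;

  // Hypothetical, fixed per-platform stub sizes.
  const int static_call_stub_size  = 16;
  const int profile_call_stub_size = 32;

  // "Walk over" the static call stub: the profile call stub is emitted
  // right behind it, so locating it is a fixed-size increment.
  address profile_call_stub_for(address static_stub) {
    return static_stub + static_call_stub_size;
  }

  int main() {
    uint8_t code_buffer[static_call_stub_size + profile_call_stub_size] = {0};
    address static_stub  = code_buffer;
    address profile_stub = profile_call_stub_for(static_stub);
    assert(profile_stub == code_buffer + static_call_stub_size);
    return 0;
  }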
*** 1587,1596 **** --- 1600,1677 ----
    assert(__ offset() - start <= call_stub_size, "stub too big");
    __ end_a_stub();
  }

+ void LIR_Assembler::emit_profile_call_stub(ciMethod* method, int bci, address dest) {
+   ciMethodData* md = method->method_data();
+   if (md == NULL) {
+     bailout("out of memory building methodDataOop");
+     return;
+   }
+   address call_pc = __ pc();
+   address stub = __ start_a_stub(profile_call_stub_size);
+   if (stub == NULL) {
+     bailout("profile call stub overflow");
+     return;
+   }
+
+   int start = __ offset();
+   address off_addr = __ pc();
+
+   // The runtime needs the starting address of the profile call stub
+   // (to make the call site jump to the stub) and the location of the
+   // first jump in the stub (to make it branch to the callee). The
+   // starting address is found by first looking up the static call
+   // stub and then finding the profile call stub right behind
+   // it. Finding the jump is tricky because the code emitted before it
+   // depends on runtime conditions. Here, we first emit an integer (0)
+   // that we change to contain the offset of the jump within the stub
+   // when the jump is emitted and the offset is known. Locating the
+   // jump can then be done from the runtime by reading this offset and
+   // adding it to the address of the start of the stub.
+   __ a_long(0);
+
+   ciProfileData* data = md->bci_to_data(bci);
+   assert(data->is_CounterData(), "need CounterData for calls");
+   Register mdo = G4;
+   jobject2reg(md->constant_encoding(), mdo);
+
+   int mdo_offset_bias = 0;
+   if (!Assembler::is_simm13(md->byte_offset_of_slot(data, CounterData::count_offset()) +
+                             data->size_in_bytes())) {
+     // The offset is large so bias the mdo by the base of the slot so
+     // that the ld can use simm13s to reference the slots of the data
+     mdo_offset_bias = md->byte_offset_of_slot(data, CounterData::count_offset());
+     __ set(mdo_offset_bias, G1);
+     __ add(mdo, G1, mdo);
+   }
+   Address flags_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
+
+   __ lduw(flags_addr, G1);
+
+   __ add(G1, DataLayout::counter_increment, G1);
+   __ set(C1ProfileCompileThreshold, G3);
+   __ cmp(G1, G3);
+
+   Label L;
+   __ br(Assembler::greaterUnsigned, false, Assembler::pn, L);
+   __ delayed()->stw(G1, flags_addr);
+
+   *(jint*)off_addr = __ offset() - start;
+
+   AddressLiteral addrlit(-1);
+   __ jump_to(addrlit, G3);
+   __ delayed()->nop();
+
+   __ bind(L);
+   __ jump_to(AddressLiteral(dest), G3);
+   __ delayed()->nop();
+
+   assert(__ offset() - start <= profile_call_stub_size, "stub too big");
+   __ end_a_stub();
+ }

  void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
    if (opr1->is_single_fpu()) {
      __ fcmp(FloatRegisterImpl::S, Assembler::fcc0, opr1->as_float_reg(), opr2->as_float_reg());
    } else if (opr1->is_double_fpu()) {
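Two details of emit_profile_call_stub() are worth illustrating. First, the offset-word trick the comment describes: the stub opens with a 32-bit slot that is patched, once the jump has been emitted and its position is known, with the jump's offset from the start of the stub, so the runtime can find the jump from the stub's address alone. A standalone model of that bookkeeping (not HotSpot code; the byte counts are invented):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  typedef int32_t jint;
  typedef uint8_t* address;

  int main() {
    uint8_t stub[64] = {0};
    int offset = 0;

    // Emit the placeholder word (the __ a_long(0) in the real stub).
    address off_addr = stub + offset;
    offset += sizeof(jint);

    // Emit a variable amount of code whose length depends on runtime
    // conditions (counter update, compare, branch; with or without the
    // mdo offset bias). Pretend it came to 20 bytes.
    offset += 20;

    // The jump is about to be emitted: patch the placeholder with the
    // jump's offset from the start of the stub.
    jint jump_offset = offset;
    memcpy(off_addr, &jump_offset, sizeof(jint));
    offset += 8;  // pretend the jump plus its delay slot takes 8 bytes

    // Runtime side: recover the jump's location from the stub start
    // alone by reading the offset word and adding it to the address.
    jint stored;
    memcpy(&stored, stub, sizeof(jint));
    address jump_at = stub + stored;
    assert(jump_at == stub + jump_offset);
    return 0;
  }

Second, the mdo_offset_bias: SPARC load/store displacements are signed 13-bit immediates (simm13, range -4096..4095), so when a slot's offset does not fit, it is folded into the base register and the instruction encodes only the remainder. A small sketch of that arithmetic (illustrative only, not the HotSpot Assembler API; addresses and offsets are made up):

  #include <cassert>
  #include <cstdint>

  // SPARC load/store displacements are signed 13-bit immediates.
  static bool is_simm13(intptr_t x) { return x >= -4096 && x <= 4095; }

  int main() {
    intptr_t mdo         = 0x100000;  // pretend address of the mdo
    intptr_t slot_offset = 9000;      // too large to encode directly
    intptr_t bias = 0;
    if (!is_simm13(slot_offset)) {
      bias = slot_offset;  // fold the large offset into the base register...
      mdo += bias;
    }
    intptr_t disp = slot_offset - bias;  // ...leaving a simm13 displacement
    assert(is_simm13(disp));
    assert(mdo + disp == 0x100000 + 9000);
    return 0;
  }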