src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
Print this page
rev 2893 : 7121756: Improve C1 inlining policy by using profiling at call sites
Summary: profile based recompilation of methods with C1 with more inlining.
Reviewed-by:
*** 754,763 ****
--- 754,776 ----
void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
RelocationHolder rspec = virtual_call_Relocation::spec(pc());
__ set_oop((jobject)Universe::non_oop_word(), G5_inline_cache_reg);
__ relocate(rspec);
+
+ if (op->info()->is_profiled_call()) {
+ // The static call stub is not used for standard ic calls (a
+ // transition stub is allocated instead for calls to the
+ // interpreter). We emit the static call stub for profiled call
+ // sites anyway because the runtime locates the profile call stub
+ // by first looking up the static call stub and then walking over
+ // it to the profile call stub.
+ emit_static_call_stub();
+ // Emit the profile call stub right behind the static call stub
+ // The call-site bci selects the CounterData slot in the MDO that the
+ // stub increments; the resolver entry is the address the stub's tail
+ // jumps to once the profile counter crosses the threshold.
+ emit_profile_call_stub(op->info()->method(), op->info()->stack()->bci(), SharedRuntime::get_resolve_profile_call_stub());
+ }
+
__ call(op->addr(), relocInfo::none);
// The peephole pass fills the delay slot, add_call_info is done in
// LIR_Assembler::emit_delay.
}
*** 1587,1596 ****
--- 1600,1677 ----
assert(__ offset() - start <= call_stub_size, "stub too big");
__ end_a_stub();
}
+ // Emit the out-of-line profile call stub for a profiled call site.
+ // Stub layout: [jint: offset of patchable jump] [MDO counter increment +
+ // threshold check] [patchable jump to callee] [jump to dest resolver].
+ // 'method'/'bci' select the CounterData slot to increment; 'dest' is the
+ // resolver entry taken once the counter exceeds C1ProfileCompileThreshold.
+ void LIR_Assembler::emit_profile_call_stub(ciMethod* method, int bci, address dest) {
+ ciMethodData* md = method->method_data();
+ if (md == NULL) {
+ bailout("out of memory building methodDataOop");
+ return;
+ }
+ address call_pc = __ pc();
+ address stub = __ start_a_stub(profile_call_stub_size);
+ if (stub == NULL) {
+ bailout("profile call stub overflow");
+ return;
+ }
+
+ int start = __ offset();
+ address off_addr = __ pc();
+
+ // The runtime needs the starting address of the profile call stub
+ // (to make the call site jump to the stub) and the location of the
+ // first jump in the stub (to make it branch to the callee). The
+ // starting address is found by first looking up the static call
+ // stub and then finding the profile call stub right behind
+ // it. Finding the jump is tricky because the code emitted before it
+ // depends on runtime conditions. Here, we first emit an integer (0)
+ // that we change to contain the offset of the jump within the stub
+ // when the jump is emitted and the offset is known. Locating the
+ // jump can then be done from the runtime by reading this offset and
+ // adding it to the address of the start of the stub.
+ __ a_long(0);
+
+ // NOTE(review): bci_to_data() may return NULL when no profile data
+ // exists at this bci; the assert below would then fault in debug
+ // builds. Presumably non-NULL is guaranteed for profiled call sites
+ // — confirm against the callers in ic_call/static call emission.
+ ciProfileData* data = md->bci_to_data(bci);
+ assert(data->is_CounterData(), "need CounterData for calls");
+ Register mdo = G4;
+ jobject2reg(md->constant_encoding(), mdo);
+
+ int mdo_offset_bias = 0;
+ if (!Assembler::is_simm13(md->byte_offset_of_slot(data, CounterData::count_offset()) +
+ data->size_in_bytes())) {
+ // The offset is large so bias the mdo by the base of the slot so
+ // that the ld can use simm13s to reference the slots of the data
+ mdo_offset_bias = md->byte_offset_of_slot(data, CounterData::count_offset());
+ __ set(mdo_offset_bias, G1);
+ __ add(mdo, G1, mdo);
+ }
+ // Despite the name, this addresses the CounterData count slot, not a
+ // flags field: the bias (if any) has already been folded into mdo above.
+ Address flags_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
+
+ // Load the 32-bit profile counter, increment it, and compare it
+ // (unsigned) against the C1 profile compile threshold.
+ __ lduw(flags_addr, G1);
+
+ __ add(G1, DataLayout::counter_increment, G1);
+ __ set(C1ProfileCompileThreshold, G3);
+ __ cmp(G1, G3);
+
+ Label L;
+ // Branch to the resolver path when the counter exceeds the threshold
+ // (predicted not taken). The branch is not annulled, so the delay-slot
+ // store writes the incremented counter back on both paths.
+ __ br(Assembler::greaterUnsigned, false, Assembler::pn, L);
+ __ delayed()->stw(G1, flags_addr);
+
+ // Back-patch the word emitted at off_addr with the offset of the
+ // patchable jump from the start of the stub, as described above.
+ *(jint*)off_addr = __ offset() - start;
+
+ // Patchable jump with a placeholder destination (-1); presumably the
+ // runtime patches in the callee entry once it is resolved — confirm.
+ AddressLiteral addrlit(-1);
+ __ jump_to(addrlit, G3);
+ __ delayed()->nop();
+
+ // Over-threshold path: jump to the resolver entry supplied in 'dest'.
+ __ bind(L);
+ __ jump_to(AddressLiteral(dest), G3);
+ __ delayed()->nop();
+
+ assert(__ offset() - start <= profile_call_stub_size, "stub too big");
+ __ end_a_stub();
+ }
void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
if (opr1->is_single_fpu()) {
__ fcmp(FloatRegisterImpl::S, Assembler::fcc0, opr1->as_float_reg(), opr2->as_float_reg());
} else if (opr1->is_double_fpu()) {