src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp

rev 2893 : 7121756: Improve C1 inlining policy by using profiling at call sites
Summary: profile-based recompilation of methods with C1, with more inlining.
Reviewed-by:


 739   }
 740 }
 741 
 742 
 743 void LIR_Assembler::align_call(LIR_Code) {
 744   // do nothing since all instructions are word aligned on sparc
 745 }
 746 
 747 
 748 void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
 749   __ call(op->addr(), rtype);
 750   // The peephole pass fills the delay slot, add_call_info is done in
 751   // LIR_Assembler::emit_delay.
 752 }
 753 
 754 
 755 void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
 756   RelocationHolder rspec = virtual_call_Relocation::spec(pc());
 757   __ set_oop((jobject)Universe::non_oop_word(), G5_inline_cache_reg);
 758   __ relocate(rspec);
 759   __ call(op->addr(), relocInfo::none);
 760   // The peephole pass fills the delay slot, add_call_info is done in
 761   // LIR_Assembler::emit_delay.
 762 }
 763 
 764 
 765 void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
 766   add_debug_info_for_null_check_here(op->info());
 767   __ load_klass(O0, G3_scratch);
 768   if (Assembler::is_simm13(op->vtable_offset())) {
 769     __ ld_ptr(G3_scratch, op->vtable_offset(), G5_method);
 770   } else {
 771     // This will generate 2 instructions
 772     __ set(op->vtable_offset(), G5_method);
 773     // ld_ptr, set_hi, set
 774     __ ld_ptr(G3_scratch, G5_method, G5_method);
 775   }
 776   __ ld_ptr(G5_method, methodOopDesc::from_compiled_offset(), G3_scratch);
 777   __ callr(G3_scratch, G0);
 778   // the peephole pass fills the delay slot


1572   address call_pc = __ pc();
1573   address stub = __ start_a_stub(call_stub_size);
1574   if (stub == NULL) {
1575     bailout("static call stub overflow");
1576     return;
1577   }
1578 
1579   int start = __ offset();
1580   __ relocate(static_stub_Relocation::spec(call_pc));
1581 
1582   __ set_oop(NULL, G5);
1583   // must be set to -1 at code generation time
1584   AddressLiteral addrlit(-1);
1585   __ jump_to(addrlit, G3);
1586   __ delayed()->nop();
1587 
1588   assert(__ offset() - start <= call_stub_size, "stub too big");
1589   __ end_a_stub();
1590 }
1591 
1592 
1593 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
1594   if (opr1->is_single_fpu()) {
1595     __ fcmp(FloatRegisterImpl::S, Assembler::fcc0, opr1->as_float_reg(), opr2->as_float_reg());
1596   } else if (opr1->is_double_fpu()) {
1597     __ fcmp(FloatRegisterImpl::D, Assembler::fcc0, opr1->as_double_reg(), opr2->as_double_reg());
1598   } else if (opr1->is_single_cpu()) {
1599     if (opr2->is_constant()) {
1600       switch (opr2->as_constant_ptr()->type()) {
1601         case T_INT:
1602           { jint con = opr2->as_constant_ptr()->as_jint();
1603             if (Assembler::is_simm13(con)) {
1604               __ cmp(opr1->as_register(), con);
1605             } else {
1606               __ set(con, O7);
1607               __ cmp(opr1->as_register(), O7);
1608             }
1609           }
1610           break;
1611 




 739   }
 740 }
 741 
 742 
 743 void LIR_Assembler::align_call(LIR_Code) {
 744   // do nothing since all instructions are word aligned on sparc
 745 }
 746 
 747 
 748 void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
 749   __ call(op->addr(), rtype);
 750   // The peephole pass fills the delay slot, add_call_info is done in
 751   // LIR_Assembler::emit_delay.
 752 }
 753 
 754 
 755 void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
 756   RelocationHolder rspec = virtual_call_Relocation::spec(pc());
 757   __ set_oop((jobject)Universe::non_oop_word(), G5_inline_cache_reg);
 758   __ relocate(rspec);
 759 
 760   if (op->info()->is_profiled_call()) {
 761     // The static call stub is not used for standard ic calls (a
 762     // transition stub is allocated instead for calls to the
 763     // interpreter). We emit the static call stub for profiled call
 764     // sites anyway because the runtime locates the profile call stub
 765     // by first looking up the static call stub and then walking over
 766     // it to the profile call stub.
 767     emit_static_call_stub();
 768     // Emit the profile call stub right behind the static call stub
 769     emit_profile_call_stub(op->info()->method(), op->info()->stack()->bci(), SharedRuntime::get_resolve_profile_call_stub());
 770   }
 771 
 772   __ call(op->addr(), relocInfo::none);
 773   // The peephole pass fills the delay slot, add_call_info is done in
 774   // LIR_Assembler::emit_delay.
 775 }
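
A minimal sketch of the lookup described in the comment in ic_call above, assuming a hypothetical helper find_static_call_stub() and the [static call stub][profile call stub] layout emitted here; the real lookup lives in the runtime, not in this file:

  // Sketch only: walk from a profiled ic call site to its profile call stub.
  // find_static_call_stub() and static_call_stub_size() are hypothetical names
  // standing in for whatever the runtime uses to locate and measure the static stub.
  static address profile_call_stub_for(address call_site_pc) {
    address static_stub = find_static_call_stub(call_site_pc);  // emitted first
    return static_stub + static_call_stub_size();               // profile stub sits right behind it
  }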
 776 
 777 
 778 void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
 779   add_debug_info_for_null_check_here(op->info());
 780   __ load_klass(O0, G3_scratch);
 781   if (Assembler::is_simm13(op->vtable_offset())) {
 782     __ ld_ptr(G3_scratch, op->vtable_offset(), G5_method);
 783   } else {
 784     // This will generate 2 instructions
 785     __ set(op->vtable_offset(), G5_method);
 786     // ld_ptr, set_hi, set
 787     __ ld_ptr(G3_scratch, G5_method, G5_method);
 788   }
 789   __ ld_ptr(G5_method, methodOopDesc::from_compiled_offset(), G3_scratch);
 790   __ callr(G3_scratch, G0);
 791   // the peephole pass fills the delay slot


1585   address call_pc = __ pc();
1586   address stub = __ start_a_stub(call_stub_size);
1587   if (stub == NULL) {
1588     bailout("static call stub overflow");
1589     return;
1590   }
1591 
1592   int start = __ offset();
1593   __ relocate(static_stub_Relocation::spec(call_pc));
1594 
1595   __ set_oop(NULL, G5);
1596   // must be set to -1 at code generation time
1597   AddressLiteral addrlit(-1);
1598   __ jump_to(addrlit, G3);
1599   __ delayed()->nop();
1600 
1601   assert(__ offset() - start <= call_stub_size, "stub too big");
1602   __ end_a_stub();
1603 }
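
As a hedged point of reference (not part of this change), a sketch of how a stub of this shape is typically patched once the call is resolved, assuming HotSpot's native-instruction accessors; the actual patching lives in the compiled-IC / static-call machinery elsewhere:

  // Sketch only: fill in the method oop loaded by set_oop(NULL, G5) above and
  // retarget the jump_to(-1).
  static void patch_static_call_stub(address stub, methodOop callee, address entry) {
    NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
    NativeJump*        jump = nativeJump_at(method_holder->next_instruction_address());
    method_holder->set_data((intptr_t)callee);   // replaces the NULL oop placeholder
    jump->set_jump_destination(entry);           // replaces the -1 placeholder
  }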
1604 
1605 void LIR_Assembler::emit_profile_call_stub(ciMethod* method, int bci, address dest) {
1606   ciMethodData* md = method->method_data();
1607   if (md == NULL) {
1608     bailout("out of memory building methodDataOop");
1609     return;
1610   }
1611   address call_pc = __ pc();
1612   address stub = __ start_a_stub(profile_call_stub_size);
1613   if (stub == NULL) {
1614     bailout("profile call stub overflow");
1615     return;
1616   }
1617 
1618   int start = __ offset();
1619   address off_addr = __ pc();
1620   
1621   // The runtime needs the starting address of the profile call stub
1622   // (to make the call site jump to the stub) and the location of the
1623   // first jump in the stub (to make it branch to the callee). The
1624   // starting address is found by first looking up the static call
1625   // stub and then finding the profile call stub right behind
1626   // it. Finding the jump is tricky because the code emitted before it
1627   // depends on runtime conditions. Here, we first emit an integer (0)
1628   // that we change to contain the offset of the jump within the stub
1629   // when the jump is emitted and the offset is known. Locating the
1630   // jump can then be done from the runtime by reading this offset and
1631   // adding it to the address of the start of the stub.
1632   __ a_long(0);
1633   
1634   ciProfileData* data = md->bci_to_data(bci);
1635   assert(data->is_CounterData(), "need CounterData for calls");
1636   Register mdo = G4;
1637   jobject2reg(md->constant_encoding(), mdo);
1638 
1639   int mdo_offset_bias = 0;
1640   if (!Assembler::is_simm13(md->byte_offset_of_slot(data, CounterData::count_offset()) +
1641                             data->size_in_bytes())) {
1642     // The offset is large so bias the mdo by the base of the slot so
1643     // that the ld can use simm13s to reference the slots of the data
1644     mdo_offset_bias = md->byte_offset_of_slot(data, CounterData::count_offset());
1645     __ set(mdo_offset_bias, G1);
1646     __ add(mdo, G1, mdo);
1647   }
1648   Address flags_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
1649 
1650   __ lduw(flags_addr, G1);
1651 
1652   __ add(G1, DataLayout::counter_increment, G1);
1653   __ set(C1ProfileCompileThreshold, G3);
1654   __ cmp(G1, G3);
1655 
1656   Label L;
1657   __ br(Assembler::greaterUnsigned, false, Assembler::pn, L);
1658   __ delayed()->stw(G1, flags_addr);
1659   
1660   *(jint*)off_addr = __ offset() - start;
1661 
1662   AddressLiteral addrlit(-1);
1663   __ jump_to(addrlit, G3);
1664   __ delayed()->nop();
1665 
1666   __ bind(L);
1667   __ jump_to(AddressLiteral(dest), G3);
1668   __ delayed()->nop();
1669 
1670   assert(__ offset() - start <= profile_call_stub_size, "stub too big");
1671   __ end_a_stub();
1672 }
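
A minimal sketch, following the comment at the top of emit_profile_call_stub, of how the runtime can locate the patchable jump from the jint offset word emitted by __ a_long(0); this only illustrates the layout stated there, it is not code from the patch:

  // Sketch only: the first word of the profile call stub holds the offset, from
  // the start of the stub, of the jump that gets patched to point at the callee.
  static address profile_stub_jump_location(address stub_start) {
    jint jump_offset = *(jint*)stub_start;   // written via *(jint*)off_addr above
    return stub_start + jump_offset;
  }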
1673 
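A short illustrative note on the mdo_offset_bias logic above (and on the is_simm13 checks in vtable_call and comp_op): the assumption is that is_simm13 covers SPARC's signed 13-bit immediate range, so a bias is only needed when a counter slot lies roughly 4 KB or more from the mdo base.

  // Sketch only: the signed 13-bit range assumed above.
  static bool fits_simm13(intptr_t x) { return -4096 <= x && x <= 4095; }
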
1674 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
1675   if (opr1->is_single_fpu()) {
1676     __ fcmp(FloatRegisterImpl::S, Assembler::fcc0, opr1->as_float_reg(), opr2->as_float_reg());
1677   } else if (opr1->is_double_fpu()) {
1678     __ fcmp(FloatRegisterImpl::D, Assembler::fcc0, opr1->as_double_reg(), opr2->as_double_reg());
1679   } else if (opr1->is_single_cpu()) {
1680     if (opr2->is_constant()) {
1681       switch (opr2->as_constant_ptr()->type()) {
1682         case T_INT:
1683           { jint con = opr2->as_constant_ptr()->as_jint();
1684             if (Assembler::is_simm13(con)) {
1685               __ cmp(opr1->as_register(), con);
1686             } else {
1687               __ set(con, O7);
1688               __ cmp(opr1->as_register(), O7);
1689             }
1690           }
1691           break;
1692