
  // get (wide) offset to O1_disp
  const Register O1_disp = O1;
  if (is_wide) __ get_4_byte_integer_at_bcp( 1, G4_scratch, O1_disp, InterpreterMacroAssembler::set_CC);
  else         __ get_2_byte_integer_at_bcp( 1, G4_scratch, O1_disp, InterpreterMacroAssembler::Signed, InterpreterMacroAssembler::set_CC);
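
  // Operand layout of the branch bytecodes (JVMS): goto/jsr/if<cond> carry a
  // signed 16-bit displacement in bytes 1-2 of the instruction, while the wide
  // forms goto_w/jsr_w carry a signed 32-bit displacement in bytes 1-4. Both
  // are relative to the opcode address, which is what Lbcp points at.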

  // Handle all the JSR stuff here, then exit.
  // It's much shorter and cleaner than intermingling with the
  // non-JSR normal-branch stuff occurring below.
  if( is_jsr ) {
    // compute return address as bci in Otos_i
    __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch);
    __ sub(Lbcp, G3_scratch, G3_scratch);
    __ sub(G3_scratch, in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3), Otos_i);
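
    // Worked example (non-wide jsr at bci 10): Lbcp - ConstMethod* equals
    // codes_offset + 10, so Otos_i ends up as 10 + 3 -- the bci of the
    // bytecode following the 3-byte jsr, i.e. the return address as a bci.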

    // Bump Lbcp to target of JSR
    __ add(Lbcp, O1_disp, Lbcp);
    // Push returnAddress for "ret" on stack
    __ push_ptr(Otos_i);
    // And away we go!
    __ dispatch_next(vtos, 0, true);
    return;
  }

  // Normal (non-jsr) branch handling

  // Save the current Lbcp
  const Register l_cur_bcp = Lscratch;
  __ mov( Lbcp, l_cur_bcp );

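  // For a backward branch (negative displacement) the interpreter bumps the
  // method's backedge counter; with UseOnStackReplacement it also compares the
  // count against the OSR threshold so a hot loop can be compiled and entered
  // in the middle of execution. Forward branches go straight to Lforward and
  // skip the bookkeeping.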
  bool increment_invocation_counter_for_backward_branches = UseCompiler && UseLoopCounter;
  if ( increment_invocation_counter_for_backward_branches ) {
    Label Lforward;
    // check branch direction
    __ br( Assembler::positive, false, Assembler::pn, Lforward );
    // Bump bytecode pointer by displacement (take the branch)
    __ delayed()->add( O1_disp, Lbcp, Lbcp ); // add to bc addr

    const Register G3_method_counters = G3_scratch;
    __ get_method_counters(Lmethod, G3_method_counters, Lforward);

      // ... (counter-update code elided) ...
        __ test_invocation_counter_for_mdp(G4_invoke_ctr, G3_method_counters, G1_scratch, Lforward);
        if (UseOnStackReplacement) {
          __ test_backedge_count_for_osr(O2_bumped_count, G3_method_counters, l_cur_bcp, G1_scratch);
        }
      } else {
        if (UseOnStackReplacement) {
          __ test_backedge_count_for_osr(G4_invoke_ctr, G3_method_counters, l_cur_bcp, G1_scratch);
        }
      }
    }

    __ bind(Lforward);
  } else
    // Bump bytecode pointer by displacement (take the branch)
    __ add( O1_disp, Lbcp, Lbcp );// add to bc addr

  // continue with bytecode @ target
  // %%%%% Like Intel, could speed things up by moving bytecode fetch to code above,
  // %%%%% and changing dispatch_next to dispatch_only
  __ dispatch_next(vtos, 0, true);
}

// Note: the Condition argument is TemplateTable::Condition;
// the name is scoped within the class.

void TemplateTable::if_0cmp(Condition cc) {
  // no pointers, integer only!
  transition(itos, vtos);
  // assume branch is more often taken than not (loops use backward branches)
  __ cmp( Otos_i, 0);
  __ if_cmp(ccNot(cc), false);
}
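
// Passing ccNot(cc) inverts the sense of the test: the generated code
// branches over the branch-taken path when the bytecode's condition fails.
// E.g. ifeq (cc == equal) emits a branch on notEqual to the fall-through
// continuation, so the common taken case runs straight-line.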


void TemplateTable::if_icmp(Condition cc) {
  transition(itos, vtos);
  __ pop_i(O1);
  __ cmp(O1, Otos_i);
  __ if_cmp(ccNot(cc), false);

  // ... (elided: remainder of the compare bytecodes and the start of
  //      TemplateTable::ret(), including the #ifdef guard matched by the
  //      #endif below) ...

  // jsr result was labeled as an 'itos' not an 'atos' because we cannot GC
  // the result. The return address (really a BCI) was stored with an
  // 'astore' because JVM specs claim it's a pointer-sized thing. Hence in
  // the 64-bit build the 32-bit BCI is actually in the low bits of a 64-bit
  // loaded value.
  { Label zzz ;
     __ set (65536, G3_scratch) ;
     __ cmp (Otos_i, G3_scratch) ;
     __ bp( Assembler::lessEqualUnsigned, false, Assembler::xcc, Assembler::pn, zzz);
     __ delayed()->nop();
     __ stop("BCI is in the wrong register half?");
     __ bind (zzz) ;
  }
#endif

  __ profile_ret(vtos, Otos_i, G4_scratch);

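  // Reconstruct the bcp from the bci: Lbcp = ConstMethod* + codes_offset + bci.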
  __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch);
  __ add(G3_scratch, Otos_i, G3_scratch);
  __ add(G3_scratch, in_bytes(ConstMethod::codes_offset()), Lbcp);
  __ dispatch_next(vtos, 0, true);
}


void TemplateTable::wide_ret() {
  transition(vtos, vtos);
  locals_index_wide(G3_scratch);
  __ access_local_returnAddress(G3_scratch, Otos_i);
  // Otos_i contains the bci, compute the bcp from that

  __ profile_ret(vtos, Otos_i, G4_scratch);

  __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch);
  __ add(G3_scratch, Otos_i, G3_scratch);
  __ add(G3_scratch, in_bytes(ConstMethod::codes_offset()), Lbcp);
  __ dispatch_next(vtos, 0, true);
}


void TemplateTable::tableswitch() {
  transition(itos, vtos);
  Label default_case, continue_execution;
1702 // align bcp
1703 __ add(Lbcp, BytesPerInt, O1);
1704 __ and3(O1, -BytesPerInt, O1);
1705 // load lo, hi
1706 __ ld(O1, 1 * BytesPerInt, O2); // Low Byte
1707 __ ld(O1, 2 * BytesPerInt, O3); // High Byte
1708 // Sign extend the 32 bits
1709 __ sra ( Otos_i, 0, Otos_i );
1710
1711 // check against lo & hi
1712 __ cmp( Otos_i, O2);
1713 __ br( Assembler::less, false, Assembler::pn, default_case);
1714 __ delayed()->cmp( Otos_i, O3 );
1715 __ br( Assembler::greater, false, Assembler::pn, default_case);
1716 // lookup dispatch offset
1717 __ delayed()->sub(Otos_i, O2, O2);
1718 __ profile_switch_case(O2, O3, G3_scratch, G4_scratch);
1719 __ sll(O2, LogBytesPerInt, O2);
1720 __ add(O2, 3 * BytesPerInt, O2);
1721 __ ba(continue_execution);
1722 __ delayed()->ld(O1, O2, O2);
1723 // handle default
1724 __ bind(default_case);
1725 __ profile_switch_default(O3);
1726 __ ld(O1, 0, O2); // get default offset
1727 // continue execution
1728 __ bind(continue_execution);
1729 __ add(Lbcp, O2, Lbcp);
1730 __ dispatch_next(vtos, 0, true);
1731 }
1732
1733
1734 void TemplateTable::lookupswitch() {
1735 transition(itos, itos);
1736 __ stop("lookupswitch bytecode should have been rewritten");
1737 }
1738
1739 void TemplateTable::fast_linearswitch() {
1740 transition(itos, vtos);
1741 Label loop_entry, loop, found, continue_execution;
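  // lookupswitch operand layout, after alignment padding (JVMS):
  // [default offset][npairs][match1][offset1]...[matchN][offsetN].
  // fast_linearswitch scans the (match, offset) pairs sequentially.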
  // align bcp
  __ add(Lbcp, BytesPerInt, O1);
  __ and3(O1, -BytesPerInt, O1);
  // set counter
  __ ld(O1, BytesPerInt, O2);           // npairs
  __ sll(O2, LogBytesPerInt + 1, O2);   // pair count -> byte size (two words per pair)
  __ add(O1, 2 * BytesPerInt, O3);      // set first pair addr
  __ ba(loop_entry);
  __ delayed()->add(O3, O2, O2);        // counter now points past last pair

  // ... (linear search loop elided) ...
  __ delayed()->ld(O3, 0, O4);

  // default case
  __ ld(O1, 0, O4); // get default offset
  if (ProfileInterpreter) {
    __ profile_switch_default(O3);
    __ ba_short(continue_execution);
  }

  // entry found -> get offset
  __ bind(found);
  if (ProfileInterpreter) {
    __ sub(O3, O1, O3);
    __ sub(O3, 2*BytesPerInt, O3);
    __ srl(O3, LogBytesPerInt + 1, O3); // byte offset -> pair index
    __ profile_switch_case(O3, O1, O2, G3_scratch);

    __ bind(continue_execution);
  }
  __ add(Lbcp, O4, Lbcp);
  __ dispatch_next(vtos, 0, true);
}


void TemplateTable::fast_binaryswitch() {
  transition(itos, vtos);
  // Implementation using the following core algorithm: (copied from Intel)
  //
  // int binary_search(int key, LookupswitchPair* array, int n) {
  //   // Binary search according to "Methodik des Programmierens" by
  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  //   int i = 0;
  //   int j = n;
  //   while (i+1 < j) {
  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  //     // with Q: for all i: 0 <= i < n: key < a[i]
  //     // where a stands for the array and assuming that the (nonexistent)
  //     // element a[n] is infinitely big.
  //     int h = (i + j) >> 1;
  //     // i < h < j
  //     if (key < array[h].fast_match()) {

  // ... (remainder of the algorithm comment and the generated search loop elided) ...

  // (Rscratch is already in the native byte-ordering.)
  __ cmp( Rkey, Rscratch );
  __ br( Assembler::notEqual, true, Assembler::pn, default_case );
  __ delayed()->ld( Rarray, -2 * BytesPerInt, Rj ); // load default offset -> j

  // entry found -> j = offset
  __ inc( Ri, BytesPerInt );
  __ profile_switch_case(Rh, Rj, Rscratch, Rkey);
  __ ld( Rarray, Ri, Rj );
  // (Rj is already in the native byte-ordering.)

  if (ProfileInterpreter) {
    __ ba_short(continue_execution);
  }

  __ bind(default_case); // fall through (if not profiling)
  __ profile_switch_default(Ri);

  __ bind(continue_execution);
  __ add( Lbcp, Rj, Lbcp );
  __ dispatch_next(vtos, 0, true);
}


void TemplateTable::_return(TosState state) {
  transition(state, state);
  assert(_desc->calls_vm(), "inconsistent calls_vm information");

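  // Thread-local handshake poll (JEP 312): each JavaThread has a private
  // polling word in which SafepointMechanism::poll_bit() is set while a
  // safepoint or handshake is pending; when the poll is armed we call
  // InterpreterRuntime::at_safepoint before leaving the frame.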
  if (SafepointMechanism::uses_thread_local_poll()) {
    Label no_safepoint;
    __ ldx(Address(G2_thread, Thread::polling_page_offset()), G3_scratch, 0);
    __ btst(SafepointMechanism::poll_bit(), G3_scratch);
    __ br(Assembler::zero, false, Assembler::pt, no_safepoint);
    __ delayed()->nop();
    __ push(state);   // preserve the TOS value across the runtime call
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint));
    __ pop(state);    // restore it
    __ bind(no_safepoint);
  }

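  // _return_register_finalizer is the rewritten form of the return in
  // Object.<init> (see RegisterFinalizersAtInit); in outline it performs:
  //
  //   if (receiver->klass()->access_flags() & JVM_ACC_HAS_FINALIZER)
  //     InterpreterRuntime::register_finalizer(receiver);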
  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
    assert(state == vtos, "only valid state");
    __ mov(G0, G3_scratch);                 // local 0 is the receiver ('this')
    __ access_local_ptr(G3_scratch, Otos_i);
    __ load_klass(Otos_i, O2);
    __ set(JVM_ACC_HAS_FINALIZER, G3);
    __ ld(O2, in_bytes(Klass::access_flags_offset()), O2);
    __ andcc(G3, O2, G0);
    Label skip_register_finalizer;
    __ br(Assembler::zero, false, Assembler::pn, skip_register_finalizer);
    __ delayed()->nop();

    // Call out to do finalizer registration
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), Otos_i);

    __ bind(skip_register_finalizer);
  }

  // Narrow result if state is itos but result type is smaller.