
  // get (wide) offset to O1_disp
  const Register O1_disp = O1;
  if (is_wide) __ get_4_byte_integer_at_bcp( 1, G4_scratch, O1_disp, InterpreterMacroAssembler::set_CC);
  else         __ get_2_byte_integer_at_bcp( 1, G4_scratch, O1_disp, InterpreterMacroAssembler::Signed, InterpreterMacroAssembler::set_CC);
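
  // Operand layout of the branch bytecodes (JVMS): goto/jsr/if<cond> carry a
  // signed 16-bit displacement in bytes 1-2 of the instruction, while the wide
  // forms goto_w/jsr_w carry a signed 32-bit displacement in bytes 1-4. Both
  // are relative to the opcode address, which is what Lbcp points at.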

  // Handle all the JSR stuff here, then exit.
  // It's much shorter and cleaner than intermingling with the
  // non-JSR normal-branch stuff occurring below.
  if( is_jsr ) {
    // compute return address as bci in Otos_i
    __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch);
    __ sub(Lbcp, G3_scratch, G3_scratch);
    __ sub(G3_scratch, in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3), Otos_i);
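
    // Worked example (non-wide jsr at bci 10): Lbcp - ConstMethod* equals
    // codes_offset + 10, so Otos_i ends up as 10 + 3 -- the bci of the
    // bytecode following the 3-byte jsr, i.e. the return address as a bci.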

    // Bump Lbcp to target of JSR
    __ add(Lbcp, O1_disp, Lbcp);
    // Push returnAddress for "ret" on stack
    __ push_ptr(Otos_i);
    // And away we go!
    __ dispatch_next(vtos, 0, true);
    return;
  }

  // Normal (non-jsr) branch handling

  // Save the current Lbcp
  const Register l_cur_bcp = Lscratch;
  __ mov( Lbcp, l_cur_bcp );

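  // For a backward branch (negative displacement) the interpreter bumps the
  // method's backedge counter; with UseOnStackReplacement it also compares the
  // count against the OSR threshold so a hot loop can be compiled and entered
  // in the middle of execution. Forward branches go straight to Lforward and
  // skip the bookkeeping.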
  bool increment_invocation_counter_for_backward_branches = UseCompiler && UseLoopCounter;
  if ( increment_invocation_counter_for_backward_branches ) {
    Label Lforward;
    // check branch direction
    __ br( Assembler::positive, false, Assembler::pn, Lforward );
    // Bump bytecode pointer by displacement (take the branch)
    __ delayed()->add( O1_disp, Lbcp, Lbcp ); // add to bc addr

    const Register G3_method_counters = G3_scratch;
    __ get_method_counters(Lmethod, G3_method_counters, Lforward);

      // ... (counter-update code elided) ...
        __ test_invocation_counter_for_mdp(G4_invoke_ctr, G3_method_counters, G1_scratch, Lforward);
        if (UseOnStackReplacement) {
          __ test_backedge_count_for_osr(O2_bumped_count, G3_method_counters, l_cur_bcp, G1_scratch);
        }
      } else {
        if (UseOnStackReplacement) {
          __ test_backedge_count_for_osr(G4_invoke_ctr, G3_method_counters, l_cur_bcp, G1_scratch);
        }
      }
    }

    __ bind(Lforward);
  } else
    // Bump bytecode pointer by displacement (take the branch)
    __ add( O1_disp, Lbcp, Lbcp );// add to bc addr

  // continue with bytecode @ target
  // %%%%% Like Intel, could speed things up by moving bytecode fetch to code above,
  // %%%%% and changing dispatch_next to dispatch_only
  __ dispatch_next(vtos, 0, true);
}

// Note: the Condition argument is TemplateTable::Condition;
// the name is scoped within the class.

void TemplateTable::if_0cmp(Condition cc) {
  // no pointers, integer only!
  transition(itos, vtos);
  // assume branch is more often taken than not (loops use backward branches)
  __ cmp( Otos_i, 0);
  __ if_cmp(ccNot(cc), false);
}
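
// Passing ccNot(cc) inverts the sense of the test: the generated code
// branches over the branch-taken path when the bytecode's condition fails.
// E.g. ifeq (cc == equal) emits a branch on notEqual to the fall-through
// continuation, so the common taken case runs straight-line.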


void TemplateTable::if_icmp(Condition cc) {
  transition(itos, vtos);
  __ pop_i(O1);
  __ cmp(O1, Otos_i);
  __ if_cmp(ccNot(cc), false);

  // ... (elided: remainder of the compare bytecodes and the start of
  //      TemplateTable::ret(), including the #ifdef guard matched by the
  //      #endif below) ...

  // jsr result was labeled as an 'itos' not an 'atos' because we cannot GC
  // the result. The return address (really a BCI) was stored with an
  // 'astore' because JVM specs claim it's a pointer-sized thing. Hence in
  // the 64-bit build the 32-bit BCI is actually in the low bits of a 64-bit
  // loaded value.
  { Label zzz ;
     __ set (65536, G3_scratch) ;
     __ cmp (Otos_i, G3_scratch) ;
     __ bp( Assembler::lessEqualUnsigned, false, Assembler::xcc, Assembler::pn, zzz);
     __ delayed()->nop();
     __ stop("BCI is in the wrong register half?");
     __ bind (zzz) ;
  }
#endif

  __ profile_ret(vtos, Otos_i, G4_scratch);

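  // Reconstruct the bcp from the bci: Lbcp = ConstMethod* + codes_offset + bci.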
  __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch);
  __ add(G3_scratch, Otos_i, G3_scratch);
  __ add(G3_scratch, in_bytes(ConstMethod::codes_offset()), Lbcp);
  __ dispatch_next(vtos, 0, true);
}


void TemplateTable::wide_ret() {
  transition(vtos, vtos);
  locals_index_wide(G3_scratch);
  __ access_local_returnAddress(G3_scratch, Otos_i);
  // Otos_i contains the bci, compute the bcp from that

  __ profile_ret(vtos, Otos_i, G4_scratch);

  __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch);
  __ add(G3_scratch, Otos_i, G3_scratch);
  __ add(G3_scratch, in_bytes(ConstMethod::codes_offset()), Lbcp);
  __ dispatch_next(vtos, 0, true);
}


void TemplateTable::tableswitch() {
  transition(itos, vtos);
  Label default_case, continue_execution;
1702 // align bcp
1703 __ add(Lbcp, BytesPerInt, O1);
1704 __ and3(O1, -BytesPerInt, O1);
1705 // load lo, hi
1706 __ ld(O1, 1 * BytesPerInt, O2); // Low Byte
1707 __ ld(O1, 2 * BytesPerInt, O3); // High Byte
1708 // Sign extend the 32 bits
1709 __ sra ( Otos_i, 0, Otos_i );
1710
1711 // check against lo & hi
1712 __ cmp( Otos_i, O2);
1713 __ br( Assembler::less, false, Assembler::pn, default_case);
1714 __ delayed()->cmp( Otos_i, O3 );
1715 __ br( Assembler::greater, false, Assembler::pn, default_case);
1716 // lookup dispatch offset
1717 __ delayed()->sub(Otos_i, O2, O2);
1718 __ profile_switch_case(O2, O3, G3_scratch, G4_scratch);
1719 __ sll(O2, LogBytesPerInt, O2);
1720 __ add(O2, 3 * BytesPerInt, O2);
1721 __ ba(continue_execution);
1722 __ delayed()->ld(O1, O2, O2);
1723 // handle default
1724 __ bind(default_case);
1725 __ profile_switch_default(O3);
1726 __ ld(O1, 0, O2); // get default offset
1727 // continue execution
1728 __ bind(continue_execution);
1729 __ add(Lbcp, O2, Lbcp);
1730 __ dispatch_next(vtos, 0, true);
1731 }
1732
1733
1734 void TemplateTable::lookupswitch() {
1735 transition(itos, itos);
1736 __ stop("lookupswitch bytecode should have been rewritten");
1737 }
1738
1739 void TemplateTable::fast_linearswitch() {
1740 transition(itos, vtos);
1741 Label loop_entry, loop, found, continue_execution;
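  // lookupswitch operand layout, after alignment padding (JVMS):
  // [default offset][npairs][match1][offset1]...[matchN][offsetN].
  // fast_linearswitch scans the (match, offset) pairs sequentially.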
  // align bcp
  __ add(Lbcp, BytesPerInt, O1);
  __ and3(O1, -BytesPerInt, O1);
  // set counter
  __ ld(O1, BytesPerInt, O2);           // npairs
  __ sll(O2, LogBytesPerInt + 1, O2);   // pair count -> byte size (two words per pair)
  __ add(O1, 2 * BytesPerInt, O3);      // set first pair addr
  __ ba(loop_entry);
  __ delayed()->add(O3, O2, O2);        // counter now points past last pair

  // ... (linear search loop elided) ...
  __ delayed()->ld(O3, 0, O4);

  // default case
  __ ld(O1, 0, O4); // get default offset
  if (ProfileInterpreter) {
    __ profile_switch_default(O3);
    __ ba_short(continue_execution);
  }

  // entry found -> get offset
  __ bind(found);
  if (ProfileInterpreter) {
    __ sub(O3, O1, O3);
    __ sub(O3, 2*BytesPerInt, O3);
    __ srl(O3, LogBytesPerInt + 1, O3); // byte offset -> pair index
    __ profile_switch_case(O3, O1, O2, G3_scratch);

    __ bind(continue_execution);
  }
  __ add(Lbcp, O4, Lbcp);
  __ dispatch_next(vtos, 0, true);
}


void TemplateTable::fast_binaryswitch() {
  transition(itos, vtos);
  // Implementation using the following core algorithm: (copied from Intel)
  //
  // int binary_search(int key, LookupswitchPair* array, int n) {
  //   // Binary search according to "Methodik des Programmierens" by
  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  //   int i = 0;
  //   int j = n;
  //   while (i+1 < j) {
  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  //     // with Q: for all i: 0 <= i < n: key < a[i]
  //     // where a stands for the array and assuming that the (nonexistent)
  //     // element a[n] is infinitely big.
  //     int h = (i + j) >> 1;
  //     // i < h < j
  //     if (key < array[h].fast_match()) {

  // ... (remainder of the algorithm comment and the generated search loop elided) ...

  // (Rscratch is already in the native byte-ordering.)
  __ cmp( Rkey, Rscratch );
  __ br( Assembler::notEqual, true, Assembler::pn, default_case );
  __ delayed()->ld( Rarray, -2 * BytesPerInt, Rj ); // load default offset -> j

  // entry found -> j = offset
  __ inc( Ri, BytesPerInt );
  __ profile_switch_case(Rh, Rj, Rscratch, Rkey);
  __ ld( Rarray, Ri, Rj );
  // (Rj is already in the native byte-ordering.)

  if (ProfileInterpreter) {
    __ ba_short(continue_execution);
  }

  __ bind(default_case); // fall through (if not profiling)
  __ profile_switch_default(Ri);

  __ bind(continue_execution);
  __ add( Lbcp, Rj, Lbcp );
  __ dispatch_next(vtos, 0, true);
}


void TemplateTable::_return(TosState state) {
  transition(state, state);
  assert(_desc->calls_vm(), "inconsistent calls_vm information");

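  // Thread-local handshake poll (JEP 312): each JavaThread has a private
  // polling word in which SafepointMechanism::poll_bit() is set while a
  // safepoint or handshake is pending; when the poll is armed we call
  // InterpreterRuntime::at_safepoint before leaving the frame.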
  if (SafepointMechanism::uses_thread_local_poll()) {
    Label no_safepoint;
    __ ldx(Address(G2_thread, Thread::polling_page_offset()), G3_scratch, 0);
    __ btst(SafepointMechanism::poll_bit(), G3_scratch);
    __ br(Assembler::zero, false, Assembler::pt, no_safepoint);
    __ delayed()->nop();
    __ push(state);   // preserve the TOS value across the runtime call
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint));
    __ pop(state);    // restore it
    __ bind(no_safepoint);
  }

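  // _return_register_finalizer is the rewritten form of the return in
  // Object.<init> (see RegisterFinalizersAtInit); in outline it performs:
  //
  //   if (receiver->klass()->access_flags() & JVM_ACC_HAS_FINALIZER)
  //     InterpreterRuntime::register_finalizer(receiver);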
  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
    assert(state == vtos, "only valid state");
    __ mov(G0, G3_scratch);                 // local 0 is the receiver ('this')
    __ access_local_ptr(G3_scratch, Otos_i);
    __ load_klass(Otos_i, O2);
    __ set(JVM_ACC_HAS_FINALIZER, G3);
    __ ld(O2, in_bytes(Klass::access_flags_offset()), O2);
    __ andcc(G3, O2, G0);
    Label skip_register_finalizer;
    __ br(Assembler::zero, false, Assembler::pn, skip_register_finalizer);
    __ delayed()->nop();

    // Call out to do finalizer registration
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), Otos_i);

    __ bind(skip_register_finalizer);
  }

  // Narrow result if state is itos but result type is smaller.