src/cpu/sparc/vm/templateInterpreter_sparc.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File hotspot Sdiff src/cpu/sparc/vm

src/cpu/sparc/vm/templateInterpreter_sparc.cpp

Print this page
rev 6086 : 8032410: compiler/uncommontrap/TestStackBangRbp.java times out on Solaris-Sparc V9
Summary: make compiled code bang the stack by the worst case size of the interpreter frame at deoptimization points.
Reviewed-by:


   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"

  27 #include "interpreter/bytecodeHistogram.hpp"
  28 #include "interpreter/interpreter.hpp"
  29 #include "interpreter/interpreterGenerator.hpp"
  30 #include "interpreter/interpreterRuntime.hpp"
  31 #include "interpreter/templateTable.hpp"
  32 #include "oops/arrayOop.hpp"
  33 #include "oops/methodData.hpp"
  34 #include "oops/method.hpp"
  35 #include "oops/oop.inline.hpp"
  36 #include "prims/jvmtiExport.hpp"
  37 #include "prims/jvmtiThreadState.hpp"
  38 #include "runtime/arguments.hpp"
  39 #include "runtime/deoptimization.hpp"
  40 #include "runtime/frame.inline.hpp"
  41 #include "runtime/sharedRuntime.hpp"
  42 #include "runtime/stubRoutines.hpp"
  43 #include "runtime/synchronizer.hpp"
  44 #include "runtime/timer.hpp"
  45 #include "runtime/vframeArray.hpp"
  46 #include "utilities/debug.hpp"


1509 // |               |
1510 // +---------------+ <--- fp + 0x40
1511 // |               |
1512 // : extra 7 slots :      note: these slots are not really needed for the interpreter (fix later)
1513 // |               |
1514 // +---------------+ <--- fp + 0x5c
1515 // |               |
1516 // :     free      :
1517 // |               |
1518 // +---------------+
1519 // |               |
1520 // : nonarg locals :
1521 // |               |
1522 // +---------------+
1523 // |               |
1524 // :   arguments   :
1525 // |               | <--- Llocals
1526 // +---------------+ <--- Gargs
1527 // |               |
1528 
1529 static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) {
1530 
1531   // Computes the size of an interpreter frame in words, assuming a fully allocated
1532   // expression stack (max_stack entries), callee_extra_locals extra local slots for
1533   // the callee (to account for frame extension) and monitor_size words of monitors.
1534   // This must be calculated exactly like generate_fixed_frame/generate_compute_interpreter_state.
1535   // callee_extra_locals and max_stack are element counts, scaled by stackElementWords below.
1536   //
1537   // The big complicating factor is that the stack must stay properly aligned. This
1538   // would be even uglier if the monitor size weren't already a multiple of the stack
1539   // alignment. We are given that the sp (fp) is already aligned by the caller, so
1540   // we must ensure that it is properly aligned for our callee.
1541   //
1542   const int rounded_vm_local_words =
1543        round_to(frame::interpreter_frame_vm_local_words,WordsPerLong);
1544   // callee_extra_locals and max_stack are counts, not sizes in the frame.
1545   const int locals_size =
1546        round_to(callee_extra_locals * Interpreter::stackElementWords, WordsPerLong);
1547   const int max_stack_words = max_stack * Interpreter::stackElementWords;
1548   return (round_to((max_stack_words
1549                    + rounded_vm_local_words
1550                    + frame::memory_parameter_word_sp_offset), WordsPerLong)
1551                    // locals_size is rounded above; monitor_size is expected to be aligned already
1552                    + locals_size + monitor_size);
1553 }
1554 
1555 // Returns how much stack, in words, a top interpreter activation of method needs (frame plus call stub).
1556 int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
1557 
1558   // Space for the call stub frame, rounded to WordsPerLong (see the call_stub code)
1559   int call_stub_size  = round_to(7 + frame::memory_parameter_word_sp_offset,
1560                                  WordsPerLong);    // 7 + register save area
1561 
1562   // Reserve space for one monitor so that a synchronized method can be
1563   // entered; unsynchronized methods need no monitor space here
1564   int monitor_size    = method->is_synchronized() ?
1565                                 1*frame::interpreter_frame_monitor_size() : 0;
1566   return size_activation_helper(method->max_locals(), method->max_stack(),
1567                                  monitor_size) + call_stub_size;
1568 }
1569 
1570 int AbstractInterpreter::layout_activation(Method* method,
1571                                            int tempcount,
1572                                            int popframe_extra_args,
1573                                            int moncount,
1574                                            int caller_actual_parameters,
1575                                            int callee_param_count,
1576                                            int callee_local_count,
1577                                            frame* caller,
1578                                            frame* interpreter_frame,
1579                                            bool is_top_frame,
1580                                            bool is_bottom_frame) {
1581   // Note: This calculation must exactly parallel the frame setup
1582   // in InterpreterGenerator::generate_fixed_frame.
1583   // If f!=NULL, set up the following variables:
1584   //   - Lmethod
1585   //   - Llocals
1586   //   - Lmonitors (to the indicated number of monitors)
1587   //   - Lesp (to the indicated number of temps)
1588   // The frame f (if not NULL) on entry is a description of the caller of the frame
1589   // we are about to lay out. We are guaranteed that we will be able to fill in a
1590   // new interpreter frame as its callee (i.e. the stack space is allocated and
1591   // the amount was determined by an earlier call to this method with f == NULL).
1592   // On return f (if not NULL) will describe the interpreter frame we just laid out.
1593 
1594   int monitor_size           = moncount * frame::interpreter_frame_monitor_size();
1595   int rounded_vm_local_words = round_to(frame::interpreter_frame_vm_local_words,WordsPerLong);
1596 
1597   assert(monitor_size == round_to(monitor_size, WordsPerLong), "must align");

1598   //
1599   // Note: if you look closely this appears to be doing something much different
1600   // than generate_fixed_frame. What is happening is this. On sparc we have to do
1601   // this dance with interpreter_sp_adjustment because the window save area would
1602   // appear just below the bottom (tos) of the caller's java expression stack. Because
1603   // the interpreter wants to have the locals completely contiguous, generate_fixed_frame
1604   // will adjust the caller's sp for the "extra locals" (max_locals - parameter_size).
1605   // Now in generate_fixed_frame the extension of the caller's sp happens in the callee.
1606   // In this code the opposite occurs: the caller adjusts its own stack based on the callee.
1607   // This is mostly ok but it does cause a problem when we get to the initial frame (the oldest)
1608   // because the oldest frame would have adjusted its caller's frame and yet that frame
1609   // already exists and isn't part of this array of frames we are unpacking. So at first
1610   // glance this would seem to mess up that frame. However Deoptimization::fetch_unroll_info_helper(),
1611   // after it calculates all of the frames' on_stack_size()s, will then figure out the
1612   // amount to adjust the caller of the initial (oldest) frame and the calculation will all
1613   // add up. It does seem like it would be simpler to account for the adjustment here (and remove
1614   // the callee... parameters here). However this would mean that this routine would have to take
1615   // the caller frame as input so we could adjust its sp (and set its interpreter_sp_adjustment)
1616   // and run the calling loop in the reverse order. This would also appear to mean making
1617   // this code aware of what the interactions are when that initial caller frame was an osr or
1618   // other adapter frame. Deoptimization is complicated enough and hard enough to debug that
1619   // there is no sense in messing with working code.
1620   //
1621 
1622   int rounded_cls = round_to((callee_local_count - callee_param_count), WordsPerLong);
1623   assert(rounded_cls == round_to(rounded_cls, WordsPerLong), "must align");
1624 
1625   int raw_frame_size = size_activation_helper(rounded_cls, method->max_stack(),
1626                                               monitor_size);
1627 
1628   if (interpreter_frame != NULL) {









































1629     // The skeleton frame must already look like an interpreter frame
1630     // even if not fully filled out.
1631     assert(interpreter_frame->is_interpreted_frame(), "Must be interpreted frame");
1632 




1633     intptr_t* fp = interpreter_frame->fp();
1634 
1635     JavaThread* thread = JavaThread::current();
1636     RegisterMap map(thread, false);
1637     // More verification that skeleton frame is properly walkable
1638     assert(fp == caller->sp(), "fp must match");
1639 
1640     intptr_t* montop     = fp - rounded_vm_local_words;
1641 
1642     // preallocate monitors (cf. __ add_monitor_to_stack)
1643     intptr_t* monitors = montop - monitor_size;
1644 
1645     // preallocate stack space
1646     intptr_t*  esp = monitors - 1 -
1647                      (tempcount * Interpreter::stackElementWords) -
1648                      popframe_extra_args;
1649 
1650     int local_words = method->max_locals() * Interpreter::stackElementWords;
1651     NEEDS_CLEANUP;
1652     intptr_t* locals;


1739 #ifdef FAST_DISPATCH
1740     *interpreter_frame->register_addr(IdispatchTables) = (intptr_t) Interpreter::dispatch_table();
1741 #endif
1742 
1743 
1744 #ifdef ASSERT
1745     BasicObjectLock* mp = (BasicObjectLock*)monitors;
1746 
1747     assert(interpreter_frame->interpreter_frame_method() == method, "method matches");
1748     assert(interpreter_frame->interpreter_frame_local_at(9) == (intptr_t *)((intptr_t)locals - (9 * Interpreter::stackElementSize)), "locals match");
1749     assert(interpreter_frame->interpreter_frame_monitor_end()   == mp, "monitor_end matches");
1750     assert(((intptr_t *)interpreter_frame->interpreter_frame_monitor_begin()) == ((intptr_t *)mp)+monitor_size, "monitor_begin matches");
1751     assert(interpreter_frame->interpreter_frame_tos_address()-1 == esp, "esp matches");
1752 
1753     // check bounds
1754     intptr_t* lo = interpreter_frame->sp() + (frame::memory_parameter_word_sp_offset - 1);
1755     intptr_t* hi = interpreter_frame->fp() - rounded_vm_local_words;
1756     assert(lo < monitors && montop <= hi, "monitors in bounds");
1757     assert(lo <= esp && esp < monitors, "esp in bounds");
1758 #endif // ASSERT
1759   }
1760 
1761   return raw_frame_size;
1762 }
1763 
1764 //----------------------------------------------------------------------------------------------------
1765 // Exceptions
1766 void TemplateInterpreterGenerator::generate_throw_exception() {
1767 
1768   // Entry point in previous activation (i.e., if the caller was interpreted)
1769   Interpreter::_rethrow_exception_entry = __ pc();
1770   // O0: exception
1771 
1772   // entry point for exceptions thrown within interpreter code
1773   Interpreter::_throw_exception_entry = __ pc();
1774   __ verify_thread();
1775   // expression stack is undefined here
1776   // O0: exception, i.e. Oexception
1777   // Lbcp: exception bcx
1778   __ verify_oop(Oexception);
1779 
1780 
1781   // expression stack must be empty before entering the VM in case of an exception




   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "ci/ciMethod.hpp"
  28 #include "interpreter/bytecodeHistogram.hpp"
  29 #include "interpreter/interpreter.hpp"
  30 #include "interpreter/interpreterGenerator.hpp"
  31 #include "interpreter/interpreterRuntime.hpp"
  32 #include "interpreter/templateTable.hpp"
  33 #include "oops/arrayOop.hpp"
  34 #include "oops/methodData.hpp"
  35 #include "oops/method.hpp"
  36 #include "oops/oop.inline.hpp"
  37 #include "prims/jvmtiExport.hpp"
  38 #include "prims/jvmtiThreadState.hpp"
  39 #include "runtime/arguments.hpp"
  40 #include "runtime/deoptimization.hpp"
  41 #include "runtime/frame.inline.hpp"
  42 #include "runtime/sharedRuntime.hpp"
  43 #include "runtime/stubRoutines.hpp"
  44 #include "runtime/synchronizer.hpp"
  45 #include "runtime/timer.hpp"
  46 #include "runtime/vframeArray.hpp"
  47 #include "utilities/debug.hpp"


1510 // |               |
1511 // +---------------+ <--- fp + 0x40
1512 // |               |
1513 // : extra 7 slots :      note: these slots are not really needed for the interpreter (fix later)
1514 // |               |
1515 // +---------------+ <--- fp + 0x5c
1516 // |               |
1517 // :     free      :
1518 // |               |
1519 // +---------------+
1520 // |               |
1521 // : nonarg locals :
1522 // |               |
1523 // +---------------+
1524 // |               |
1525 // :   arguments   :
1526 // |               | <--- Llocals
1527 // +---------------+ <--- Gargs
1528 // |               |
1529 
1530 template<class M> static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) {
1531 
1532   // Computes the size of an interpreter frame in words, assuming a fully allocated
1533   // expression stack (max_stack entries), callee_extra_locals extra local slots for
1534   // the callee (to account for frame extension) and monitor_size words of monitors.
1535   // This must be calculated exactly like generate_fixed_frame/generate_compute_interpreter_state.
1536   // The template parameter M is not referenced in the body.
1537   //
1538   // The big complicating factor is that the stack must stay properly aligned. This
1539   // would be even uglier if the monitor size weren't already a multiple of the stack
1540   // alignment. We are given that the sp (fp) is already aligned by the caller, so
1541   // we must ensure that it is properly aligned for our callee.
1542   //
1543   const int rounded_vm_local_words =
1544        round_to(frame::interpreter_frame_vm_local_words,WordsPerLong);
1545   // callee_extra_locals and max_stack are counts, not sizes in the frame.
1546   const int locals_size =
1547        round_to(callee_extra_locals * Interpreter::stackElementWords, WordsPerLong);
1548   const int max_stack_words = max_stack * Interpreter::stackElementWords;
1549   return (round_to((max_stack_words
1550                    + rounded_vm_local_words
1551                    + frame::memory_parameter_word_sp_offset), WordsPerLong)
1552                    // locals_size is rounded above; monitor_size is expected to be aligned already
1553                    + locals_size + monitor_size);
1554 }
1555 
1556 // Returns how much stack, in words, a top interpreter activation of method needs (frame plus call stub).
1557 int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
1558 
1559   // Space for the call stub frame, rounded to WordsPerLong (see the call_stub code)
1560   int call_stub_size  = round_to(7 + frame::memory_parameter_word_sp_offset,
1561                                  WordsPerLong);    // 7 + register save area
1562 
1563   // Reserve space for one monitor so that a synchronized method can be
1564   // entered; unsynchronized methods need no monitor space here
1565   int monitor_size    = method->is_synchronized() ?
1566                                 1*frame::interpreter_frame_monitor_size() : 0;
1567   return size_activation_helper<Method>(method->max_locals(), method->max_stack(),
1568                                         monitor_size) + call_stub_size;
1569 }
1570 
1571 template<class M> int AbstractInterpreter::size_activation(M* method,
1572                                                            int temps,
1573                                                            int popframe_args,
1574                                                            int monitors,
1575                                                            int callee_params,
1576                                                            int callee_locals,
1577                                                            bool is_top_frame) {




1578   // Returns the interpreter frame size in words for method. Note: this calculation
1579   // must exactly parallel the frame setup in InterpreterGenerator::generate_fixed_frame.










1580 
1581   int monitor_size           = monitors * frame::interpreter_frame_monitor_size();

1582 
1583   assert(monitor_size == round_to(monitor_size, WordsPerLong), "must align");
1584 
1585   //
1586   // Note: if you look closely this appears to be doing something much different
1587   // than generate_fixed_frame. What is happening is this. On sparc we have to do
1588   // this dance with interpreter_sp_adjustment because the window save area would
1589   // appear just below the bottom (tos) of the caller's java expression stack. Because
1590   // the interpreter wants to have the locals completely contiguous, generate_fixed_frame
1591   // will adjust the caller's sp for the "extra locals" (max_locals - parameter_size).
1592   // Now in generate_fixed_frame the extension of the caller's sp happens in the callee.
1593   // In this code the opposite occurs: the caller adjusts its own stack based on the callee.
1594   // This is mostly ok but it does cause a problem when we get to the initial frame (the oldest)
1595   // because the oldest frame would have adjusted its caller's frame and yet that frame
1596   // already exists and isn't part of this array of frames we are unpacking. So at first
1597   // glance this would seem to mess up that frame. However Deoptimization::fetch_unroll_info_helper(),
1598   // after it calculates all of the frames' on_stack_size()s, will then figure out the
1599   // amount to adjust the caller of the initial (oldest) frame and the calculation will all
1600   // add up. It does seem like it would be simpler to account for the adjustment here (and remove
1601   // the callee... parameters here). However this would mean that this routine would have to take
1602   // the caller frame as input so we could adjust its sp (and set its interpreter_sp_adjustment)
1603   // and run the calling loop in the reverse order. This would also appear to mean making
1604   // this code aware of what the interactions are when that initial caller frame was an osr or
1605   // other adapter frame. Deoptimization is complicated enough and hard enough to debug that
1606   // there is no sense in messing with working code.
1607   //
1608 
1609   int rounded_cls = round_to((callee_locals - callee_params), WordsPerLong); // callee's non-parameter locals, rounded
1610   assert(rounded_cls == round_to(rounded_cls, WordsPerLong), "must align");
1611 
1612   int raw_frame_size = size_activation_helper<M>(rounded_cls, method->max_stack(), // M only needs to provide max_stack()
1613                                                  monitor_size);
1614 
1615   return raw_frame_size;
1616 }
1617 
1618 template int AbstractInterpreter::size_activation<Method>(Method* method, // explicit instantiation for Method
1619                                                           int temps,
1620                                                           int popframe_args,
1621                                                           int monitors,
1622                                                           int callee_params,
1623                                                           int callee_locals,
1624                                                           bool is_top_frame);
1625 
1626 template int AbstractInterpreter::size_activation<ciMethod>(ciMethod* method, // explicit instantiation for ciMethod (presumably for compiler-side callers -- confirm)
1627                                                             int temps,
1628                                                             int popframe_args,
1629                                                             int monitors,
1630                                                             int callee_params,
1631                                                             int callee_locals,
1632                                                             bool is_top_frame);
1633 
1634 void AbstractInterpreter::layout_activation(Method* method,
1635                                             int tempcount,
1636                                             int popframe_extra_args,
1637                                             int moncount,
1638                                             int caller_actual_parameters,
1639                                             int callee_param_count,
1640                                             int callee_local_count,
1641                                             frame* caller,
1642                                             frame* interpreter_frame,
1643                                             bool is_top_frame,
1644                                             bool is_bottom_frame) {
1645   // Set up the following variables:
1646   //   - Lmethod
1647   //   - Llocals
1648   //   - Lmonitors (to the indicated number of monitors)
1649   //   - Lesp (to the indicated number of temps)
1650   // The frame caller on entry is a description of the caller of the
1651   // frame we are about to lay out. We are guaranteed that we will be
1652   // able to fill in a new interpreter frame as its callee (i.e. the
1653   // stack space is allocated and the amount was determined by an
1654   // earlier call to the size_activation() method).  On return caller
1655   // will describe the interpreter frame we just laid out.
1656 
1657   // The skeleton frame must already look like an interpreter frame
1658   // even if not fully filled out.
1659   assert(interpreter_frame->is_interpreted_frame(), "Must be interpreted frame");
1660 
1661   int rounded_vm_local_words = round_to(frame::interpreter_frame_vm_local_words,WordsPerLong);
1662   int monitor_size           = moncount * frame::interpreter_frame_monitor_size();
1663   assert(monitor_size == round_to(monitor_size, WordsPerLong), "must align");
1664 
1665   intptr_t* fp = interpreter_frame->fp();
1666 
1667   JavaThread* thread = JavaThread::current();
1668   RegisterMap map(thread, false);
1669   // More verification that skeleton frame is properly walkable
1670   assert(fp == caller->sp(), "fp must match");
1671 
1672   intptr_t* montop     = fp - rounded_vm_local_words;
1673 
1674   // preallocate monitors (cf. __ add_monitor_to_stack)
1675   intptr_t* monitors = montop - monitor_size;
1676 
1677   // preallocate stack space
1678   intptr_t*  esp = monitors - 1 -
1679     (tempcount * Interpreter::stackElementWords) -
1680     popframe_extra_args;
1681 
1682   int local_words = method->max_locals() * Interpreter::stackElementWords;
1683   NEEDS_CLEANUP;
1684   intptr_t* locals;


1771 #ifdef FAST_DISPATCH
1772   *interpreter_frame->register_addr(IdispatchTables) = (intptr_t) Interpreter::dispatch_table();
1773 #endif
1774 
1775 
1776 #ifdef ASSERT
1777   BasicObjectLock* mp = (BasicObjectLock*)monitors;
1778 
1779   assert(interpreter_frame->interpreter_frame_method() == method, "method matches");
1780   assert(interpreter_frame->interpreter_frame_local_at(9) == (intptr_t *)((intptr_t)locals - (9 * Interpreter::stackElementSize)), "locals match");
1781   assert(interpreter_frame->interpreter_frame_monitor_end()   == mp, "monitor_end matches");
1782   assert(((intptr_t *)interpreter_frame->interpreter_frame_monitor_begin()) == ((intptr_t *)mp)+monitor_size, "monitor_begin matches");
1783   assert(interpreter_frame->interpreter_frame_tos_address()-1 == esp, "esp matches");
1784 
1785   // check bounds
1786   intptr_t* lo = interpreter_frame->sp() + (frame::memory_parameter_word_sp_offset - 1);
1787   intptr_t* hi = interpreter_frame->fp() - rounded_vm_local_words;
1788   assert(lo < monitors && montop <= hi, "monitors in bounds");
1789   assert(lo <= esp && esp < monitors, "esp in bounds");
1790 #endif // ASSERT



1791 }
1792 
1793 //----------------------------------------------------------------------------------------------------
1794 // Exceptions
1795 void TemplateInterpreterGenerator::generate_throw_exception() {
1796 
1797   // Entry point in previous activation (i.e., if the caller was interpreted)
1798   Interpreter::_rethrow_exception_entry = __ pc();
1799   // O0: exception
1800 
1801   // entry point for exceptions thrown within interpreter code
1802   Interpreter::_throw_exception_entry = __ pc();
1803   __ verify_thread();
1804   // expression stack is undefined here
1805   // O0: exception, i.e. Oexception
1806   // Lbcp: exception bcx
1807   __ verify_oop(Oexception);
1808 
1809 
1810   // expression stack must be empty before entering the VM in case of an exception


src/cpu/sparc/vm/templateInterpreter_sparc.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File