rev 4505 : 8014189: JVM crash with SEGV in ConnectionGraph::record_for_escape_analysis()
Summary: Add NULL checks and asserts for Type::make_ptr() returned value.
Reviewed-by: kvn
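
The crash in 8014189 comes from Type::make_ptr() returning NULL for types that have no pointer variant; the old code below (hunk at line 934) dereferenced the result unchecked. The fix routes the lookup through a get_ptr_type() accessor instead. The helper itself lives outside this file and is not shown in this webrev, so the following is only a sketch of its likely defensive shape, inferred from the summary line:

    // Illustrative sketch, not the committed helper. The point of the fix:
    // make_ptr() may return NULL (e.g. for a non-pointer type), so assert
    // before any caller dereferences the result.
    const TypePtr* Node::get_ptr_type() const {
      const TypePtr* tp = this->bottom_type()->make_ptr();
      assert(tp != NULL, "make_ptr() returned NULL");
      return tp;
    }

With a helper of that shape, callers such as Process_OopMap_Node() below keep their product-build behavior while debug builds catch a bad type before the tp->is_oopptr() chain dereferences it.
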
--- old/src/share/vm/opto/output.cpp
+++ new/src/share/vm/opto/output.cpp
1 1 /*
2 2 * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "asm/assembler.inline.hpp"
27 27 #include "code/debugInfo.hpp"
28 28 #include "code/debugInfoRec.hpp"
29 29 #include "compiler/compileBroker.hpp"
30 30 #include "compiler/oopMap.hpp"
31 31 #include "memory/allocation.inline.hpp"
32 32 #include "opto/callnode.hpp"
33 33 #include "opto/cfgnode.hpp"
34 34 #include "opto/locknode.hpp"
35 35 #include "opto/machnode.hpp"
36 36 #include "opto/output.hpp"
37 37 #include "opto/regalloc.hpp"
38 38 #include "opto/runtime.hpp"
39 39 #include "opto/subnode.hpp"
40 40 #include "opto/type.hpp"
41 41 #include "runtime/handles.inline.hpp"
42 42 #include "utilities/xmlstream.hpp"
43 43
44 44 extern uint size_java_to_interp();
45 45 extern uint reloc_java_to_interp();
46 46 extern uint size_exception_handler();
47 47 extern uint size_deopt_handler();
48 48
49 49 #ifndef PRODUCT
50 50 #define DEBUG_ARG(x) , x
51 51 #else
52 52 #define DEBUG_ARG(x)
53 53 #endif
54 54
55 55 extern int emit_exception_handler(CodeBuffer &cbuf);
56 56 extern int emit_deopt_handler(CodeBuffer &cbuf);
57 57
58 58 //------------------------------Output-----------------------------------------
59 59 // Convert Nodes to instruction bits and pass off to the VM
60 60 void Compile::Output() {
61 61 // The RootNode's block must be empty
62 62 assert( _cfg->_broot->_nodes.size() == 0, "" );
63 63
64 64 // The number of new nodes (mostly MachNop) is proportional to
65 65 // the number of java calls and inner loops which are aligned.
66 66 if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
67 67 C->inner_loops()*(OptoLoopAlignment-1)),
68 68 "out of nodes before code generation" ) ) {
69 69 return;
70 70 }
71 71 // Make sure I can find the Start Node
72 72 Block_Array& bbs = _cfg->_bbs;
73 73 Block *entry = _cfg->_blocks[1];
74 74 Block *broot = _cfg->_broot;
75 75
76 76 const StartNode *start = entry->_nodes[0]->as_Start();
77 77
78 78 // Replace StartNode with prolog
79 79 MachPrologNode *prolog = new (this) MachPrologNode();
80 80 entry->_nodes.map( 0, prolog );
81 81 bbs.map( prolog->_idx, entry );
82 82 bbs.map( start->_idx, NULL ); // start is no longer in any block
83 83
84 84 // Virtual methods need an unverified entry point
85 85
86 86 if( is_osr_compilation() ) {
87 87 if( PoisonOSREntry ) {
88 88 // TODO: Should use a ShouldNotReachHereNode...
89 89 _cfg->insert( broot, 0, new (this) MachBreakpointNode() );
90 90 }
91 91 } else {
92 92 if( _method && !_method->flags().is_static() ) {
93 93 // Insert unvalidated entry point
94 94 _cfg->insert( broot, 0, new (this) MachUEPNode() );
95 95 }
96 96
97 97 }
98 98
99 99
100 100 // Break before main entry point
101 101 if( (_method && _method->break_at_execute())
102 102 #ifndef PRODUCT
103 103 ||(OptoBreakpoint && is_method_compilation())
104 104 ||(OptoBreakpointOSR && is_osr_compilation())
105 105 ||(OptoBreakpointC2R && !_method)
106 106 #endif
107 107 ) {
108 108 // checking for _method means that OptoBreakpoint does not apply to
109 109 // runtime stubs or frame converters
110 110 _cfg->insert( entry, 1, new (this) MachBreakpointNode() );
111 111 }
112 112
113 113 // Insert epilogs before every return
114 114 for( uint i=0; i<_cfg->_num_blocks; i++ ) {
115 115 Block *b = _cfg->_blocks[i];
116 116 if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
117 117 Node *m = b->end();
118 118 if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
119 119 MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
120 120 b->add_inst( epilog );
121 121 bbs.map(epilog->_idx, b);
122 122 //_regalloc->set_bad(epilog->_idx); // Already initialized this way.
123 123 }
124 124 }
125 125 }
126 126
127 127 # ifdef ENABLE_ZAP_DEAD_LOCALS
128 128 if ( ZapDeadCompiledLocals ) Insert_zap_nodes();
129 129 # endif
130 130
131 131 uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
132 132 blk_starts[0] = 0;
133 133
134 134 // Initialize code buffer and process short branches.
135 135 CodeBuffer* cb = init_buffer(blk_starts);
136 136
137 137 if (cb == NULL || failing()) return;
138 138
139 139 ScheduleAndBundle();
140 140
141 141 #ifndef PRODUCT
142 142 if (trace_opto_output()) {
143 143 tty->print("\n---- After ScheduleAndBundle ----\n");
144 144 for (uint i = 0; i < _cfg->_num_blocks; i++) {
145 145 tty->print("\nBB#%03d:\n", i);
146 146 Block *bb = _cfg->_blocks[i];
147 147 for (uint j = 0; j < bb->_nodes.size(); j++) {
148 148 Node *n = bb->_nodes[j];
149 149 OptoReg::Name reg = _regalloc->get_reg_first(n);
150 150 tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
151 151 n->dump();
152 152 }
153 153 }
154 154 }
155 155 #endif
156 156
157 157 if (failing()) return;
158 158
159 159 BuildOopMaps();
160 160
161 161 if (failing()) return;
162 162
163 163 fill_buffer(cb, blk_starts);
164 164 }
165 165
166 166 bool Compile::need_stack_bang(int frame_size_in_bytes) const {
167 167 // Determine if we need to generate a stack overflow check.
168 168 // Do it if the method is not a stub function and
169 169 // has java calls or has frame size > vm_page_size/8.
170 170 return (UseStackBanging && stub_function() == NULL &&
171 171 (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3));
172 172 }
173 173
174 174 bool Compile::need_register_stack_bang() const {
175 175 // Determine if we need to generate a register stack overflow check.
176 176 // This is only used on architectures which have split register
177 177 // and memory stacks (ie. IA64).
178 178 // Bang if the method is not a stub function and has java calls
179 179 return (stub_function() == NULL && has_java_calls());
180 180 }
181 181
182 182 # ifdef ENABLE_ZAP_DEAD_LOCALS
183 183
184 184
185 185 // In order to catch compiler oop-map bugs, we have implemented
186 186 // a debugging mode called ZapDeadCompilerLocals.
187 187 // This mode causes the compiler to insert a call to a runtime routine,
188 188 // "zap_dead_locals", right before each place in compiled code
189 189 // that could potentially be a gc-point (i.e., a safepoint or oop map point).
190 190 // The runtime routine checks that locations mapped as oops are really
191 191 // oops, that locations mapped as values do not look like oops,
192 192 // and that locations mapped as dead are not used later
193 193 // (by zapping them to an invalid address).
194 194
195 195 int Compile::_CompiledZap_count = 0;
196 196
197 197 void Compile::Insert_zap_nodes() {
198 198 bool skip = false;
199 199
200 200
201 201 // Dink with static counts because code without the extra
202 202 // runtime calls is MUCH faster for debugging purposes
203 203
204 204 if ( CompileZapFirst == 0 ) ; // nothing special
205 205 else if ( CompileZapFirst > CompiledZap_count() ) skip = true;
206 206 else if ( CompileZapFirst == CompiledZap_count() )
207 207 warning("starting zap compilation after skipping");
208 208
209 209 if ( CompileZapLast == -1 ) ; // nothing special
210 210 else if ( CompileZapLast < CompiledZap_count() ) skip = true;
211 211 else if ( CompileZapLast == CompiledZap_count() )
212 212 warning("about to compile last zap");
213 213
214 214 ++_CompiledZap_count; // counts skipped zaps, too
215 215
216 216 if ( skip ) return;
217 217
218 218
219 219 if ( _method == NULL )
220 220 return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care
221 221
222 222 // Insert call to zap runtime stub before every node with an oop map
223 223 for( uint i=0; i<_cfg->_num_blocks; i++ ) {
224 224 Block *b = _cfg->_blocks[i];
225 225 for ( uint j = 0; j < b->_nodes.size(); ++j ) {
226 226 Node *n = b->_nodes[j];
227 227
228 228 // Determine if we should insert a zap-a-lot node in output.
229 229 // We do that for all nodes that have oopmap info, except for calls
230 230 // to allocation. Calls to allocation pass in the old top-of-eden pointer
231 231 // and expect the C code to reset it. Hence, there can be no safepoints between
232 232 // the inlined-allocation and the call to new_Java, etc.
233 233 // We also cannot zap monitor calls, as they must hold the microlock
234 234 // during the call to Zap, which also wants to grab the microlock.
235 235 bool insert = n->is_MachSafePoint() && (n->as_MachSafePoint()->oop_map() != NULL);
236 236 if ( insert ) { // it is MachSafePoint
237 237 if ( !n->is_MachCall() ) {
238 238 insert = false;
239 239 } else if ( n->is_MachCall() ) {
240 240 MachCallNode* call = n->as_MachCall();
241 241 if (call->entry_point() == OptoRuntime::new_instance_Java() ||
242 242 call->entry_point() == OptoRuntime::new_array_Java() ||
243 243 call->entry_point() == OptoRuntime::multianewarray2_Java() ||
244 244 call->entry_point() == OptoRuntime::multianewarray3_Java() ||
245 245 call->entry_point() == OptoRuntime::multianewarray4_Java() ||
246 246 call->entry_point() == OptoRuntime::multianewarray5_Java() ||
247 247 call->entry_point() == OptoRuntime::slow_arraycopy_Java() ||
248 248 call->entry_point() == OptoRuntime::complete_monitor_locking_Java()
249 249 ) {
250 250 insert = false;
251 251 }
252 252 }
253 253 if (insert) {
254 254 Node *zap = call_zap_node(n->as_MachSafePoint(), i);
255 255 b->_nodes.insert( j, zap );
256 256 _cfg->_bbs.map( zap->_idx, b );
257 257 ++j;
258 258 }
259 259 }
260 260 }
261 261 }
262 262 }
263 263
264 264
265 265 Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
266 266 const TypeFunc *tf = OptoRuntime::zap_dead_locals_Type();
267 267 CallStaticJavaNode* ideal_node =
268 268 new (this) CallStaticJavaNode( tf,
269 269 OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()),
270 270 "call zap dead locals stub", 0, TypePtr::BOTTOM);
271 271 // We need to copy the OopMap from the site we're zapping at.
272 272 // We have to make a copy, because the zap site might not be
273 273 // a call site, and zap_dead is a call site.
274 274 OopMap* clone = node_to_check->oop_map()->deep_copy();
275 275
276 276 // Add the cloned OopMap to the zap node
277 277 ideal_node->set_oop_map(clone);
278 278 return _matcher->match_sfpt(ideal_node);
279 279 }
280 280
281 281 //------------------------------is_node_getting_a_safepoint--------------------
282 282 bool Compile::is_node_getting_a_safepoint( Node* n) {
283 283 // This code duplicates the logic prior to the call of add_safepoint
284 284 // below in this file.
285 285 if( n->is_MachSafePoint() ) return true;
286 286 return false;
287 287 }
288 288
289 289 # endif // ENABLE_ZAP_DEAD_LOCALS
290 290
291 291 //------------------------------compute_loop_first_inst_sizes------------------
292 292 // Compute the size of first NumberOfLoopInstrToAlign instructions at the top
293 293 // of a loop. When aligning a loop we need to provide enough instructions
294 294 // in the cpu's fetch buffer to feed the decoders. The loop alignment can be
295 295 // avoided if we have enough instructions in the fetch buffer at the head of a loop.
296 296 // By default, the size is set to 999999 by Block's constructor so that
297 297 // a loop will be aligned if the size is not reset here.
298 298 //
299 299 // Note: Mach instructions could contain several HW instructions
300 300 // so the size is estimated only.
301 301 //
302 302 void Compile::compute_loop_first_inst_sizes() {
303 303 // The next condition is used to gate the loop alignment optimization.
304 304 // Don't align a loop if there are enough instructions at the head of a loop
305 305 // or alignment padding is larger than MaxLoopPad. By default, MaxLoopPad
306 306 // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
307 307 // equal to 11 bytes which is the largest address NOP instruction.
308 308 if( MaxLoopPad < OptoLoopAlignment-1 ) {
309 309 uint last_block = _cfg->_num_blocks-1;
310 310 for( uint i=1; i <= last_block; i++ ) {
311 311 Block *b = _cfg->_blocks[i];
312 312 // Check the first loop's block which requires an alignment.
313 313 if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) {
314 314 uint sum_size = 0;
315 315 uint inst_cnt = NumberOfLoopInstrToAlign;
316 316 inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
317 317
318 318 // Check subsequent fallthrough blocks if the loop's first
319 319 // block(s) does not have enough instructions.
320 320 Block *nb = b;
321 321 while( inst_cnt > 0 &&
322 322 i < last_block &&
323 323 !_cfg->_blocks[i+1]->has_loop_alignment() &&
324 324 !nb->has_successor(b) ) {
325 325 i++;
326 326 nb = _cfg->_blocks[i];
327 327 inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
328 328 } // while( inst_cnt > 0 && i < last_block )
329 329
330 330 b->set_first_inst_size(sum_size);
331 331 } // if( b->head()->is_Loop() )
332 332 } // for( i <= last_block )
333 333 } // if( MaxLoopPad < OptoLoopAlignment-1 )
334 334 }
335 335
336 336 //----------------------shorten_branches---------------------------------------
337 337 // The architecture description provides short branch variants for some long
338 338 // branch instructions. Replace eligible long branches with short branches.
339 339 void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
340 340
341 341 // ------------------
342 342 // Compute size of each block, method size, and relocation information size
343 343 uint nblocks = _cfg->_num_blocks;
344 344
345 345 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
346 346 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks);
347 347 int* jmp_nidx = NEW_RESOURCE_ARRAY(int ,nblocks);
348 348 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); )
349 349 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); )
350 350
351 351 bool has_short_branch_candidate = false;
352 352
353 353 // Initialize the sizes to 0
354 354 code_size = 0; // Size in bytes of generated code
355 355 stub_size = 0; // Size in bytes of all stub entries
356 356 // Size in bytes of all relocation entries, including those in local stubs.
357 357 // Start with 2-bytes of reloc info for the unvalidated entry point
358 358 reloc_size = 1; // Number of relocation entries
359 359
360 360 // Make three passes. The first computes pessimistic blk_starts,
361 361 // relative jmp_offset and reloc_size information. The second performs
362 362 // short branch substitution using the pessimistic sizing. The
363 363 // third inserts nops where needed.
364 364
365 365 // Step one, perform a pessimistic sizing pass.
366 366 uint last_call_adr = max_uint;
367 367 uint last_avoid_back_to_back_adr = max_uint;
368 368 uint nop_size = (new (this) MachNopNode())->size(_regalloc);
369 369 for (uint i = 0; i < nblocks; i++) { // For all blocks
370 370 Block *b = _cfg->_blocks[i];
371 371
372 372 // During short branch replacement, we store the relative (to blk_starts)
373 373 // offset of jump in jmp_offset, rather than the absolute offset of jump.
374 374 // This is so that we do not need to recompute sizes of all nodes when
375 375 // we compute correct blk_starts in our next sizing pass.
376 376 jmp_offset[i] = 0;
377 377 jmp_size[i] = 0;
378 378 jmp_nidx[i] = -1;
379 379 DEBUG_ONLY( jmp_target[i] = 0; )
380 380 DEBUG_ONLY( jmp_rule[i] = 0; )
381 381
382 382 // Sum all instruction sizes to compute block size
383 383 uint last_inst = b->_nodes.size();
384 384 uint blk_size = 0;
385 385 for (uint j = 0; j < last_inst; j++) {
386 386 Node* nj = b->_nodes[j];
387 387 // Handle machine instruction nodes
388 388 if (nj->is_Mach()) {
389 389 MachNode *mach = nj->as_Mach();
390 390 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
391 391 reloc_size += mach->reloc();
392 392 if( mach->is_MachCall() ) {
393 393 MachCallNode *mcall = mach->as_MachCall();
394 394 // This destination address is NOT PC-relative
395 395
396 396 mcall->method_set((intptr_t)mcall->entry_point());
397 397
398 398 if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) {
399 399 stub_size += size_java_to_interp();
400 400 reloc_size += reloc_java_to_interp();
401 401 }
402 402 } else if (mach->is_MachSafePoint()) {
403 403 // If call/safepoint are adjacent, account for possible
404 404 // nop to disambiguate the two safepoints.
405 405 // ScheduleAndBundle() can rearrange nodes in a block,
406 406 // check for all offsets inside this block.
407 407 if (last_call_adr >= blk_starts[i]) {
408 408 blk_size += nop_size;
409 409 }
410 410 }
411 411 if (mach->avoid_back_to_back()) {
412 412 // Nop is inserted between "avoid back to back" instructions.
413 413 // ScheduleAndBundle() can rearrange nodes in a block,
414 414 // check for all offsets inside this block.
415 415 if (last_avoid_back_to_back_adr >= blk_starts[i]) {
416 416 blk_size += nop_size;
417 417 }
418 418 }
419 419 if (mach->may_be_short_branch()) {
420 420 if (!nj->is_MachBranch()) {
421 421 #ifndef PRODUCT
422 422 nj->dump(3);
423 423 #endif
424 424 Unimplemented();
425 425 }
426 426 assert(jmp_nidx[i] == -1, "block should have only one branch");
427 427 jmp_offset[i] = blk_size;
428 428 jmp_size[i] = nj->size(_regalloc);
429 429 jmp_nidx[i] = j;
430 430 has_short_branch_candidate = true;
431 431 }
432 432 }
433 433 blk_size += nj->size(_regalloc);
434 434 // Remember end of call offset
435 435 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) {
436 436 last_call_adr = blk_starts[i]+blk_size;
437 437 }
438 438 // Remember end of avoid_back_to_back offset
439 439 if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) {
440 440 last_avoid_back_to_back_adr = blk_starts[i]+blk_size;
441 441 }
442 442 }
443 443
444 444 // When the next block starts a loop, we may insert pad NOP
445 445 // instructions. Since we cannot know our future alignment,
446 446 // assume the worst.
447 447 if (i< nblocks-1) {
448 448 Block *nb = _cfg->_blocks[i+1];
449 449 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
450 450 if (max_loop_pad > 0) {
451 451 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
452 452 // Adjust last_call_adr and/or last_avoid_back_to_back_adr.
453 453 // If either is the last instruction in this block, bump by
454 454 // max_loop_pad in lock-step with blk_size, so sizing
455 455 // calculations in subsequent blocks still can conservatively
456 456 // detect that it may be the last instruction in this block.
457 457 if (last_call_adr == blk_starts[i]+blk_size) {
458 458 last_call_adr += max_loop_pad;
459 459 }
460 460 if (last_avoid_back_to_back_adr == blk_starts[i]+blk_size) {
461 461 last_avoid_back_to_back_adr += max_loop_pad;
462 462 }
463 463 blk_size += max_loop_pad;
464 464 }
465 465 }
466 466
467 467 // Save block size; update total method size
468 468 blk_starts[i+1] = blk_starts[i]+blk_size;
469 469 }
470 470
471 471 // Step two, replace eligible long jumps.
472 472 bool progress = true;
473 473 uint last_may_be_short_branch_adr = max_uint;
474 474 while (has_short_branch_candidate && progress) {
475 475 progress = false;
476 476 has_short_branch_candidate = false;
477 477 int adjust_block_start = 0;
478 478 for (uint i = 0; i < nblocks; i++) {
479 479 Block *b = _cfg->_blocks[i];
480 480 int idx = jmp_nidx[i];
481 481 MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach();
482 482 if (mach != NULL && mach->may_be_short_branch()) {
483 483 #ifdef ASSERT
484 484 assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity");
485 485 int j;
486 486 // Find the branch; ignore trailing NOPs.
487 487 for (j = b->_nodes.size()-1; j>=0; j--) {
488 488 Node* n = b->_nodes[j];
489 489 if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con)
490 490 break;
491 491 }
492 492 assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity");
493 493 #endif
494 494 int br_size = jmp_size[i];
495 495 int br_offs = blk_starts[i] + jmp_offset[i];
496 496
497 497 // This requires the TRUE branch target be in succs[0]
498 498 uint bnum = b->non_connector_successor(0)->_pre_order;
499 499 int offset = blk_starts[bnum] - br_offs;
500 500 if (bnum > i) { // adjust following block's offset
501 501 offset -= adjust_block_start;
502 502 }
503 503 // In the following code a nop could be inserted before
504 504 // the branch which will increase the backward distance.
505 505 bool needs_padding = ((uint)br_offs == last_may_be_short_branch_adr);
506 506 if (needs_padding && offset <= 0)
507 507 offset -= nop_size;
508 508
509 509 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
510 510 // We've got a winner. Replace this branch.
511 511 MachNode* replacement = mach->as_MachBranch()->short_branch_version(this);
512 512
513 513 // Update the jmp_size.
514 514 int new_size = replacement->size(_regalloc);
515 515 int diff = br_size - new_size;
516 516 assert(diff >= (int)nop_size, "short_branch size should be smaller");
517 517 // Conservatively take into account padding between
518 518 // avoid_back_to_back branches. The previous branch could be
519 519 // converted into an avoid_back_to_back branch during later
520 520 // rounds.
521 521 if (needs_padding && replacement->avoid_back_to_back()) {
522 522 jmp_offset[i] += nop_size;
523 523 diff -= nop_size;
524 524 }
525 525 adjust_block_start += diff;
526 526 b->_nodes.map(idx, replacement);
527 527 mach->subsume_by(replacement, C);
528 528 mach = replacement;
529 529 progress = true;
530 530
531 531 jmp_size[i] = new_size;
532 532 DEBUG_ONLY( jmp_target[i] = bnum; );
533 533 DEBUG_ONLY( jmp_rule[i] = mach->rule(); );
534 534 } else {
535 535 // The jump distance is not short, try again during next iteration.
536 536 has_short_branch_candidate = true;
537 537 }
538 538 } // (mach->may_be_short_branch())
539 539 if (mach != NULL && (mach->may_be_short_branch() ||
540 540 mach->avoid_back_to_back())) {
541 541 last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i];
542 542 }
543 543 blk_starts[i+1] -= adjust_block_start;
544 544 }
545 545 }
546 546
547 547 #ifdef ASSERT
548 548 for (uint i = 0; i < nblocks; i++) { // For all blocks
549 549 if (jmp_target[i] != 0) {
550 550 int br_size = jmp_size[i];
551 551 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]);
552 552 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) {
553 553 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
554 554 }
555 555 assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp");
556 556 }
557 557 }
558 558 #endif
559 559
560 560 // Step 3, compute the offsets of all blocks, will be done in fill_buffer()
561 561 // after ScheduleAndBundle().
562 562
563 563 // ------------------
564 564 // Compute size for code buffer
565 565 code_size = blk_starts[nblocks];
566 566
567 567 // Relocation records
568 568 reloc_size += 1; // Relo entry for exception handler
569 569
570 570 // Adjust reloc_size to the number of relocation info records
571 571 // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
572 572 // a relocation index.
573 573 // The CodeBuffer will expand the locs array if this estimate is too low.
574 574 reloc_size *= 10 / sizeof(relocInfo);
575 575 }
576 576
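
Step two of shorten_branches() above is a fixed-point iteration: shrinking one branch slides all later blocks up, which can pull other branches into short range, so the pass repeats until nothing changes. The toy program below models just that sliding logic; the block layout, branch offsets, +/-128 short range, and 5-byte/2-byte encodings are invented for illustration, and the real pass's nop/padding handling is omitted:

    #include <stdio.h>

    struct Branch { int target; int offset; int size; bool is_long; };

    static bool fits_short(int dist) { return dist >= -128 && dist <= 127; }

    int main() {
      enum { NBLOCKS = 3, SHORT_SIZE = 2 };
      int blk_starts[NBLOCKS + 1] = { 0, 60, 120, 180 };
      Branch br[NBLOCKS] = {
        { 2, 50, 5, true },   // B0 -> B2: forward branch, fits immediately
        { -1, 0, 0, false },  // B1: no branch candidate
        { 0, 10, 5, true },   // B2 -> B0: fits only after B0 shrinks
      };
      bool progress = true;
      while (progress) {
        progress = false;
        int adjust = 0;                             // bytes saved so far this pass
        for (int i = 0; i < NBLOCKS; i++) {
          if (br[i].target >= 0 && br[i].is_long) {
            int br_offs = blk_starts[i] + br[i].offset;
            int dist = blk_starts[br[i].target] - br_offs;
            if (br[i].target > i) dist -= adjust;   // forward target slides up too
            if (fits_short(dist)) {
              adjust += br[i].size - SHORT_SIZE;    // shrink long form to short
              br[i].size = SHORT_SIZE;
              br[i].is_long = false;
              progress = true;                      // may enable further shrinking
            }
          }
          blk_starts[i + 1] -= adjust;              // later blocks slide up
        }
      }
      for (int i = 0; i < NBLOCKS; i++)
        printf("B%d @ %d branch=%s\n", i, blk_starts[i],
               br[i].target < 0 ? "none" : (br[i].is_long ? "long" : "short"));
      return 0;
    }

Running it shows B2's backward branch coming into range only after B0's forward branch has been shrunk, which is why the real pass loops until progress stops.
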
577 577 //------------------------------FillLocArray-----------------------------------
578 578 // Create a bit of debug info and append it to the array. The mapping is from
579 579 // Java local or expression stack to constant, register or stack-slot. For
580 580 // doubles, insert 2 mappings and return 1 (to tell the caller that the next
581 581 // entry has been taken care of and caller should skip it).
582 582 static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
583 583 // This should never have accepted Bad before
584 584 assert(OptoReg::is_valid(regnum), "location must be valid");
585 585 return (OptoReg::is_reg(regnum))
586 586 ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
587 587 : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
588 588 }
589 589
590 590
591 591 ObjectValue*
592 592 Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) {
593 593 for (int i = 0; i < objs->length(); i++) {
594 594 assert(objs->at(i)->is_object(), "corrupt object cache");
595 595 ObjectValue* sv = (ObjectValue*) objs->at(i);
596 596 if (sv->id() == id) {
597 597 return sv;
598 598 }
599 599 }
600 600 // Otherwise..
601 601 return NULL;
602 602 }
603 603
604 604 void Compile::set_sv_for_object_node(GrowableArray<ScopeValue*> *objs,
605 605 ObjectValue* sv ) {
606 606 assert(sv_for_node_id(objs, sv->id()) == NULL, "Precondition");
607 607 objs->append(sv);
608 608 }
609 609
610 610
611 611 void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
612 612 GrowableArray<ScopeValue*> *array,
613 613 GrowableArray<ScopeValue*> *objs ) {
614 614 assert( local, "use _top instead of null" );
615 615 if (array->length() != idx) {
616 616 assert(array->length() == idx + 1, "Unexpected array count");
617 617 // Old functionality:
618 618 // return
619 619 // New functionality:
620 620 // Assert if the local is not top. In product mode let the new node
621 621 // override the old entry.
622 622 assert(local == top(), "LocArray collision");
623 623 if (local == top()) {
624 624 return;
625 625 }
626 626 array->pop();
627 627 }
628 628 const Type *t = local->bottom_type();
629 629
630 630 // Is it a safepoint scalar object node?
631 631 if (local->is_SafePointScalarObject()) {
632 632 SafePointScalarObjectNode* spobj = local->as_SafePointScalarObject();
633 633
634 634 ObjectValue* sv = Compile::sv_for_node_id(objs, spobj->_idx);
635 635 if (sv == NULL) {
636 636 ciKlass* cik = t->is_oopptr()->klass();
637 637 assert(cik->is_instance_klass() ||
638 638 cik->is_array_klass(), "Not supported allocation.");
639 639 sv = new ObjectValue(spobj->_idx,
640 640 new ConstantOopWriteValue(cik->constant_encoding()));
641 641 Compile::set_sv_for_object_node(objs, sv);
642 642
643 643 uint first_ind = spobj->first_index();
644 644 for (uint i = 0; i < spobj->n_fields(); i++) {
645 645 Node* fld_node = sfpt->in(first_ind+i);
646 646 (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs);
647 647 }
648 648 }
649 649 array->append(sv);
650 650 return;
651 651 }
652 652
653 653 // Grab the register number for the local
654 654 OptoReg::Name regnum = _regalloc->get_reg_first(local);
655 655 if( OptoReg::is_valid(regnum) ) {// Got a register/stack?
656 656 // Record the double as two float registers.
657 657 // The register mask for such a value always specifies two adjacent
658 658 // float registers, with the lower register number even.
659 659 // Normally, the allocation of high and low words to these registers
660 660 // is irrelevant, because nearly all operations on register pairs
661 661 // (e.g., StoreD) treat them as a single unit.
662 662 // Here, we assume in addition that the words in these two registers are
663 663 // stored "naturally" (by operations like StoreD and double stores
664 664 // within the interpreter) such that the lower-numbered register
665 665 // is written to the lower memory address. This may seem like
666 666 // a machine dependency, but it is not--it is a requirement on
667 667 // the author of the <arch>.ad file to ensure that, for every
668 668 // even/odd double-register pair to which a double may be allocated,
669 669 // the word in the even single-register is stored to the first
670 670 // memory word. (Note that register numbers are completely
671 671 // arbitrary, and are not tied to any machine-level encodings.)
672 672 #ifdef _LP64
673 673 if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon ) {
674 674 array->append(new ConstantIntValue(0));
675 675 array->append(new_loc_value( _regalloc, regnum, Location::dbl ));
676 676 } else if ( t->base() == Type::Long ) {
677 677 array->append(new ConstantIntValue(0));
678 678 array->append(new_loc_value( _regalloc, regnum, Location::lng ));
679 679 } else if ( t->base() == Type::RawPtr ) {
680 680 // jsr/ret return address which must be restored into the full
681 681 // width 64-bit stack slot.
682 682 array->append(new_loc_value( _regalloc, regnum, Location::lng ));
683 683 }
684 684 #else //_LP64
685 685 #ifdef SPARC
686 686 if (t->base() == Type::Long && OptoReg::is_reg(regnum)) {
687 687 // For SPARC we have to swap high and low words for
688 688 // long values stored in a single-register (g0-g7).
689 689 array->append(new_loc_value( _regalloc, regnum , Location::normal ));
690 690 array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
691 691 } else
692 692 #endif //SPARC
693 693 if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon || t->base() == Type::Long ) {
694 694 // Repack the double/long as two jints.
695 695 // The convention the interpreter uses is that the second local
696 696 // holds the first raw word of the native double representation.
697 697 // This is actually reasonable, since locals and stack arrays
698 698 // grow downwards in all implementations.
699 699 // (If, on some machine, the interpreter's Java locals or stack
700 700 // were to grow upwards, the embedded doubles would be word-swapped.)
701 701 array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
702 702 array->append(new_loc_value( _regalloc, regnum , Location::normal ));
703 703 }
704 704 #endif //_LP64
705 705 else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
706 706 OptoReg::is_reg(regnum) ) {
707 707 array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double()
708 708 ? Location::float_in_dbl : Location::normal ));
709 709 } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
710 710 array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
711 711 ? Location::int_in_long : Location::normal ));
712 712 } else if( t->base() == Type::NarrowOop ) {
713 713 array->append(new_loc_value( _regalloc, regnum, Location::narrowoop ));
714 714 } else {
715 715 array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal ));
716 716 }
717 717 return;
718 718 }
719 719
720 720 // No register. It must be constant data.
721 721 switch (t->base()) {
722 722 case Type::Half: // Second half of a double
723 723 ShouldNotReachHere(); // Caller should skip 2nd halves
724 724 break;
725 725 case Type::AnyPtr:
726 726 array->append(new ConstantOopWriteValue(NULL));
727 727 break;
728 728 case Type::AryPtr:
729 729 case Type::InstPtr:
730 730 case Type::KlassPtr: // fall through
731 731 array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
732 732 break;
733 733 case Type::NarrowOop:
734 734 if (t == TypeNarrowOop::NULL_PTR) {
735 735 array->append(new ConstantOopWriteValue(NULL));
736 736 } else {
737 737 array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
738 738 }
739 739 break;
740 740 case Type::Int:
741 741 array->append(new ConstantIntValue(t->is_int()->get_con()));
742 742 break;
743 743 case Type::RawPtr:
744 744 // A return address (T_ADDRESS).
745 745 assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
746 746 #ifdef _LP64
747 747 // Must be restored to the full-width 64-bit stack slot.
748 748 array->append(new ConstantLongValue(t->is_ptr()->get_con()));
749 749 #else
750 750 array->append(new ConstantIntValue(t->is_ptr()->get_con()));
751 751 #endif
752 752 break;
753 753 case Type::FloatCon: {
754 754 float f = t->is_float_constant()->getf();
755 755 array->append(new ConstantIntValue(jint_cast(f)));
756 756 break;
757 757 }
758 758 case Type::DoubleCon: {
759 759 jdouble d = t->is_double_constant()->getd();
760 760 #ifdef _LP64
761 761 array->append(new ConstantIntValue(0));
762 762 array->append(new ConstantDoubleValue(d));
763 763 #else
764 764 // Repack the double as two jints.
765 765 // The convention the interpreter uses is that the second local
766 766 // holds the first raw word of the native double representation.
767 767 // This is actually reasonable, since locals and stack arrays
768 768 // grow downwards in all implementations.
769 769 // (If, on some machine, the interpreter's Java locals or stack
770 770 // were to grow upwards, the embedded doubles would be word-swapped.)
771 771 jint *dp = (jint*)&d;
772 772 array->append(new ConstantIntValue(dp[1]));
773 773 array->append(new ConstantIntValue(dp[0]));
774 774 #endif
775 775 break;
776 776 }
777 777 case Type::Long: {
778 778 jlong d = t->is_long()->get_con();
779 779 #ifdef _LP64
780 780 array->append(new ConstantIntValue(0));
781 781 array->append(new ConstantLongValue(d));
782 782 #else
783 783 // Repack the long as two jints.
784 784 // The convention the interpreter uses is that the second local
785 785 // holds the first raw word of the native long representation.
786 786 // This is actually reasonable, since locals and stack arrays
787 787 // grow downwards in all implementations.
788 788 // (If, on some machine, the interpreter's Java locals or stack
789 789 // were to grow upwards, the embedded doubles would be word-swapped.)
790 790 jint *dp = (jint*)&d;
791 791 array->append(new ConstantIntValue(dp[1]));
792 792 array->append(new ConstantIntValue(dp[0]));
793 793 #endif
794 794 break;
795 795 }
796 796 case Type::Top: // Add an illegal value here
797 797 array->append(new LocationValue(Location()));
798 798 break;
799 799 default:
800 800 ShouldNotReachHere();
801 801 break;
802 802 }
803 803 }
804 804
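
On 32-bit targets, FillLocArray() above records longs and doubles as two jints, appending the second raw word first so that the second local slot holds the first raw word, per the interpreter convention described in the comments. A tiny standalone snippet (value arbitrary) that mirrors the jint repacking makes the split visible:

    #include <stdio.h>
    #include <stdint.h>

    int main() {
      double d = 2.5;
      // Same trick as 'jint *dp = (jint*)&d' in FillLocArray: view the
      // 64-bit double as two raw 32-bit words.
      int32_t* dp = (int32_t*)&d;
      printf("appended first : 0x%08x (dp[1])\n", (unsigned)dp[1]);
      printf("appended second: 0x%08x (dp[0])\n", (unsigned)dp[0]);
      return 0;
    }
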
805 805 // Determine if this node starts a bundle
806 806 bool Compile::starts_bundle(const Node *n) const {
807 807 return (_node_bundling_limit > n->_idx &&
808 808 _node_bundling_base[n->_idx].starts_bundle());
809 809 }
810 810
811 811 //--------------------------Process_OopMap_Node--------------------------------
812 812 void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) {
813 813
814 814 // Handle special safepoint nodes for synchronization
815 815 MachSafePointNode *sfn = mach->as_MachSafePoint();
816 816 MachCallNode *mcall;
817 817
818 818 #ifdef ENABLE_ZAP_DEAD_LOCALS
819 819 assert( is_node_getting_a_safepoint(mach), "logic does not match; false negative");
820 820 #endif
821 821
822 822 int safepoint_pc_offset = current_offset;
823 823 bool is_method_handle_invoke = false;
824 824 bool return_oop = false;
825 825
826 826 // Add the safepoint in the DebugInfoRecorder
827 827 if( !mach->is_MachCall() ) {
828 828 mcall = NULL;
829 829 debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map);
830 830 } else {
831 831 mcall = mach->as_MachCall();
832 832
833 833 // Is the call a MethodHandle call?
834 834 if (mcall->is_MachCallJava()) {
835 835 if (mcall->as_MachCallJava()->_method_handle_invoke) {
836 836 assert(has_method_handle_invokes(), "must have been set during call generation");
837 837 is_method_handle_invoke = true;
838 838 }
839 839 }
840 840
841 841 // Check if a call returns an object.
842 842 if (mcall->return_value_is_used() &&
843 843 mcall->tf()->range()->field_at(TypeFunc::Parms)->isa_ptr()) {
844 844 return_oop = true;
845 845 }
846 846 safepoint_pc_offset += mcall->ret_addr_offset();
847 847 debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map);
848 848 }
849 849
850 850 // Loop over the JVMState list to add scope information
851 851 // Do not skip safepoints with a NULL method, they need monitor info
852 852 JVMState* youngest_jvms = sfn->jvms();
853 853 int max_depth = youngest_jvms->depth();
854 854
855 855 // Allocate the object pool for scalar-replaced objects -- the map from
856 856 // small-integer keys (which can be recorded in the local and ostack
857 857 // arrays) to descriptions of the object state.
858 858 GrowableArray<ScopeValue*> *objs = new GrowableArray<ScopeValue*>();
859 859
860 860 // Visit scopes from oldest to youngest.
861 861 for (int depth = 1; depth <= max_depth; depth++) {
862 862 JVMState* jvms = youngest_jvms->of_depth(depth);
863 863 int idx;
864 864 ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
865 865 // Safepoints that do not have method() set only provide oop-map and monitor info
866 866 // to support GC; these do not support deoptimization.
867 867 int num_locs = (method == NULL) ? 0 : jvms->loc_size();
868 868 int num_exps = (method == NULL) ? 0 : jvms->stk_size();
869 869 int num_mon = jvms->nof_monitors();
870 870 assert(method == NULL || jvms->bci() < 0 || num_locs == method->max_locals(),
871 871 "JVMS local count must match that of the method");
872 872
873 873 // Add Local and Expression Stack Information
874 874
875 875 // Insert locals into the locarray
876 876 GrowableArray<ScopeValue*> *locarray = new GrowableArray<ScopeValue*>(num_locs);
877 877 for( idx = 0; idx < num_locs; idx++ ) {
878 878 FillLocArray( idx, sfn, sfn->local(jvms, idx), locarray, objs );
879 879 }
880 880
881 881 // Insert expression stack entries into the exparray
882 882 GrowableArray<ScopeValue*> *exparray = new GrowableArray<ScopeValue*>(num_exps);
883 883 for( idx = 0; idx < num_exps; idx++ ) {
884 884 FillLocArray( idx, sfn, sfn->stack(jvms, idx), exparray, objs );
885 885 }
886 886
887 887 // Add in mappings of the monitors
888 888 assert( !method ||
889 889 !method->is_synchronized() ||
890 890 method->is_native() ||
891 891 num_mon > 0 ||
892 892 !GenerateSynchronizationCode,
893 893 "monitors must always exist for synchronized methods");
894 894
895 895 // Build the growable array of MonitorValues for the monitors
896 896 GrowableArray<MonitorValue*> *monarray = new GrowableArray<MonitorValue*>(num_mon);
897 897
898 898 // Loop over monitors and insert into array
899 899 for(idx = 0; idx < num_mon; idx++) {
900 900 // Grab the node that defines this monitor
901 901 Node* box_node = sfn->monitor_box(jvms, idx);
902 902 Node* obj_node = sfn->monitor_obj(jvms, idx);
903 903
904 904 // Create ScopeValue for object
905 905 ScopeValue *scval = NULL;
906 906
907 907 if( obj_node->is_SafePointScalarObject() ) {
908 908 SafePointScalarObjectNode* spobj = obj_node->as_SafePointScalarObject();
909 909 scval = Compile::sv_for_node_id(objs, spobj->_idx);
910 910 if (scval == NULL) {
911 911 const Type *t = obj_node->bottom_type();
912 912 ciKlass* cik = t->is_oopptr()->klass();
913 913 assert(cik->is_instance_klass() ||
914 914 cik->is_array_klass(), "Not supported allocation.");
915 915 ObjectValue* sv = new ObjectValue(spobj->_idx,
916 916 new ConstantOopWriteValue(cik->constant_encoding()));
917 917 Compile::set_sv_for_object_node(objs, sv);
918 918
919 919 uint first_ind = spobj->first_index();
920 920 for (uint i = 0; i < spobj->n_fields(); i++) {
921 921 Node* fld_node = sfn->in(first_ind+i);
922 922 (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs);
923 923 }
923 923 }
924 924 scval = sv;
925 925 }
926 926 } else if( !obj_node->is_Con() ) {
927 927 OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node);
928 928 if( obj_node->bottom_type()->base() == Type::NarrowOop ) {
929 929 scval = new_loc_value( _regalloc, obj_reg, Location::narrowoop );
930 930 } else {
931 931 scval = new_loc_value( _regalloc, obj_reg, Location::oop );
932 932 }
933 933 } else {
934 - const TypePtr *tp = obj_node->bottom_type()->make_ptr();
934 + const TypePtr *tp = obj_node->get_ptr_type();
935 935 scval = new ConstantOopWriteValue(tp->is_oopptr()->const_oop()->constant_encoding());
936 936 }
937 937
938 938 OptoReg::Name box_reg = BoxLockNode::reg(box_node);
939 939 Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg));
940 940 bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated());
941 941 monarray->append(new MonitorValue(scval, basic_lock, eliminated));
942 942 }
943 943
944 944 // We dump the object pool first, since deoptimization reads it in first.
945 945 debug_info()->dump_object_pool(objs);
946 946
947 947 // Build first class objects to pass to scope
948 948 DebugToken *locvals = debug_info()->create_scope_values(locarray);
949 949 DebugToken *expvals = debug_info()->create_scope_values(exparray);
950 950 DebugToken *monvals = debug_info()->create_monitor_values(monarray);
951 951
952 952 // Make method available for all Safepoints
953 953 ciMethod* scope_method = method ? method : _method;
954 954 // Describe the scope here
955 955 assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI");
956 956 assert(!jvms->should_reexecute() || depth == max_depth, "reexecute allowed only for the youngest");
957 957 // Now we can describe the scope.
958 958 debug_info()->describe_scope(safepoint_pc_offset, scope_method, jvms->bci(), jvms->should_reexecute(), is_method_handle_invoke, return_oop, locvals, expvals, monvals);
959 959 } // End jvms loop
960 960
961 961 // Mark the end of the scope set.
962 962 debug_info()->end_safepoint(safepoint_pc_offset);
963 963 }
964 964
965 965
966 966
967 967 // A simplified version of Process_OopMap_Node, to handle non-safepoints.
968 968 class NonSafepointEmitter {
969 969 Compile* C;
970 970 JVMState* _pending_jvms;
971 971 int _pending_offset;
972 972
973 973 void emit_non_safepoint();
974 974
975 975 public:
976 976 NonSafepointEmitter(Compile* compile) {
977 977 this->C = compile;
978 978 _pending_jvms = NULL;
979 979 _pending_offset = 0;
980 980 }
981 981
982 982 void observe_instruction(Node* n, int pc_offset) {
983 983 if (!C->debug_info()->recording_non_safepoints()) return;
984 984
985 985 Node_Notes* nn = C->node_notes_at(n->_idx);
986 986 if (nn == NULL || nn->jvms() == NULL) return;
987 987 if (_pending_jvms != NULL &&
988 988 _pending_jvms->same_calls_as(nn->jvms())) {
989 989 // Repeated JVMS? Stretch it up here.
990 990 _pending_offset = pc_offset;
991 991 } else {
992 992 if (_pending_jvms != NULL &&
993 993 _pending_offset < pc_offset) {
994 994 emit_non_safepoint();
995 995 }
996 996 _pending_jvms = NULL;
997 997 if (pc_offset > C->debug_info()->last_pc_offset()) {
998 998 // This is the only way _pending_jvms can become non-NULL:
999 999 _pending_jvms = nn->jvms();
1000 1000 _pending_offset = pc_offset;
1001 1001 }
1002 1002 }
1003 1003 }
1004 1004
1005 1005 // Stay out of the way of real safepoints:
1006 1006 void observe_safepoint(JVMState* jvms, int pc_offset) {
1007 1007 if (_pending_jvms != NULL &&
1008 1008 !_pending_jvms->same_calls_as(jvms) &&
1009 1009 _pending_offset < pc_offset) {
1010 1010 emit_non_safepoint();
1011 1011 }
1012 1012 _pending_jvms = NULL;
1013 1013 }
1014 1014
1015 1015 void flush_at_end() {
1016 1016 if (_pending_jvms != NULL) {
1017 1017 emit_non_safepoint();
1018 1018 }
1019 1019 _pending_jvms = NULL;
1020 1020 }
1021 1021 };
1022 1022
1023 1023 void NonSafepointEmitter::emit_non_safepoint() {
1024 1024 JVMState* youngest_jvms = _pending_jvms;
1025 1025 int pc_offset = _pending_offset;
1026 1026
1027 1027 // Clear it now:
1028 1028 _pending_jvms = NULL;
1029 1029
1030 1030 DebugInformationRecorder* debug_info = C->debug_info();
1031 1031 assert(debug_info->recording_non_safepoints(), "sanity");
1032 1032
1033 1033 debug_info->add_non_safepoint(pc_offset);
1034 1034 int max_depth = youngest_jvms->depth();
1035 1035
1036 1036 // Visit scopes from oldest to youngest.
1037 1037 for (int depth = 1; depth <= max_depth; depth++) {
1038 1038 JVMState* jvms = youngest_jvms->of_depth(depth);
1039 1039 ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
1040 1040 assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest");
1041 1041 debug_info->describe_scope(pc_offset, method, jvms->bci(), jvms->should_reexecute());
1042 1042 }
1043 1043
1044 1044 // Mark the end of the scope set.
1045 1045 debug_info->end_non_safepoint(pc_offset);
1046 1046 }
1047 1047
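
NonSafepointEmitter above coalesces consecutive instructions that share one JVMState into a single non-safepoint record. A hedged standalone model of its observe/flush protocol, with integer states and pc offsets standing in for JVMState and DebugInformationRecorder (the real class additionally checks last_pc_offset and compares the pending state against a safepoint's own JVMState, both omitted here):

    #include <stdio.h>

    struct Emitter {
      int pending_state;   // -1 means nothing pending
      int pending_offset;
      void emit() { printf("non-safepoint: state %d ends at pc %d\n",
                           pending_state, pending_offset); }
      void observe_instruction(int state, int pc) {
        if (state == pending_state) { pending_offset = pc; return; } // stretch
        if (pending_state >= 0 && pending_offset < pc) emit();       // flush old
        pending_state = state; pending_offset = pc;                  // start new
      }
      void observe_safepoint(int pc) {
        if (pending_state >= 0 && pending_offset < pc) emit();
        pending_state = -1;   // the real safepoint record takes over from here
      }
      void flush_at_end() { if (pending_state >= 0) emit(); pending_state = -1; }
    };

    int main() {
      Emitter e = { -1, 0 };
      e.observe_instruction(7, 4);   // opens a pending record for state 7
      e.observe_instruction(7, 12);  // same state: stretch it to pc 12
      e.observe_safepoint(16);       // flushes "state 7 ends at pc 12"
      e.observe_instruction(9, 20);
      e.flush_at_end();              // flushes "state 9 ends at pc 20"
      return 0;
    }
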
1048 1048
1049 1049
1050 1050 // helper for fill_buffer bailout logic
1051 1051 static void turn_off_compiler(Compile* C) {
1052 1052 if (CodeCache::largest_free_block() >= CodeCacheMinimumFreeSpace*10) {
1053 1053 // Do not turn off compilation if a single giant method has
1054 1054 // blown the code cache size.
1055 1055 C->record_failure("excessive request to CodeCache");
1056 1056 } else {
1057 1057 // Let CompileBroker disable further compilations.
1058 1058 C->record_failure("CodeCache is full");
1059 1059 }
1060 1060 }
1061 1061
1062 1062
1063 1063 //------------------------------init_buffer------------------------------------
1064 1064 CodeBuffer* Compile::init_buffer(uint* blk_starts) {
1065 1065
1066 1066 // Set the initially allocated size
1067 1067 int code_req = initial_code_capacity;
1068 1068 int locs_req = initial_locs_capacity;
1069 1069 int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity;
1070 1070 int const_req = initial_const_capacity;
1071 1071
1072 1072 int pad_req = NativeCall::instruction_size;
1073 1073 // The extra spacing after the code is necessary on some platforms.
1074 1074 // Sometimes we need to patch in a jump after the last instruction,
1075 1075 // if the nmethod has been deoptimized. (See 4932387, 4894843.)
1076 1076
1077 1077 // Compute the byte offset where we can store the deopt pc.
1078 1078 if (fixed_slots() != 0) {
1079 1079 _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
1080 1080 }
1081 1081
1082 1082 // Compute prolog code size
1083 1083 _method_size = 0;
1084 1084 _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
1085 1085 #ifdef IA64
1086 1086 if (save_argument_registers()) {
1087 1087 // 4815101: this is a stub with implicit and unknown precision fp args.
1088 1088 // The usual spill mechanism can only generate stfd's in this case, which
1089 1089 // doesn't work if the fp reg to spill contains a single-precision denorm.
1090 1090 // Instead, we hack around the normal spill mechanism using stfspill's and
1091 1091 // ldffill's in the MachProlog and MachEpilog emit methods. We allocate
1092 1092 // space here for the fp arg regs (f8-f15) we're going to thusly spill.
1093 1093 //
1094 1094 // If we ever implement 16-byte 'registers' == stack slots, we can
1095 1095 // get rid of this hack and have SpillCopy generate stfspill/ldffill
1096 1096 // instead of stfd/stfs/ldfd/ldfs.
1097 1097 _frame_slots += 8*(16/BytesPerInt);
1098 1098 }
1099 1099 #endif
1100 1100 assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check");
1101 1101
1102 1102 if (has_mach_constant_base_node()) {
1103 1103 // Fill the constant table.
1104 1104 // Note: This must happen before shorten_branches.
1105 1105 for (uint i = 0; i < _cfg->_num_blocks; i++) {
1106 1106 Block* b = _cfg->_blocks[i];
1107 1107
1108 1108 for (uint j = 0; j < b->_nodes.size(); j++) {
1109 1109 Node* n = b->_nodes[j];
1110 1110
1111 1111 // If the node is a MachConstantNode evaluate the constant
1112 1112 // value section.
1113 1113 if (n->is_MachConstant()) {
1114 1114 MachConstantNode* machcon = n->as_MachConstant();
1115 1115 machcon->eval_constant(C);
1116 1116 }
1117 1117 }
1118 1118 }
1119 1119
1120 1120 // Calculate the offsets of the constants and the size of the
1121 1121 // constant table (including the padding to the next section).
1122 1122 constant_table().calculate_offsets_and_size();
1123 1123 const_req = constant_table().size();
1124 1124 }
1125 1125
1126 1126 // Initialize the space for the BufferBlob used to find and verify
1127 1127 // instruction size in MachNode::emit_size()
1128 1128 init_scratch_buffer_blob(const_req);
1129 1129 if (failing()) return NULL; // Out of memory
1130 1130
1131 1131 // Pre-compute the length of blocks and replace
1132 1132 // long branches with short if machine supports it.
1133 1133 shorten_branches(blk_starts, code_req, locs_req, stub_req);
1134 1134
1135 1135 // nmethod and CodeBuffer count stubs & constants as part of method's code.
1136 1136 int exception_handler_req = size_exception_handler();
1137 1137 int deopt_handler_req = size_deopt_handler();
1138 1138 exception_handler_req += MAX_stubs_size; // add marginal slop for handler
1139 1139 deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
1140 1140 stub_req += MAX_stubs_size; // ensure per-stub margin
1141 1141 code_req += MAX_inst_size; // ensure per-instruction margin
1142 1142
1143 1143 if (StressCodeBuffers)
1144 1144 code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion
1145 1145
1146 1146 int total_req =
1147 1147 const_req +
1148 1148 code_req +
1149 1149 pad_req +
1150 1150 stub_req +
1151 1151 exception_handler_req +
1152 1152 deopt_handler_req; // deopt handler
1153 1153
1154 1154 if (has_method_handle_invokes())
1155 1155 total_req += deopt_handler_req; // deopt MH handler
1156 1156
1157 1157 CodeBuffer* cb = code_buffer();
1158 1158 cb->initialize(total_req, locs_req);
1159 1159
1160 1160 // Have we run out of code space?
1161 1161 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1162 1162 turn_off_compiler(this);
1163 1163 return NULL;
1164 1164 }
1165 1165 // Configure the code buffer.
1166 1166 cb->initialize_consts_size(const_req);
1167 1167 cb->initialize_stubs_size(stub_req);
1168 1168 cb->initialize_oop_recorder(env()->oop_recorder());
1169 1169
1170 1170 // fill in the nop array for bundling computations
1171 1171 MachNode *_nop_list[Bundle::_nop_count];
1172 1172 Bundle::initialize_nops(_nop_list, this);
1173 1173
1174 1174 return cb;
1175 1175 }
1176 1176
1177 1177 //------------------------------fill_buffer------------------------------------
1178 1178 void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
1179 1179 // blk_starts[] contains offsets calculated during short branches processing,
1180 1180 // offsets should not be increased during following steps.
1181 1181
1182 1182 // Compute the size of first NumberOfLoopInstrToAlign instructions at head
1183 1183 // of a loop. It is used to determine the padding for loop alignment.
1184 1184 compute_loop_first_inst_sizes();
1185 1185
1186 1186 // Create oopmap set.
1187 1187 _oop_map_set = new OopMapSet();
1188 1188
1189 1189 // !!!!! This preserves old handling of oopmaps for now
1190 1190 debug_info()->set_oopmaps(_oop_map_set);
1191 1191
1192 1192 uint nblocks = _cfg->_num_blocks;
1193 1193 // Count and start of implicit null check instructions
1194 1194 uint inct_cnt = 0;
1195 1195 uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1);
1196 1196
1197 1197 // Count and start of calls
1198 1198 uint *call_returns = NEW_RESOURCE_ARRAY(uint, nblocks+1);
1199 1199
1200 1200 uint return_offset = 0;
1201 1201 int nop_size = (new (this) MachNopNode())->size(_regalloc);
1202 1202
1203 1203 int previous_offset = 0;
1204 1204 int current_offset = 0;
1205 1205 int last_call_offset = -1;
1206 1206 int last_avoid_back_to_back_offset = -1;
1207 1207 #ifdef ASSERT
1208 1208 uint* jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks);
1209 1209 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
1210 1210 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks);
1211 1211 uint* jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks);
1212 1212 #endif
1213 1213
1214 1214 // Create an array of node offsets, one for each node, if printing is enabled
1215 1215 #ifndef PRODUCT
1216 1216 int *node_offsets = NULL;
1217 1217 uint node_offset_limit = unique();
1218 1218
1219 1219 if (print_assembly())
1220 1220 node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit);
1221 1221 #endif
1222 1222
1223 1223 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily
1224 1224
1225 1225 // Emit the constant table.
1226 1226 if (has_mach_constant_base_node()) {
1227 1227 constant_table().emit(*cb);
1228 1228 }
1229 1229
1230 1230 // Create an array of labels, one for each basic block
1231 1231 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, nblocks+1);
1232 1232 for (uint i=0; i <= nblocks; i++) {
1233 1233 blk_labels[i].init();
1234 1234 }
1235 1235
1236 1236 // ------------------
1237 1237 // Now fill in the code buffer
1238 1238 Node *delay_slot = NULL;
1239 1239
1240 1240 for (uint i=0; i < nblocks; i++) {
1241 1241 Block *b = _cfg->_blocks[i];
1242 1242
1243 1243 Node *head = b->head();
1244 1244
1245 1245 // If this block needs to start aligned (i.e., can be reached other
1246 1246 // than by falling-thru from the previous block), then force the
1247 1247 // start of a new bundle.
1248 1248 if (Pipeline::requires_bundling() && starts_bundle(head))
1249 1249 cb->flush_bundle(true);
1250 1250
1251 1251 #ifdef ASSERT
1252 1252 if (!b->is_connector()) {
1253 1253 stringStream st;
1254 1254 b->dump_head(&_cfg->_bbs, &st);
1255 1255 MacroAssembler(cb).block_comment(st.as_string());
1256 1256 }
1257 1257 jmp_target[i] = 0;
1258 1258 jmp_offset[i] = 0;
1259 1259 jmp_size[i] = 0;
1260 1260 jmp_rule[i] = 0;
1261 1261 #endif
1262 1262 int blk_offset = current_offset;
1263 1263
1264 1264 // Define the label at the beginning of the basic block
1265 1265 MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
1266 1266
1267 1267 uint last_inst = b->_nodes.size();
1268 1268
1269 1269 // Emit block normally, except for last instruction.
1270 1270 // Emit means "dump code bits into code buffer".
1271 1271 for (uint j = 0; j<last_inst; j++) {
1272 1272
1273 1273 // Get the node
1274 1274 Node* n = b->_nodes[j];
1275 1275
1276 1276 // See if delay slots are supported
1277 1277 if (valid_bundle_info(n) &&
1278 1278 node_bundling(n)->used_in_unconditional_delay()) {
1279 1279 assert(delay_slot == NULL, "no use of delay slot node");
1280 1280 assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");
1281 1281
1282 1282 delay_slot = n;
1283 1283 continue;
1284 1284 }
1285 1285
1286 1286 // If this starts a new instruction group, then flush the current one
1287 1287 // (but allow split bundles)
1288 1288 if (Pipeline::requires_bundling() && starts_bundle(n))
1289 1289 cb->flush_bundle(false);
1290 1290
1291 1291 // The following logic is duplicated in the code ifdeffed for
1292 1292 // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It
1293 1293 // should be factored out. Or maybe dispersed to the nodes?
1294 1294
1295 1295 // Special handling for SafePoint/Call Nodes
1296 1296 bool is_mcall = false;
1297 1297 if (n->is_Mach()) {
1298 1298 MachNode *mach = n->as_Mach();
1299 1299 is_mcall = n->is_MachCall();
1300 1300 bool is_sfn = n->is_MachSafePoint();
1301 1301
1302 1302 // If this requires all previous instructions be flushed, then do so
1303 1303 if (is_sfn || is_mcall || mach->alignment_required() != 1) {
1304 1304 cb->flush_bundle(true);
1305 1305 current_offset = cb->insts_size();
1306 1306 }
1307 1307
1308 1308 // Padding may be needed again since a previous instruction
1309 1309 // could be moved into the delay slot.
1310 1310
1311 1311 // align the instruction if necessary
1312 1312 int padding = mach->compute_padding(current_offset);
1313 1313 // Make sure safepoint node for polling is distinct from a call's
1314 1314 // return by adding a nop if needed.
1315 1315 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) {
1316 1316 padding = nop_size;
1317 1317 }
1318 1318 if (padding == 0 && mach->avoid_back_to_back() &&
1319 1319 current_offset == last_avoid_back_to_back_offset) {
1320 1320         // Avoid back-to-back instructions.
1321 1321 padding = nop_size;
1322 1322 }
1323 1323
1324 1324       if (padding > 0) {
1325 1325 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
1326 1326 int nops_cnt = padding / nop_size;
1327 1327 MachNode *nop = new (this) MachNopNode(nops_cnt);
1328 1328 b->_nodes.insert(j++, nop);
1329 1329 last_inst++;
1330 1330 _cfg->_bbs.map( nop->_idx, b );
1331 1331 nop->emit(*cb, _regalloc);
1332 1332 cb->flush_bundle(true);
1333 1333 current_offset = cb->insts_size();
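                  // (The nop was also inserted into b->_nodes, with j and last_inst
                  // bumped, so the block's node list matches the code just emitted.)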
1334 1334 }
1335 1335
1336 1336 // Remember the start of the last call in a basic block
1337 1337 if (is_mcall) {
1338 1338 MachCallNode *mcall = mach->as_MachCall();
1339 1339
1340 1340 // This destination address is NOT PC-relative
1341 1341 mcall->method_set((intptr_t)mcall->entry_point());
1342 1342
1343 1343 // Save the return address
1344 1344 call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();
1345 1345
1346 1346 if (mcall->is_MachCallLeaf()) {
1347 1347 is_mcall = false;
1348 1348 is_sfn = false;
1349 1349 }
1350 1350 }
1351 1351
1352 1352       // is_sfn will be true whenever is_mcall is true because MachCallNode inherits from MachSafePointNode
1353 1353 if (is_sfn || is_mcall) {
1354 1354
1355 1355 // Handle special safepoint nodes for synchronization
1356 1356 if (!is_mcall) {
1357 1357 MachSafePointNode *sfn = mach->as_MachSafePoint();
1358 1358 // !!!!! Stubs only need an oopmap right now, so bail out
1359 1359 if (sfn->jvms()->method() == NULL) {
1360 1360 // Write the oopmap directly to the code blob??!!
1361 1361 # ifdef ENABLE_ZAP_DEAD_LOCALS
1362 1362 assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive");
1363 1363 # endif
1364 1364 continue;
1365 1365 }
1366 1366 } // End synchronization
1367 1367
1368 1368 non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
1369 1369 current_offset);
1370 1370 Process_OopMap_Node(mach, current_offset);
1371 1371 } // End if safepoint
1372 1372
1373 1373 // If this is a null check, then add the start of the previous instruction to the list
1374 1374 else if( mach->is_MachNullCheck() ) {
1375 1375 inct_starts[inct_cnt++] = previous_offset;
1376 1376 }
1377 1377
1378 1378 // If this is a branch, then fill in the label with the target BB's label
1379 1379 else if (mach->is_MachBranch()) {
1380 1380 // This requires the TRUE branch target be in succs[0]
1381 1381 uint block_num = b->non_connector_successor(0)->_pre_order;
1382 1382
1383 1383           // Try to replace a long branch if the delay slot is not used;
1384 1384           // this mostly applies to back branches, since a forward branch's
1385 1385           // distance is not finalized yet.
1386 1386 bool delay_slot_is_used = valid_bundle_info(n) &&
1387 1387 node_bundling(n)->use_unconditional_delay();
1388 1388 if (!delay_slot_is_used && mach->may_be_short_branch()) {
1389 1389 assert(delay_slot == NULL, "not expecting delay slot node");
1390 1390 int br_size = n->size(_regalloc);
1391 1391 int offset = blk_starts[block_num] - current_offset;
1392 1392 if (block_num >= i) {
1393 1393               // The offsets of the current and following blocks are not
1394 1394               // finalized yet; adjust the distance by the difference
1395 1395               // between the calculated and final offsets of the current block.
1396 1396 offset -= (blk_starts[i] - blk_offset);
1397 1397 }
1398 1398           // In the following code a nop could be inserted before
1399 1399           // the branch, which would increase the backward distance.
1400 1400 bool needs_padding = (current_offset == last_avoid_back_to_back_offset);
1401 1401 if (needs_padding && offset <= 0)
1402 1402 offset -= nop_size;
1403 1403
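                      // (Illustrative example: with blk_starts[block_num] == 0x100 and
                      // current_offset == 0x140, offset is -0x40, a 64-byte backward
                      // branch; the matcher decides whether a short form can reach it.)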
1404 1404 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
1405 1405 // We've got a winner. Replace this branch.
1406 1406 MachNode* replacement = mach->as_MachBranch()->short_branch_version(this);
1407 1407
1408 1408 // Update the jmp_size.
1409 1409 int new_size = replacement->size(_regalloc);
1410 1410 assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller");
1411 1411 // Insert padding between avoid_back_to_back branches.
1412 1412 if (needs_padding && replacement->avoid_back_to_back()) {
1413 1413 MachNode *nop = new (this) MachNopNode();
1414 1414 b->_nodes.insert(j++, nop);
1415 1415 _cfg->_bbs.map(nop->_idx, b);
1416 1416 last_inst++;
1417 1417 nop->emit(*cb, _regalloc);
1418 1418 cb->flush_bundle(true);
1419 1419 current_offset = cb->insts_size();
1420 1420 }
1421 1421 #ifdef ASSERT
1422 1422 jmp_target[i] = block_num;
1423 1423 jmp_offset[i] = current_offset - blk_offset;
1424 1424 jmp_size[i] = new_size;
1425 1425 jmp_rule[i] = mach->rule();
1426 1426 #endif
1427 1427 b->_nodes.map(j, replacement);
1428 1428 mach->subsume_by(replacement, C);
1429 1429 n = replacement;
1430 1430 mach = replacement;
1431 1431 }
1432 1432 }
1433 1433 mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num );
1434 1434 } else if (mach->ideal_Opcode() == Op_Jump) {
1435 1435 for (uint h = 0; h < b->_num_succs; h++) {
1436 1436 Block* succs_block = b->_succs[h];
1437 1437 for (uint j = 1; j < succs_block->num_preds(); j++) {
1438 1438 Node* jpn = succs_block->pred(j);
1439 1439 if (jpn->is_JumpProj() && jpn->in(0) == mach) {
1440 1440 uint block_num = succs_block->non_connector()->_pre_order;
1441 1441 Label *blkLabel = &blk_labels[block_num];
1442 1442 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel);
1443 1443 }
1444 1444 }
1445 1445 }
1446 1446 }
1447 1447
1448 1448 #ifdef ASSERT
1449 1449 // Check that oop-store precedes the card-mark
1450 1450 else if (mach->ideal_Opcode() == Op_StoreCM) {
1451 1451 uint storeCM_idx = j;
1452 1452 int count = 0;
1453 1453 for (uint prec = mach->req(); prec < mach->len(); prec++) {
1454 1454 Node *oop_store = mach->in(prec); // Precedence edge
1455 1455 if (oop_store == NULL) continue;
1456 1456 count++;
1457 1457 uint i4;
1458 1458 for( i4 = 0; i4 < last_inst; ++i4 ) {
1459 1459 if( b->_nodes[i4] == oop_store ) break;
1460 1460 }
1461 1461 // Note: This test can provide a false failure if other precedence
1462 1462 // edges have been added to the storeCMNode.
1463 1463 assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
1464 1464 }
1465 1465 assert(count > 0, "storeCM expects at least one precedence edge");
1466 1466 }
1467 1467 #endif
1468 1468
1469 1469 else if (!n->is_Proj()) {
1470 1470 // Remember the beginning of the previous instruction, in case
1471 1471 // it's followed by a flag-kill and a null-check. Happens on
1472 1472 // Intel all the time, with add-to-memory kind of opcodes.
1473 1473 previous_offset = current_offset;
1474 1474 }
1475 1475 }
1476 1476
1477 1477 // Verify that there is sufficient space remaining
1478 1478 cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
1479 1479 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1480 1480 turn_off_compiler(this);
1481 1481 return;
1482 1482 }
1483 1483
1484 1484 // Save the offset for the listing
1485 1485 #ifndef PRODUCT
1486 1486 if (node_offsets && n->_idx < node_offset_limit)
1487 1487 node_offsets[n->_idx] = cb->insts_size();
1488 1488 #endif
1489 1489
1490 1490 // "Normal" instruction case
1491 1491 DEBUG_ONLY( uint instr_offset = cb->insts_size(); )
1492 1492 n->emit(*cb, _regalloc);
1493 1493 current_offset = cb->insts_size();
1494 1494
1495 1495 #ifdef ASSERT
1496 1496 if (n->size(_regalloc) < (current_offset-instr_offset)) {
1497 1497 n->dump();
1498 1498 assert(false, "wrong size of mach node");
1499 1499 }
1500 1500 #endif
1501 1501 non_safepoints.observe_instruction(n, current_offset);
1502 1502
1503 1503       // mcall is the last "call" that can be a safepoint.
1504 1504       // Record it so we can see if a poll will directly follow it,
1505 1505       // in which case we'll need a pad to make the PcDesc sites unique;
1506 1506       // see 5010568. This can be slightly inaccurate but conservative
1507 1507 // in the case that return address is not actually at current_offset.
1508 1508 // This is a small price to pay.
1509 1509
1510 1510 if (is_mcall) {
1511 1511 last_call_offset = current_offset;
1512 1512 }
1513 1513
1514 1514 if (n->is_Mach() && n->as_Mach()->avoid_back_to_back()) {
1515 1515         // Avoid back-to-back instructions.
1516 1516 last_avoid_back_to_back_offset = current_offset;
1517 1517 }
1518 1518
1519 1519 // See if this instruction has a delay slot
1520 1520 if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
1521 1521 assert(delay_slot != NULL, "expecting delay slot node");
1522 1522
1523 1523 // Back up 1 instruction
1524 1524 cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size());
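                  // (Rewinding one instruction unit positions the buffer so the
                  // saved delay_slot node, emitted below, lands in the slot the
                  // branch left open.)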
1525 1525
1526 1526 // Save the offset for the listing
1527 1527 #ifndef PRODUCT
1528 1528 if (node_offsets && delay_slot->_idx < node_offset_limit)
1529 1529 node_offsets[delay_slot->_idx] = cb->insts_size();
1530 1530 #endif
1531 1531
1532 1532 // Support a SafePoint in the delay slot
1533 1533 if (delay_slot->is_MachSafePoint()) {
1534 1534 MachNode *mach = delay_slot->as_Mach();
1535 1535 // !!!!! Stubs only need an oopmap right now, so bail out
1536 1536 if (!mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL) {
1537 1537 // Write the oopmap directly to the code blob??!!
1538 1538 # ifdef ENABLE_ZAP_DEAD_LOCALS
1539 1539 assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive");
1540 1540 # endif
1541 1541 delay_slot = NULL;
1542 1542 continue;
1543 1543 }
1544 1544
1545 1545 int adjusted_offset = current_offset - Pipeline::instr_unit_size();
1546 1546 non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
1547 1547 adjusted_offset);
1548 1548 // Generate an OopMap entry
1549 1549 Process_OopMap_Node(mach, adjusted_offset);
1550 1550 }
1551 1551
1552 1552 // Insert the delay slot instruction
1553 1553 delay_slot->emit(*cb, _regalloc);
1554 1554
1555 1555 // Don't reuse it
1556 1556 delay_slot = NULL;
1557 1557 }
1558 1558
1559 1559 } // End for all instructions in block
1560 1560
1561 1561 // If the next block is the top of a loop, pad this block out to align
1562 1562 // the loop top a little. Helps prevent pipe stalls at loop back branches.
1563 1563 if (i < nblocks-1) {
1564 1564 Block *nb = _cfg->_blocks[i+1];
1565 1565 int padding = nb->alignment_padding(current_offset);
1566 1566 if( padding > 0 ) {
1567 1567 MachNode *nop = new (this) MachNopNode(padding / nop_size);
1568 1568 b->_nodes.insert( b->_nodes.size(), nop );
1569 1569 _cfg->_bbs.map( nop->_idx, b );
1570 1570 nop->emit(*cb, _regalloc);
1571 1571 current_offset = cb->insts_size();
1572 1572 }
1573 1573 }
1574 1574     // Verify that the distance computed earlier for forward
1575 1575     // short branches is still valid.
1576 1576 guarantee((int)(blk_starts[i+1] - blk_starts[i]) >= (current_offset - blk_offset), "shouldn't increase block size");
1577 1577
1578 1578 // Save new block start offset
1579 1579 blk_starts[i] = blk_offset;
1580 1580 } // End of for all blocks
1581 1581 blk_starts[nblocks] = current_offset;
1582 1582
1583 1583 non_safepoints.flush_at_end();
1584 1584
1585 1585 // Offset too large?
1586 1586 if (failing()) return;
1587 1587
1588 1588 // Define a pseudo-label at the end of the code
1589 1589 MacroAssembler(cb).bind( blk_labels[nblocks] );
1590 1590
1591 1591 // Compute the size of the first block
1592 1592 _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();
1593 1593
1594 1594 assert(cb->insts_size() < 500000, "method is unreasonably large");
1595 1595
1596 1596 #ifdef ASSERT
1597 1597 for (uint i = 0; i < nblocks; i++) { // For all blocks
1598 1598 if (jmp_target[i] != 0) {
1599 1599 int br_size = jmp_size[i];
1600 1600 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]);
1601 1601 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) {
1602 1602 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
1603 1603 assert(false, "Displacement too large for short jmp");
1604 1604 }
1605 1605 }
1606 1606 }
1607 1607 #endif
1608 1608
1609 1609 // ------------------
1610 1610
1611 1611 #ifndef PRODUCT
1612 1612 // Information on the size of the method, without the extraneous code
1613 1613 Scheduling::increment_method_size(cb->insts_size());
1614 1614 #endif
1615 1615
1616 1616 // ------------------
1617 1617 // Fill in exception table entries.
1618 1618 FillExceptionTables(inct_cnt, call_returns, inct_starts, blk_labels);
1619 1619
1620 1620   // Only Java methods have exception handlers and deopt handlers
1621 1621 if (_method) {
1622 1622 // Emit the exception handler code.
1623 1623 _code_offsets.set_value(CodeOffsets::Exceptions, emit_exception_handler(*cb));
1624 1624 // Emit the deopt handler code.
1625 1625 _code_offsets.set_value(CodeOffsets::Deopt, emit_deopt_handler(*cb));
1626 1626
1627 1627 // Emit the MethodHandle deopt handler code (if required).
1628 1628 if (has_method_handle_invokes()) {
1629 1629 // We can use the same code as for the normal deopt handler, we
1630 1630 // just need a different entry point address.
1631 1631 _code_offsets.set_value(CodeOffsets::DeoptMH, emit_deopt_handler(*cb));
1632 1632 }
1633 1633 }
1634 1634
1635 1635 // One last check for failed CodeBuffer::expand:
1636 1636 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1637 1637 turn_off_compiler(this);
1638 1638 return;
1639 1639 }
1640 1640
1641 1641 #ifndef PRODUCT
1642 1642 // Dump the assembly code, including basic-block numbers
1643 1643 if (print_assembly()) {
1644 1644 ttyLocker ttyl; // keep the following output all in one block
1645 1645 if (!VMThread::should_terminate()) { // test this under the tty lock
1646 1646 // This output goes directly to the tty, not the compiler log.
1647 1647 // To enable tools to match it up with the compilation activity,
1648 1648 // be sure to tag this tty output with the compile ID.
1649 1649 if (xtty != NULL) {
1650 1650 xtty->head("opto_assembly compile_id='%d'%s", compile_id(),
1651 1651 is_osr_compilation() ? " compile_kind='osr'" :
1652 1652 "");
1653 1653 }
1654 1654 if (method() != NULL) {
1655 1655 method()->print_oop();
1656 1656 print_codes();
1657 1657 }
1658 1658 dump_asm(node_offsets, node_offset_limit);
1659 1659 if (xtty != NULL) {
1660 1660 xtty->tail("opto_assembly");
1661 1661 }
1662 1662 }
1663 1663 }
1664 1664 #endif
1665 1665
1666 1666 }
1667 1667
1668 1668 void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) {
1669 1669 _inc_table.set_size(cnt);
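            // (Two tables are filled here: for each block ending in a Catch, a
            // subtable maps the call's return PC to handler bci/PCO pairs; for
            // each MachNullCheck, an implicit-exception entry maps the faulting
            // PC to the handler block's PC.)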
1670 1670
1671 1671 uint inct_cnt = 0;
1672 1672 for( uint i=0; i<_cfg->_num_blocks; i++ ) {
1673 1673 Block *b = _cfg->_blocks[i];
1674 1674 Node *n = NULL;
1675 1675 int j;
1676 1676
1677 1677 // Find the branch; ignore trailing NOPs.
1678 1678 for( j = b->_nodes.size()-1; j>=0; j-- ) {
1679 1679 n = b->_nodes[j];
1680 1680 if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con )
1681 1681 break;
1682 1682 }
1683 1683
1684 1684 // If we didn't find anything, continue
1685 1685 if( j < 0 ) continue;
1686 1686
1687 1687 // Compute ExceptionHandlerTable subtable entry and add it
1688 1688 // (skip empty blocks)
1689 1689 if( n->is_Catch() ) {
1690 1690
1691 1691 // Get the offset of the return from the call
1692 1692 uint call_return = call_returns[b->_pre_order];
1693 1693 #ifdef ASSERT
1694 1694 assert( call_return > 0, "no call seen for this basic block" );
1695 1695 while( b->_nodes[--j]->is_MachProj() ) ;
1696 1696 assert( b->_nodes[j]->is_MachCall(), "CatchProj must follow call" );
1697 1697 #endif
1698 1698       // The last instruction is a CatchNode; find its CatchProjNodes
1699 1699 int nof_succs = b->_num_succs;
1700 1700 // allocate space
1701 1701 GrowableArray<intptr_t> handler_bcis(nof_succs);
1702 1702 GrowableArray<intptr_t> handler_pcos(nof_succs);
1703 1703 // iterate through all successors
1704 1704 for (int j = 0; j < nof_succs; j++) {
1705 1705 Block* s = b->_succs[j];
1706 1706 bool found_p = false;
1707 1707 for( uint k = 1; k < s->num_preds(); k++ ) {
1708 1708 Node *pk = s->pred(k);
1709 1709 if( pk->is_CatchProj() && pk->in(0) == n ) {
1710 1710 const CatchProjNode* p = pk->as_CatchProj();
1711 1711 found_p = true;
1712 1712 // add the corresponding handler bci & pco information
1713 1713 if( p->_con != CatchProjNode::fall_through_index ) {
1714 1714 // p leads to an exception handler (and is not fall through)
1715 1715 assert(s == _cfg->_blocks[s->_pre_order],"bad numbering");
1716 1716 // no duplicates, please
1717 1717 if( !handler_bcis.contains(p->handler_bci()) ) {
1718 1718 uint block_num = s->non_connector()->_pre_order;
1719 1719 handler_bcis.append(p->handler_bci());
1720 1720 handler_pcos.append(blk_labels[block_num].loc_pos());
1721 1721 }
1722 1722 }
1723 1723 }
1724 1724 }
1725 1725 assert(found_p, "no matching predecessor found");
1726 1726 // Note: Due to empty block removal, one block may have
1727 1727 // several CatchProj inputs, from the same Catch.
1728 1728 }
1729 1729
1730 1730 // Set the offset of the return from the call
1731 1731 _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos);
1732 1732 continue;
1733 1733 }
1734 1734
1735 1735 // Handle implicit null exception table updates
1736 1736 if( n->is_MachNullCheck() ) {
1737 1737 uint block_num = b->non_connector_successor(0)->_pre_order;
1738 1738 _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() );
1739 1739 continue;
1740 1740 }
1741 1741 } // End of for all blocks fill in exception table entries
1742 1742 }
1743 1743
1744 1744 // Static Variables
1745 1745 #ifndef PRODUCT
1746 1746 uint Scheduling::_total_nop_size = 0;
1747 1747 uint Scheduling::_total_method_size = 0;
1748 1748 uint Scheduling::_total_branches = 0;
1749 1749 uint Scheduling::_total_unconditional_delays = 0;
1750 1750 uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
1751 1751 #endif
1752 1752
1753 1753 // Initializer for class Scheduling
1754 1754
1755 1755 Scheduling::Scheduling(Arena *arena, Compile &compile)
1756 1756 : _arena(arena),
1757 1757 _cfg(compile.cfg()),
1758 1758 _bbs(compile.cfg()->_bbs),
1759 1759 _regalloc(compile.regalloc()),
1760 1760 _reg_node(arena),
1761 1761 _bundle_instr_count(0),
1762 1762 _bundle_cycle_number(0),
1763 1763 _scheduled(arena),
1764 1764 _available(arena),
1765 1765 _next_node(NULL),
1766 1766 _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]),
1767 1767 _pinch_free_list(arena)
1768 1768 #ifndef PRODUCT
1769 1769 , _branches(0)
1770 1770 , _unconditional_delays(0)
1771 1771 #endif
1772 1772 {
1773 1773 // Create a MachNopNode
1774 1774 _nop = new (&compile) MachNopNode();
1775 1775
1776 1776 // Now that the nops are in the array, save the count
1777 1777 // (but allow entries for the nops)
1778 1778 _node_bundling_limit = compile.unique();
1779 1779 uint node_max = _regalloc->node_regs_max_index();
1780 1780
1781 1781 compile.set_node_bundling_limit(_node_bundling_limit);
1782 1782
1783 1783 // This one is persistent within the Compile class
1784 1784 _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max);
1785 1785
1786 1786 // Allocate space for fixed-size arrays
1787 1787 _node_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
1788 1788 _uses = NEW_ARENA_ARRAY(arena, short, node_max);
1789 1789 _current_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
1790 1790
1791 1791 // Clear the arrays
1792 1792 memset(_node_bundling_base, 0, node_max * sizeof(Bundle));
1793 1793 memset(_node_latency, 0, node_max * sizeof(unsigned short));
1794 1794 memset(_uses, 0, node_max * sizeof(short));
1795 1795 memset(_current_latency, 0, node_max * sizeof(unsigned short));
1796 1796
1797 1797 // Clear the bundling information
1798 1798 memcpy(_bundle_use_elements,
1799 1799 Pipeline_Use::elaborated_elements,
1800 1800 sizeof(Pipeline_Use::elaborated_elements));
1801 1801
1802 1802 // Get the last node
1803 1803 Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1];
1804 1804
1805 1805 _next_node = bb->_nodes[bb->_nodes.size()-1];
1806 1806 }
1807 1807
1808 1808 #ifndef PRODUCT
1809 1809 // Scheduling destructor
1810 1810 Scheduling::~Scheduling() {
1811 1811 _total_branches += _branches;
1812 1812 _total_unconditional_delays += _unconditional_delays;
1813 1813 }
1814 1814 #endif
1815 1815
1816 1816 // Step ahead "i" cycles
1817 1817 void Scheduling::step(uint i) {
1818 1818
1819 1819 Bundle *bundle = node_bundling(_next_node);
1820 1820 bundle->set_starts_bundle();
1821 1821
1822 1822 // Update the bundle record, but leave the flags information alone
1823 1823 if (_bundle_instr_count > 0) {
1824 1824 bundle->set_instr_count(_bundle_instr_count);
1825 1825 bundle->set_resources_used(_bundle_use.resourcesUsed());
1826 1826 }
1827 1827
1828 1828 // Update the state information
1829 1829 _bundle_instr_count = 0;
1830 1830 _bundle_cycle_number += i;
1831 1831 _bundle_use.step(i);
1832 1832 }
1833 1833
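          // step_and_clear() below ends the current bundle like step(), but also
          // resets the functional-unit usage table to its initial state; it is
          // invoked at block boundaries where bundling state cannot be carried
          // across.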
1834 1834 void Scheduling::step_and_clear() {
1835 1835 Bundle *bundle = node_bundling(_next_node);
1836 1836 bundle->set_starts_bundle();
1837 1837
1838 1838 // Update the bundle record
1839 1839 if (_bundle_instr_count > 0) {
1840 1840 bundle->set_instr_count(_bundle_instr_count);
1841 1841 bundle->set_resources_used(_bundle_use.resourcesUsed());
1842 1842
1843 1843 _bundle_cycle_number += 1;
1844 1844 }
1845 1845
1846 1846 // Clear the bundling information
1847 1847 _bundle_instr_count = 0;
1848 1848 _bundle_use.reset();
1849 1849
1850 1850 memcpy(_bundle_use_elements,
1851 1851 Pipeline_Use::elaborated_elements,
1852 1852 sizeof(Pipeline_Use::elaborated_elements));
1853 1853 }
1854 1854
1855 1855 //------------------------------ScheduleAndBundle------------------------------
1856 1856 // Perform instruction scheduling and bundling over the sequence of
1857 1857 // instructions in backwards order.
1858 1858 void Compile::ScheduleAndBundle() {
1859 1859
1860 1860 // Don't optimize this if it isn't a method
1861 1861 if (!_method)
1862 1862 return;
1863 1863
1864 1864 // Don't optimize this if scheduling is disabled
1865 1865 if (!do_scheduling())
1866 1866 return;
1867 1867
1868 1868 // Scheduling code works only with pairs (8 bytes) maximum.
1869 1869 if (max_vector_size() > 8)
1870 1870 return;
1871 1871
1872 1872 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
1873 1873
1874 1874 // Create a data structure for all the scheduling information
1875 1875 Scheduling scheduling(Thread::current()->resource_area(), *this);
1876 1876
1877 1877   // Walk backwards over each basic block, computing the needed alignment
1878 1878   // and scheduling the instructions in each one
1879 1879 scheduling.DoScheduling();
1880 1880 }
1881 1881
1882 1882 //------------------------------ComputeLocalLatenciesForward-------------------
1883 1883 // Compute the latency of all the instructions. This is fairly simple,
1884 1884 // because we already have a legal ordering. Walk over the instructions
1885 1885 // from first to last, and compute the latency of the instruction based
1886 1886 // on the latency of the preceding instruction(s).
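          // (For example, if an input def has latency 3 and the use edge contributes
          // latency 2, the node's latency becomes at least 5; the maximum over all
          // inputs is taken, with a floor of 1.)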
1887 1887 void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
1888 1888 #ifndef PRODUCT
1889 1889 if (_cfg->C->trace_opto_output())
1890 1890 tty->print("# -> ComputeLocalLatenciesForward\n");
1891 1891 #endif
1892 1892
1893 1893 // Walk over all the schedulable instructions
1894 1894 for( uint j=_bb_start; j < _bb_end; j++ ) {
1895 1895
1896 1896 // This is a kludge, forcing all latency calculations to start at 1.
1897 1897 // Used to allow latency 0 to force an instruction to the beginning
1898 1898 // of the bb
1899 1899 uint latency = 1;
1900 1900 Node *use = bb->_nodes[j];
1901 1901 uint nlen = use->len();
1902 1902
1903 1903 // Walk over all the inputs
1904 1904 for ( uint k=0; k < nlen; k++ ) {
1905 1905 Node *def = use->in(k);
1906 1906 if (!def)
1907 1907 continue;
1908 1908
1909 1909 uint l = _node_latency[def->_idx] + use->latency(k);
1910 1910 if (latency < l)
1911 1911 latency = l;
1912 1912 }
1913 1913
1914 1914 _node_latency[use->_idx] = latency;
1915 1915
1916 1916 #ifndef PRODUCT
1917 1917 if (_cfg->C->trace_opto_output()) {
1918 1918 tty->print("# latency %4d: ", latency);
1919 1919 use->dump();
1920 1920 }
1921 1921 #endif
1922 1922 }
1923 1923
1924 1924 #ifndef PRODUCT
1925 1925 if (_cfg->C->trace_opto_output())
1926 1926 tty->print("# <- ComputeLocalLatenciesForward\n");
1927 1927 #endif
1928 1928
1929 1929 } // end ComputeLocalLatenciesForward
1930 1930
1931 1931 // See if this node fits into the present instruction bundle
1932 1932 bool Scheduling::NodeFitsInBundle(Node *n) {
1933 1933 uint n_idx = n->_idx;
1934 1934
1935 1935 // If this is the unconditional delay instruction, then it fits
1936 1936 if (n == _unconditional_delay_slot) {
1937 1937 #ifndef PRODUCT
1938 1938 if (_cfg->C->trace_opto_output())
1939 1939 tty->print("# NodeFitsInBundle [%4d]: TRUE; is in unconditional delay slot\n", n->_idx);
1940 1940 #endif
1941 1941 return (true);
1942 1942 }
1943 1943
1944 1944 // If the node cannot be scheduled this cycle, skip it
1945 1945 if (_current_latency[n_idx] > _bundle_cycle_number) {
1946 1946 #ifndef PRODUCT
1947 1947 if (_cfg->C->trace_opto_output())
1948 1948 tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
1949 1949 n->_idx, _current_latency[n_idx], _bundle_cycle_number);
1950 1950 #endif
1951 1951 return (false);
1952 1952 }
1953 1953
1954 1954 const Pipeline *node_pipeline = n->pipeline();
1955 1955
1956 1956 uint instruction_count = node_pipeline->instructionCount();
1957 1957 if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
1958 1958 instruction_count = 0;
1959 1959 else if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
1960 1960 instruction_count++;
1961 1961
1962 1962 if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) {
1963 1963 #ifndef PRODUCT
1964 1964 if (_cfg->C->trace_opto_output())
1965 1965 tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
1966 1966 n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
1967 1967 #endif
1968 1968 return (false);
1969 1969 }
1970 1970
1971 1971 // Don't allow non-machine nodes to be handled this way
1972 1972 if (!n->is_Mach() && instruction_count == 0)
1973 1973 return (false);
1974 1974
1975 1975 // See if there is any overlap
1976 1976 uint delay = _bundle_use.full_latency(0, node_pipeline->resourceUse());
1977 1977
1978 1978 if (delay > 0) {
1979 1979 #ifndef PRODUCT
1980 1980 if (_cfg->C->trace_opto_output())
1981 1981 tty->print("# NodeFitsInBundle [%4d]: FALSE; functional units overlap\n", n_idx);
1982 1982 #endif
1983 1983 return false;
1984 1984 }
1985 1985
1986 1986 #ifndef PRODUCT
1987 1987 if (_cfg->C->trace_opto_output())
1988 1988 tty->print("# NodeFitsInBundle [%4d]: TRUE\n", n_idx);
1989 1989 #endif
1990 1990
1991 1991 return true;
1992 1992 }
1993 1993
1994 1994 Node * Scheduling::ChooseNodeToBundle() {
1995 1995 uint siz = _available.size();
1996 1996
1997 1997 if (siz == 0) {
1998 1998
1999 1999 #ifndef PRODUCT
2000 2000 if (_cfg->C->trace_opto_output())
2001 2001 tty->print("# ChooseNodeToBundle: NULL\n");
2002 2002 #endif
2003 2003 return (NULL);
2004 2004 }
2005 2005
2006 2006 // Fast path, if only 1 instruction in the bundle
2007 2007 if (siz == 1) {
2008 2008 #ifndef PRODUCT
2009 2009 if (_cfg->C->trace_opto_output()) {
2010 2010 tty->print("# ChooseNodeToBundle (only 1): ");
2011 2011 _available[0]->dump();
2012 2012 }
2013 2013 #endif
2014 2014 return (_available[0]);
2015 2015 }
2016 2016
2017 2017   // Don't bother looking for a candidate if the bundle is already full
2018 2018 if (_bundle_instr_count < Pipeline::_max_instrs_per_cycle) {
2019 2019 for ( uint i = 0; i < siz; i++ ) {
2020 2020 Node *n = _available[i];
2021 2021
2022 2022 // Skip projections, we'll handle them another way
2023 2023 if (n->is_Proj())
2024 2024 continue;
2025 2025
2026 2026       // This presupposes that instructions are inserted into the
2027 2027       // available list in legality order; i.e., instructions that
2028 2028       // must come first are at the head of the list
2029 2029 if (NodeFitsInBundle(n)) {
2030 2030 #ifndef PRODUCT
2031 2031 if (_cfg->C->trace_opto_output()) {
2032 2032 tty->print("# ChooseNodeToBundle: ");
2033 2033 n->dump();
2034 2034 }
2035 2035 #endif
2036 2036 return (n);
2037 2037 }
2038 2038 }
2039 2039 }
2040 2040
2041 2041 // Nothing fits in this bundle, choose the highest priority
2042 2042 #ifndef PRODUCT
2043 2043 if (_cfg->C->trace_opto_output()) {
2044 2044 tty->print("# ChooseNodeToBundle: ");
2045 2045 _available[0]->dump();
2046 2046 }
2047 2047 #endif
2048 2048
2049 2049 return _available[0];
2050 2050 }
2051 2051
2052 2052 //------------------------------AddNodeToAvailableList-------------------------
2053 2053 void Scheduling::AddNodeToAvailableList(Node *n) {
2054 2054 assert( !n->is_Proj(), "projections never directly made available" );
2055 2055 #ifndef PRODUCT
2056 2056 if (_cfg->C->trace_opto_output()) {
2057 2057 tty->print("# AddNodeToAvailableList: ");
2058 2058 n->dump();
2059 2059 }
2060 2060 #endif
2061 2061
2062 2062 int latency = _current_latency[n->_idx];
2063 2063
2064 2064 // Insert in latency order (insertion sort)
2065 2065 uint i;
2066 2066 for ( i=0; i < _available.size(); i++ )
2067 2067 if (_current_latency[_available[i]->_idx] > latency)
2068 2068 break;
2069 2069
2070 2070   // Special check for compares following branches
2071 2071 if( n->is_Mach() && _scheduled.size() > 0 ) {
2072 2072 int op = n->as_Mach()->ideal_Opcode();
2073 2073 Node *last = _scheduled[0];
2074 2074 if( last->is_MachIf() && last->in(1) == n &&
2075 2075 ( op == Op_CmpI ||
2076 2076 op == Op_CmpU ||
2077 2077 op == Op_CmpP ||
2078 2078 op == Op_CmpF ||
2079 2079 op == Op_CmpD ||
2080 2080 op == Op_CmpL ) ) {
2081 2081
2082 2082 // Recalculate position, moving to front of same latency
2083 2083 for ( i=0 ; i < _available.size(); i++ )
2084 2084 if (_current_latency[_available[i]->_idx] >= latency)
2085 2085 break;
2086 2086 }
2087 2087 }
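            // (Moving the compare to the front of its latency class makes it be
            // chosen sooner in the backwards schedule, which keeps it close to
            // the already-scheduled branch in the final instruction order.)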
2088 2088
2089 2089 // Insert the node in the available list
2090 2090 _available.insert(i, n);
2091 2091
2092 2092 #ifndef PRODUCT
2093 2093 if (_cfg->C->trace_opto_output())
2094 2094 dump_available();
2095 2095 #endif
2096 2096 }
2097 2097
2098 2098 //------------------------------DecrementUseCounts-----------------------------
2099 2099 void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
2100 2100 for ( uint i=0; i < n->len(); i++ ) {
2101 2101 Node *def = n->in(i);
2102 2102 if (!def) continue;
2103 2103 if( def->is_Proj() ) // If this is a machine projection, then
2104 2104 def = def->in(0); // propagate usage thru to the base instruction
2105 2105
2106 2106 if( _bbs[def->_idx] != bb ) // Ignore if not block-local
2107 2107 continue;
2108 2108
2109 2109 // Compute the latency
2110 2110 uint l = _bundle_cycle_number + n->latency(i);
2111 2111 if (_current_latency[def->_idx] < l)
2112 2112 _current_latency[def->_idx] = l;
2113 2113
2114 2114     // If this def has no remaining uses, make it available
2115 2115 if ((--_uses[def->_idx]) == 0)
2116 2116 AddNodeToAvailableList(def);
2117 2117 }
2118 2118 }
2119 2119
2120 2120 //------------------------------AddNodeToBundle--------------------------------
2121 2121 void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
2122 2122 #ifndef PRODUCT
2123 2123 if (_cfg->C->trace_opto_output()) {
2124 2124 tty->print("# AddNodeToBundle: ");
2125 2125 n->dump();
2126 2126 }
2127 2127 #endif
2128 2128
2129 2129 // Remove this from the available list
2130 2130 uint i;
2131 2131 for (i = 0; i < _available.size(); i++)
2132 2132 if (_available[i] == n)
2133 2133 break;
2134 2134 assert(i < _available.size(), "entry in _available list not found");
2135 2135 _available.remove(i);
2136 2136
2137 2137 // See if this fits in the current bundle
2138 2138 const Pipeline *node_pipeline = n->pipeline();
2139 2139 const Pipeline_Use& node_usage = node_pipeline->resourceUse();
2140 2140
2141 2141 // Check for instructions to be placed in the delay slot. We
2142 2142 // do this before we actually schedule the current instruction,
2143 2143 // because the delay slot follows the current instruction.
2144 2144 if (Pipeline::_branch_has_delay_slot &&
2145 2145 node_pipeline->hasBranchDelay() &&
2146 2146 !_unconditional_delay_slot) {
2147 2147
2148 2148 uint siz = _available.size();
2149 2149
2150 2150 // Conditional branches can support an instruction that
2151 2151     // is unconditionally executed and not dependent on the
2152 2152 // branch, OR a conditionally executed instruction if
2153 2153 // the branch is taken. In practice, this means that
2154 2154 // the first instruction at the branch target is
2155 2155 // copied to the delay slot, and the branch goes to
2156 2156 // the instruction after that at the branch target
2157 2157 if ( n->is_MachBranch() ) {
2158 2158
2159 2159 assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" );
2160 2160 assert( !n->is_Catch(), "should not look for delay slot for Catch" );
2161 2161
2162 2162 #ifndef PRODUCT
2163 2163 _branches++;
2164 2164 #endif
2165 2165
2166 2166 // At least 1 instruction is on the available list
2167 2167 // that is not dependent on the branch
2168 2168 for (uint i = 0; i < siz; i++) {
2169 2169 Node *d = _available[i];
2170 2170 const Pipeline *avail_pipeline = d->pipeline();
2171 2171
2172 2172         // Don't allow safepoints in the branch shadow; that would
2173 2173         // cause a number of difficulties
2174 2174 if ( avail_pipeline->instructionCount() == 1 &&
2175 2175 !avail_pipeline->hasMultipleBundles() &&
2176 2176 !avail_pipeline->hasBranchDelay() &&
2177 2177 Pipeline::instr_has_unit_size() &&
2178 2178 d->size(_regalloc) == Pipeline::instr_unit_size() &&
2179 2179 NodeFitsInBundle(d) &&
2180 2180 !node_bundling(d)->used_in_delay()) {
2181 2181
2182 2182 if (d->is_Mach() && !d->is_MachSafePoint()) {
2183 2183 // A node that fits in the delay slot was found, so we need to
2184 2184 // set the appropriate bits in the bundle pipeline information so
2185 2185 // that it correctly indicates resource usage. Later, when we
2186 2186 // attempt to add this instruction to the bundle, we will skip
2187 2187 // setting the resource usage.
2188 2188 _unconditional_delay_slot = d;
2189 2189 node_bundling(n)->set_use_unconditional_delay();
2190 2190 node_bundling(d)->set_used_in_unconditional_delay();
2191 2191 _bundle_use.add_usage(avail_pipeline->resourceUse());
2192 2192 _current_latency[d->_idx] = _bundle_cycle_number;
2193 2193 _next_node = d;
2194 2194 ++_bundle_instr_count;
2195 2195 #ifndef PRODUCT
2196 2196 _unconditional_delays++;
2197 2197 #endif
2198 2198 break;
2199 2199 }
2200 2200 }
2201 2201 }
2202 2202 }
2203 2203
2204 2204 // No delay slot, add a nop to the usage
2205 2205 if (!_unconditional_delay_slot) {
2206 2206 // See if adding an instruction in the delay slot will overflow
2207 2207 // the bundle.
2208 2208 if (!NodeFitsInBundle(_nop)) {
2209 2209 #ifndef PRODUCT
2210 2210 if (_cfg->C->trace_opto_output())
2211 2211 tty->print("# *** STEP(1 instruction for delay slot) ***\n");
2212 2212 #endif
2213 2213 step(1);
2214 2214 }
2215 2215
2216 2216 _bundle_use.add_usage(_nop->pipeline()->resourceUse());
2217 2217 _next_node = _nop;
2218 2218 ++_bundle_instr_count;
2219 2219 }
2220 2220
2221 2221 // See if the instruction in the delay slot requires a
2222 2222 // step of the bundles
2223 2223 if (!NodeFitsInBundle(n)) {
2224 2224 #ifndef PRODUCT
2225 2225 if (_cfg->C->trace_opto_output())
2226 2226 tty->print("# *** STEP(branch won't fit) ***\n");
2227 2227 #endif
2228 2228 // Update the state information
2229 2229 _bundle_instr_count = 0;
2230 2230 _bundle_cycle_number += 1;
2231 2231 _bundle_use.step(1);
2232 2232 }
2233 2233 }
2234 2234
2235 2235 // Get the number of instructions
2236 2236 uint instruction_count = node_pipeline->instructionCount();
2237 2237 if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
2238 2238 instruction_count = 0;
2239 2239
2240 2240 // Compute the latency information
2241 2241 uint delay = 0;
2242 2242
2243 2243 if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) {
2244 2244 int relative_latency = _current_latency[n->_idx] - _bundle_cycle_number;
2245 2245 if (relative_latency < 0)
2246 2246 relative_latency = 0;
2247 2247
2248 2248 delay = _bundle_use.full_latency(relative_latency, node_usage);
2249 2249
2250 2250 // Does not fit in this bundle, start a new one
2251 2251 if (delay > 0) {
2252 2252 step(delay);
2253 2253
2254 2254 #ifndef PRODUCT
2255 2255 if (_cfg->C->trace_opto_output())
2256 2256 tty->print("# *** STEP(%d) ***\n", delay);
2257 2257 #endif
2258 2258 }
2259 2259 }
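            // (Example: if the node's earliest cycle is 7 and the bundle is at
            // cycle 5, relative_latency is 2; full_latency() may stretch the
            // delay further if a functional unit is still busy, and step(delay)
            // then opens a new bundle.)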
2260 2260
2261 2261 // If this was placed in the delay slot, ignore it
2262 2262 if (n != _unconditional_delay_slot) {
2263 2263
2264 2264 if (delay == 0) {
2265 2265 if (node_pipeline->hasMultipleBundles()) {
2266 2266 #ifndef PRODUCT
2267 2267 if (_cfg->C->trace_opto_output())
2268 2268 tty->print("# *** STEP(multiple instructions) ***\n");
2269 2269 #endif
2270 2270 step(1);
2271 2271 }
2272 2272
2273 2273 else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) {
2274 2274 #ifndef PRODUCT
2275 2275 if (_cfg->C->trace_opto_output())
2276 2276 tty->print("# *** STEP(%d >= %d instructions) ***\n",
2277 2277 instruction_count + _bundle_instr_count,
2278 2278 Pipeline::_max_instrs_per_cycle);
2279 2279 #endif
2280 2280 step(1);
2281 2281 }
2282 2282 }
2283 2283
2284 2284 if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
2285 2285 _bundle_instr_count++;
2286 2286
2287 2287 // Set the node's latency
2288 2288 _current_latency[n->_idx] = _bundle_cycle_number;
2289 2289
2290 2290 // Now merge the functional unit information
2291 2291 if (instruction_count > 0 || !node_pipeline->mayHaveNoCode())
2292 2292 _bundle_use.add_usage(node_usage);
2293 2293
2294 2294 // Increment the number of instructions in this bundle
2295 2295 _bundle_instr_count += instruction_count;
2296 2296
2297 2297 // Remember this node for later
2298 2298 if (n->is_Mach())
2299 2299 _next_node = n;
2300 2300 }
2301 2301
2302 2302 // It's possible to have a BoxLock in the graph and in the _bbs mapping but
2303 2303 // not in the bb->_nodes array. This happens for debug-info-only BoxLocks.
2304 2304 // 'Schedule' them (basically ignore in the schedule) but do not insert them
2305 2305 // into the block. All other scheduled nodes get put in the schedule here.
2306 2306 int op = n->Opcode();
2307 2307 if( (op == Op_Node && n->req() == 0) || // anti-dependence node OR
2308 2308       (op != Op_Node &&         // Not an unused antidependence node and
2309 2309 // not an unallocated boxlock
2310 2310 (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) {
2311 2311
2312 2312 // Push any trailing projections
2313 2313 if( bb->_nodes[bb->_nodes.size()-1] != n ) {
2314 2314 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2315 2315 Node *foi = n->fast_out(i);
2316 2316 if( foi->is_Proj() )
2317 2317 _scheduled.push(foi);
2318 2318 }
2319 2319 }
2320 2320
2321 2321 // Put the instruction in the schedule list
2322 2322 _scheduled.push(n);
2323 2323 }
2324 2324
2325 2325 #ifndef PRODUCT
2326 2326 if (_cfg->C->trace_opto_output())
2327 2327 dump_available();
2328 2328 #endif
2329 2329
2330 2330 // Walk all the definitions, decrementing use counts, and
2331 2331 // if a definition has a 0 use count, place it in the available list.
2332 2332 DecrementUseCounts(n,bb);
2333 2333 }
2334 2334
2335 2335 //------------------------------ComputeUseCount--------------------------------
2336 2336 // This method sets the use count within a basic block. We will ignore all
2337 2337 // uses outside the current basic block. As we are doing a backwards walk,
2338 2338 // any node we reach that has a use count of 0 may be scheduled. This also
2339 2339 // avoids the problem of cyclic references from phi nodes, as long as phi
2340 2340 // nodes are at the front of the basic block. This method also initializes
2341 2341 // the available list to the set of instructions that have no uses within this
2342 2342 // basic block.
2343 2343 void Scheduling::ComputeUseCount(const Block *bb) {
2344 2344 #ifndef PRODUCT
2345 2345 if (_cfg->C->trace_opto_output())
2346 2346 tty->print("# -> ComputeUseCount\n");
2347 2347 #endif
2348 2348
2349 2349 // Clear the list of available and scheduled instructions, just in case
2350 2350 _available.clear();
2351 2351 _scheduled.clear();
2352 2352
2353 2353 // No delay slot specified
2354 2354 _unconditional_delay_slot = NULL;
2355 2355
2356 2356 #ifdef ASSERT
2357 2357 for( uint i=0; i < bb->_nodes.size(); i++ )
2358 2358 assert( _uses[bb->_nodes[i]->_idx] == 0, "_use array not clean" );
2359 2359 #endif
2360 2360
2361 2361   // Force the _uses count to never go to zero for unschedulable pieces
2362 2362 // of the block
2363 2363 for( uint k = 0; k < _bb_start; k++ )
2364 2364 _uses[bb->_nodes[k]->_idx] = 1;
2365 2365 for( uint l = _bb_end; l < bb->_nodes.size(); l++ )
2366 2366 _uses[bb->_nodes[l]->_idx] = 1;
2367 2367
2368 2368 // Iterate backwards over the instructions in the block. Don't count the
2369 2369 // branch projections at end or the block header instructions.
2370 2370 for( uint j = _bb_end-1; j >= _bb_start; j-- ) {
2371 2371 Node *n = bb->_nodes[j];
2372 2372 if( n->is_Proj() ) continue; // Projections handled another way
2373 2373
2374 2374 // Account for all uses
2375 2375 for ( uint k = 0; k < n->len(); k++ ) {
2376 2376 Node *inp = n->in(k);
2377 2377 if (!inp) continue;
2378 2378 assert(inp != n, "no cycles allowed" );
2379 2379 if( _bbs[inp->_idx] == bb ) { // Block-local use?
2380 2380 if( inp->is_Proj() ) // Skip through Proj's
2381 2381 inp = inp->in(0);
2382 2382 ++_uses[inp->_idx]; // Count 1 block-local use
2383 2383 }
2384 2384 }
2385 2385
2386 2386 // If this instruction has a 0 use count, then it is available
2387 2387 if (!_uses[n->_idx]) {
2388 2388 _current_latency[n->_idx] = _bundle_cycle_number;
2389 2389 AddNodeToAvailableList(n);
2390 2390 }
2391 2391
2392 2392 #ifndef PRODUCT
2393 2393 if (_cfg->C->trace_opto_output()) {
2394 2394 tty->print("# uses: %3d: ", _uses[n->_idx]);
2395 2395 n->dump();
2396 2396 }
2397 2397 #endif
2398 2398 }
2399 2399
2400 2400 #ifndef PRODUCT
2401 2401 if (_cfg->C->trace_opto_output())
2402 2402 tty->print("# <- ComputeUseCount\n");
2403 2403 #endif
2404 2404 }
2405 2405
2406 2406 // This routine performs scheduling on each basic block in reverse order,
2407 2407 // using instruction latencies and taking into account functional unit
2408 2408 // availability.
2409 2409 void Scheduling::DoScheduling() {
2410 2410 #ifndef PRODUCT
2411 2411 if (_cfg->C->trace_opto_output())
2412 2412 tty->print("# -> DoScheduling\n");
2413 2413 #endif
2414 2414
2415 2415 Block *succ_bb = NULL;
2416 2416 Block *bb;
2417 2417
2418 2418 // Walk over all the basic blocks in reverse order
2419 2419 for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) {
2420 2420 bb = _cfg->_blocks[i];
2421 2421
2422 2422 #ifndef PRODUCT
2423 2423 if (_cfg->C->trace_opto_output()) {
2424 2424 tty->print("# Schedule BB#%03d (initial)\n", i);
2425 2425 for (uint j = 0; j < bb->_nodes.size(); j++)
2426 2426 bb->_nodes[j]->dump();
2427 2427 }
2428 2428 #endif
2429 2429
2430 2430 // On the head node, skip processing
2431 2431 if( bb == _cfg->_broot )
2432 2432 continue;
2433 2433
2434 2434 // Skip empty, connector blocks
2435 2435 if (bb->is_connector())
2436 2436 continue;
2437 2437
2438 2438 // If the following block is not the sole successor of
2439 2439 // this one, then reset the pipeline information
2440 2440 if (bb->_num_succs != 1 || bb->non_connector_successor(0) != succ_bb) {
2441 2441 #ifndef PRODUCT
2442 2442 if (_cfg->C->trace_opto_output()) {
2443 2443 tty->print("*** bundle start of next BB, node %d, for %d instructions\n",
2444 2444 _next_node->_idx, _bundle_instr_count);
2445 2445 }
2446 2446 #endif
2447 2447 step_and_clear();
2448 2448 }
2449 2449
2450 2450 // Leave untouched the starting instruction, any Phis, a CreateEx node
2451 2451 // or Top. bb->_nodes[_bb_start] is the first schedulable instruction.
2452 2452 _bb_end = bb->_nodes.size()-1;
2453 2453 for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) {
2454 2454 Node *n = bb->_nodes[_bb_start];
2455 2455       // Things not matched, like PhiNodes and ProjNodes, don't get scheduled.
2456 2456 // Also, MachIdealNodes do not get scheduled
2457 2457 if( !n->is_Mach() ) continue; // Skip non-machine nodes
2458 2458 MachNode *mach = n->as_Mach();
2459 2459 int iop = mach->ideal_Opcode();
2460 2460 if( iop == Op_CreateEx ) continue; // CreateEx is pinned
2461 2461 if( iop == Op_Con ) continue; // Do not schedule Top
2462 2462 if( iop == Op_Node && // Do not schedule PhiNodes, ProjNodes
2463 2463 mach->pipeline() == MachNode::pipeline_class() &&
2464 2464 !n->is_SpillCopy() ) // Breakpoints, Prolog, etc
2465 2465 continue;
2466 2466 break; // Funny loop structure to be sure...
2467 2467 }
2468 2468 // Compute last "interesting" instruction in block - last instruction we
2469 2469 // might schedule. _bb_end points just after last schedulable inst. We
2470 2470 // normally schedule conditional branches (despite them being forced last
2471 2471 // in the block), because they have delay slots we can fill. Calls all
2472 2472 // have their delay slots filled in the template expansions, so we don't
2473 2473 // bother scheduling them.
2474 2474 Node *last = bb->_nodes[_bb_end];
2475 2475 // Ignore trailing NOPs.
2476 2476 while (_bb_end > 0 && last->is_Mach() &&
2477 2477 last->as_Mach()->ideal_Opcode() == Op_Con) {
2478 2478 last = bb->_nodes[--_bb_end];
2479 2479 }
2480 2480 assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
2481 2481 if( last->is_Catch() ||
2482 2482 // Exclude unreachable path case when Halt node is in a separate block.
2483 2483 (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
2484 2484 // There must be a prior call. Skip it.
2485 2485 while( !bb->_nodes[--_bb_end]->is_MachCall() ) {
2486 2486 assert( bb->_nodes[_bb_end]->is_MachProj(), "skipping projections after expected call" );
2487 2487 }
2488 2488 } else if( last->is_MachNullCheck() ) {
2489 2489       // Back up so the last null-checked memory instruction is
2490 2490 // outside the schedulable range. Skip over the nullcheck,
2491 2491 // projection, and the memory nodes.
2492 2492 Node *mem = last->in(1);
2493 2493 do {
2494 2494 _bb_end--;
2495 2495 } while (mem != bb->_nodes[_bb_end]);
2496 2496 } else {
2497 2497 // Set _bb_end to point after last schedulable inst.
2498 2498 _bb_end++;
2499 2499 }
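              // At this point [_bb_start, _bb_end) brackets exactly the
              // instructions this pass is free to reorder.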
2500 2500
2501 2501 assert( _bb_start <= _bb_end, "inverted block ends" );
2502 2502
2503 2503 // Compute the register antidependencies for the basic block
2504 2504 ComputeRegisterAntidependencies(bb);
2505 2505 if (_cfg->C->failing()) return; // too many D-U pinch points
2506 2506
2507 2507 // Compute intra-bb latencies for the nodes
2508 2508 ComputeLocalLatenciesForward(bb);
2509 2509
2510 2510 // Compute the usage within the block, and set the list of all nodes
2511 2511 // in the block that have no uses within the block.
2512 2512 ComputeUseCount(bb);
2513 2513
2514 2514 // Schedule the remaining instructions in the block
2515 2515 while ( _available.size() > 0 ) {
2516 2516 Node *n = ChooseNodeToBundle();
2517 2517 AddNodeToBundle(n,bb);
2518 2518 }
2519 2519
2520 2520 assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" );
2521 2521 #ifdef ASSERT
2522 2522 for( uint l = _bb_start; l < _bb_end; l++ ) {
2523 2523 Node *n = bb->_nodes[l];
2524 2524 uint m;
2525 2525 for( m = 0; m < _bb_end-_bb_start; m++ )
2526 2526 if( _scheduled[m] == n )
2527 2527 break;
2528 2528 assert( m < _bb_end-_bb_start, "instruction missing in schedule" );
2529 2529 }
2530 2530 #endif
2531 2531
2532 2532 // Now copy the instructions (in reverse order) back to the block
2533 2533 for ( uint k = _bb_start; k < _bb_end; k++ )
2534 2534 bb->_nodes.map(k, _scheduled[_bb_end-k-1]);
2535 2535
2536 2536 #ifndef PRODUCT
2537 2537 if (_cfg->C->trace_opto_output()) {
2538 2538 tty->print("# Schedule BB#%03d (final)\n", i);
2539 2539 uint current = 0;
2540 2540 for (uint j = 0; j < bb->_nodes.size(); j++) {
2541 2541 Node *n = bb->_nodes[j];
2542 2542 if( valid_bundle_info(n) ) {
2543 2543 Bundle *bundle = node_bundling(n);
2544 2544 if (bundle->instr_count() > 0 || bundle->flags() > 0) {
2545 2545 tty->print("*** Bundle: ");
2546 2546 bundle->dump();
2547 2547 }
2548 2548 n->dump();
2549 2549 }
2550 2550 }
2551 2551 }
2552 2552 #endif
2553 2553 #ifdef ASSERT
2554 2554 verify_good_schedule(bb,"after block local scheduling");
2555 2555 #endif
2556 2556 }
2557 2557
2558 2558 #ifndef PRODUCT
2559 2559 if (_cfg->C->trace_opto_output())
2560 2560 tty->print("# <- DoScheduling\n");
2561 2561 #endif
2562 2562
2563 2563 // Record final node-bundling array location
2564 2564 _regalloc->C->set_node_bundling_base(_node_bundling_base);
2565 2565
2566 2566 } // end DoScheduling
2567 2567
2568 2568 //------------------------------verify_good_schedule---------------------------
2569 2569 // Verify that no live-range used in the block is killed in the block by a
2570 2570 // wrong DEF. This doesn't verify live-ranges that span blocks.
2571 2571
2572 2572 // Check for edge existence. Used to avoid adding redundant precedence edges.
2573 2573 static bool edge_from_to( Node *from, Node *to ) {
2574 2574 for( uint i=0; i<from->len(); i++ )
2575 2575 if( from->in(i) == to )
2576 2576 return true;
2577 2577 return false;
2578 2578 }
2579 2579
2580 2580 #ifdef ASSERT
2581 2581 //------------------------------verify_do_def----------------------------------
2582 2582 void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
2583 2583 // Check for bad kills
2584 2584 if( OptoReg::is_valid(def) ) { // Ignore stores & control flow
2585 2585 Node *prior_use = _reg_node[def];
2586 2586 if( prior_use && !edge_from_to(prior_use,n) ) {
2587 2587 tty->print("%s = ",OptoReg::as_VMReg(def)->name());
2588 2588 n->dump();
2589 2589 tty->print_cr("...");
2590 2590 prior_use->dump();
2591 2591 assert(edge_from_to(prior_use,n),msg);
2592 2592 }
2593 2593 _reg_node.map(def,NULL); // Kill live USEs
2594 2594 }
2595 2595 }
2596 2596
2597 2597 //------------------------------verify_good_schedule---------------------------
2598 2598 void Scheduling::verify_good_schedule( Block *b, const char *msg ) {
2599 2599
2600 2600 // Zap to something reasonable for the verify code
2601 2601 _reg_node.clear();
2602 2602
2603 2603 // Walk over the block backwards. Check to make sure each DEF doesn't
2604 2604 // kill a live value (other than the one it's supposed to). Add each
2605 2605 // USE to the live set.
2606 2606 for( uint i = b->_nodes.size()-1; i >= _bb_start; i-- ) {
2607 2607 Node *n = b->_nodes[i];
2608 2608 int n_op = n->Opcode();
2609 2609 if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) {
2610 2610 // Fat-proj kills a slew of registers
2611 2611 RegMask rm = n->out_RegMask();// Make local copy
2612 2612 while( rm.is_NotEmpty() ) {
2613 2613 OptoReg::Name kill = rm.find_first_elem();
2614 2614 rm.Remove(kill);
2615 2615 verify_do_def( n, kill, msg );
2616 2616 }
2617 2617 } else if( n_op != Op_Node ) { // Avoid brand new antidependence nodes
2618 2618 // Get DEF'd registers the normal way
2619 2619 verify_do_def( n, _regalloc->get_reg_first(n), msg );
2620 2620 verify_do_def( n, _regalloc->get_reg_second(n), msg );
2621 2621 }
2622 2622
2623 2623 // Now make all USEs live
2624 2624 for( uint i=1; i<n->req(); i++ ) {
2625 2625 Node *def = n->in(i);
2626 2626 assert(def != 0, "input edge required");
2627 2627 OptoReg::Name reg_lo = _regalloc->get_reg_first(def);
2628 2628 OptoReg::Name reg_hi = _regalloc->get_reg_second(def);
2629 2629 if( OptoReg::is_valid(reg_lo) ) {
2630 2630 assert(!_reg_node[reg_lo] || edge_from_to(_reg_node[reg_lo],def), msg);
2631 2631 _reg_node.map(reg_lo,n);
2632 2632 }
2633 2633 if( OptoReg::is_valid(reg_hi) ) {
2634 2634 assert(!_reg_node[reg_hi] || edge_from_to(_reg_node[reg_hi],def), msg);
2635 2635 _reg_node.map(reg_hi,n);
2636 2636 }
2637 2637 }
2638 2638
2639 2639 }
2640 2640
2641 2641 // Zap to something reasonable for the Antidependence code
2642 2642 _reg_node.clear();
2643 2643 }
2644 2644 #endif
2645 2645
2646 2646 // Conditionally add precedence edges. Avoid putting edges on Projs.
2647 2647 static void add_prec_edge_from_to( Node *from, Node *to ) {
2648 2648 if( from->is_Proj() ) { // Put precedence edge on Proj's input
2649 2649 assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" );
2650 2650 from = from->in(0);
2651 2651 }
2652 2652 if( from != to && // No cycles (for things like LD L0,[L0+4] )
2653 2653 !edge_from_to( from, to ) ) // Avoid duplicate edge
2654 2654 from->add_prec(to);
2655 2655 }
2656 2656
2657 2657 //------------------------------anti_do_def------------------------------------
2658 2658 void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
2659 2659 if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
2660 2660 return;
2661 2661
2662 2662 Node *pinch = _reg_node[def_reg]; // Get pinch point
2663 2663 if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet?
2664 2664 is_def ) { // Check for a true def (not a kill)
2665 2665 _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point
2666 2666 return;
2667 2667 }
2668 2668
2669 2669 Node *kill = def; // Rename 'def' to more descriptive 'kill'
2670 2670 debug_only( def = (Node*)0xdeadbeef; )
2671 2671
2672 2672 // After some number of kills there _may_ be a later def
2673 2673 Node *later_def = NULL;
2674 2674
2675 2675 // Finding a kill requires a real pinch-point.
2676 2676 // Check for not already having a pinch-point.
2677 2677 // Pinch points are Op_Node's.
2678 2678 if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point?
2679 2679 later_def = pinch; // Must be def/kill as optimistic pinch-point
2680 2680 if ( _pinch_free_list.size() > 0) {
2681 2681 pinch = _pinch_free_list.pop();
2682 2682 } else {
2683 2683 pinch = new (_cfg->C) Node(1); // Pinch point to-be
2684 2684 }
2685 2685 if (pinch->_idx >= _regalloc->node_regs_max_index()) {
2686 2686 _cfg->C->record_method_not_compilable("too many D-U pinch points");
2687 2687 return;
2688 2688 }
2689 2689 _bbs.map(pinch->_idx,b); // Pretend it's valid in this block (lazy init)
2690 2690 _reg_node.map(def_reg,pinch); // Record pinch-point
2691 2691 //_regalloc->set_bad(pinch->_idx); // Already initialized this way.
2692 2692 if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
2693 2693 pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call
2694 2694 add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch
2695 2695 later_def = NULL; // and no later def
2696 2696 }
2697 2697 pinch->set_req(0,later_def); // Hook later def so we can find it
2698 2698 } else { // Else have valid pinch point
2699 2699 if( pinch->in(0) ) // If there is a later-def
2700 2700 later_def = pinch->in(0); // Get it
2701 2701 }
2702 2702
2703 2703 // Add output-dependence edge from later def to kill
2704 2704 if( later_def ) // If there is some original def
2705 2705 add_prec_edge_from_to(later_def,kill); // Add edge from def to kill
2706 2706
2707 2707 // See if current kill is also a use, and so is forced to be the pinch-point.
2708 2708 if( pinch->Opcode() == Op_Node ) {
2709 2709 Node *uses = kill->is_Proj() ? kill->in(0) : kill;
2710 2710 for( uint i=1; i<uses->req(); i++ ) {
2711 2711 if( _regalloc->get_reg_first(uses->in(i)) == def_reg ||
2712 2712 _regalloc->get_reg_second(uses->in(i)) == def_reg ) {
2713 2713 // Yes, found a use/kill pinch-point
2714 2714 pinch->set_req(0,NULL); //
2715 2715 pinch->replace_by(kill); // Move anti-dep edges up
2716 2716 pinch = kill;
2717 2717 _reg_node.map(def_reg,pinch);
2718 2718 return;
2719 2719 }
2720 2720 }
2721 2721 }
2722 2722
2723 2723 // Add edge from kill to pinch-point
2724 2724 add_prec_edge_from_to(kill,pinch);
2725 2725 }
2726 2726
2727 2727 //------------------------------anti_do_use------------------------------------
2728 2728 void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
2729 2729 if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
2730 2730 return;
2731 2731 Node *pinch = _reg_node[use_reg]; // Get pinch point
2732 2732 // Check for no later def_reg/kill in block
2733 2733 if( pinch && _bbs[pinch->_idx] == b &&
2734 2734 // Use has to be block-local as well
2735 2735 _bbs[use->_idx] == b ) {
2736 2736 if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?)
2737 2737 pinch->req() == 1 ) { // pinch not yet in block?
2738 2738 pinch->del_req(0); // yank pointer to later-def, also set flag
2739 2739 // Insert the pinch-point in the block just after the last use
2740 2740 b->_nodes.insert(b->find_node(use)+1,pinch);
2741 2741 _bb_end++; // Increase size scheduled region in block
2742 2742 }
2743 2743
2744 2744 add_prec_edge_from_to(pinch,use);
2745 2745 }
2746 2746 }
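// Note (illustrative): a pinch node starts life with req() == 1, its single
// input slot holding the later-def hook (or top).  anti_do_use() calls
// del_req(0), dropping req() to 0; this both discards the hook and doubles
// as the flag that the pinch has already been inserted into the block just
// after its last use.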
2747 2747
2748 2748 //------------------------------ComputeRegisterAntidependences-----------------
2749 2749 // We insert antidependences between the reads and following write of
2750 2750 // allocated registers to prevent illegal code motion.  The number of
2751 2751 // added edges should be fairly small, especially as we only add edges
2752 2752 // within the current basic block.
2753 2753 void Scheduling::ComputeRegisterAntidependencies(Block *b) {
2754 2754
2755 2755 #ifdef ASSERT
2756 2756 verify_good_schedule(b,"before block local scheduling");
2757 2757 #endif
2758 2758
2759 2759 // A valid schedule, for each register independently, is an endless cycle
2760 2760 // of: a def, then some uses (connected to the def by true dependencies),
2761 2761 // then some kills (defs with no uses), finally the cycle repeats with a new
2762 2762 // def. The uses are allowed to float relative to each other, as are the
2763 2763 // kills. No use is allowed to slide past a kill (or def). This requires
2764 2764 // antidependencies between all uses of a single def and all kills that
2765 2765 // follow, up to the next def. More edges are redundant, because later defs
2766 2766 // & kills are already serialized with true or antidependencies. To keep
2767 2767 // the edge count down, we add a 'pinch point' node if there's more than
2768 2768 // one use or more than one kill/def.
2769 2769
2770 2770 // We add dependencies in one bottom-up pass.
2771 2771
2772 2772 // For each instruction we handle its DEFs/KILLs, then its USEs.
2773 2773
2774 2774 // For each DEF/KILL, we check to see if there's a prior DEF/KILL for this
2775 2775 // register. If not, we record the DEF/KILL in _reg_node, the
2776 2776 // register-to-def mapping. If there is a prior DEF/KILL, we insert a
2777 2777 // "pinch point", a new Node that's in the graph but not in the block.
2778 2778 // We put edges from the prior and current DEF/KILLs to the pinch point.
2779 2779 // We put the pinch point in _reg_node. If there's already a pinch point
2780 2780 // we merely add an edge from the current DEF/KILL to the pinch point.
2781 2781
2782 2782 // After doing the DEF/KILLs, we handle USEs. For each used register, we
2783 2783 // put an edge from the pinch point to the USE.
2784 2784
2785 2785 // To be expedient, the _reg_node array is pre-allocated for the whole
2786 2786 // compilation. _reg_node is lazily initialized; it either contains a NULL,
2787 2787 // or a valid def/kill/pinch-point, or a leftover node from some prior
2788 2788 // block.  A leftover node from a prior block is treated like a NULL (no
2789 2789 // prior def, so no anti-dependence is needed).  A valid def is
2790 2790 // distinguished by being in the current block.
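// A sketch of the lazy-validity test this implies (assumed form, matching
// the checks in anti_do_def/anti_do_use above):
//
//   Node* pinch = _reg_node[def_reg];
//   bool valid = (pinch != NULL && _bbs[pinch->_idx] == b);
//
// A stale entry from an earlier block fails the _bbs test and is simply
// overwritten, so _reg_node never needs to be cleared between blocks.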
2791 2791 bool fat_proj_seen = false;
2792 2792 uint last_safept = _bb_end-1;
2793 2793 Node* end_node = (_bb_end-1 >= _bb_start) ? b->_nodes[last_safept] : NULL;
2794 2794 Node* last_safept_node = end_node;
2795 2795 for( uint i = _bb_end-1; i >= _bb_start; i-- ) {
2796 2796 Node *n = b->_nodes[i];
2797 2797 int is_def = n->outcnt(); // a def if it has uses, sampled before precedence edges are added
2798 2798 if( n->is_MachProj() && n->ideal_reg() == MachProjNode::fat_proj ) {
2799 2799 // Fat-proj kills a slew of registers
2800 2800 // This can add edges to 'n' and obscure whether or not it was a def,
2801 2801 // hence the is_def flag.
2802 2802 fat_proj_seen = true;
2803 2803 RegMask rm = n->out_RegMask();// Make local copy
2804 2804 while( rm.is_NotEmpty() ) {
2805 2805 OptoReg::Name kill = rm.find_first_elem();
2806 2806 rm.Remove(kill);
2807 2807 anti_do_def( b, n, kill, is_def );
2808 2808 }
2809 2809 } else {
2810 2810 // Get DEF'd registers the normal way
2811 2811 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def );
2812 2812 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def );
2813 2813 }
2814 2814
2815 2815 // Kill projections on a branch should appear to occur on the
2816 2816 // branch, not afterwards, so grab the masks from the projections
2817 2817 // and process them.
2818 2818 if (n->is_MachBranch() || (n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_Jump)) {
2819 2819 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2820 2820 Node* use = n->fast_out(i);
2821 2821 if (use->is_Proj()) {
2822 2822 RegMask rm = use->out_RegMask();// Make local copy
2823 2823 while( rm.is_NotEmpty() ) {
2824 2824 OptoReg::Name kill = rm.find_first_elem();
2825 2825 rm.Remove(kill);
2826 2826 anti_do_def( b, n, kill, false );
2827 2827 }
2828 2828 }
2829 2829 }
2830 2830 }
2831 2831
2832 2832 // Check each register used by this instruction for a following DEF/KILL
2833 2833 // that must occur afterward and requires an anti-dependence edge.
2834 2834 for( uint j=0; j<n->req(); j++ ) {
2835 2835 Node *def = n->in(j);
2836 2836 if( def ) {
2837 2837 assert( !def->is_MachProj() || def->ideal_reg() != MachProjNode::fat_proj, "" );
2838 2838 anti_do_use( b, n, _regalloc->get_reg_first(def) );
2839 2839 anti_do_use( b, n, _regalloc->get_reg_second(def) );
2840 2840 }
2841 2841 }
2842 2842 // Do not allow defs of new derived values to float above GC
2843 2843 // points unless the base is definitely available at the GC point.
2844 2844
2845 2845 Node *m = b->_nodes[i];
2846 2846
2847 2847 // Add precedence edge from following safepoint to use of derived pointer
2848 2848 if( last_safept_node != end_node &&
2849 2849 m != last_safept_node) {
2850 2850 for (uint k = 1; k < m->req(); k++) {
2851 2851 const Type *t = m->in(k)->bottom_type();
2852 2852 if( t->isa_oop_ptr() &&
2853 2853 t->is_ptr()->offset() != 0 ) {
2854 2854 last_safept_node->add_prec( m );
2855 2855 break;
2856 2856 }
2857 2857 }
2858 2858 }
2859 2859
2860 2860 if( n->jvms() ) { // Precedence edge from derived to safept
2861 2861 // Check if last_safept_node was moved by pinch-point insertion in anti_do_use()
2862 2862 if( b->_nodes[last_safept] != last_safept_node ) {
2863 2863 last_safept = b->find_node(last_safept_node);
2864 2864 }
2865 2865 for( uint j=last_safept; j > i; j-- ) {
2866 2866 Node *mach = b->_nodes[j];
2867 2867 if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP )
2868 2868 mach->add_prec( n );
2869 2869 }
2870 2870 last_safept = i;
2871 2871 last_safept_node = m;
2872 2872 }
2873 2873 }
2874 2874
2875 2875 if (fat_proj_seen) {
2876 2876 // Garbage collect pinch nodes that were not consumed.
2877 2877 // They are usually created by a fat kill MachProj for a call.
2878 2878 garbage_collect_pinch_nodes();
2879 2879 }
2880 2880 }
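// Worked example (hypothetical block, not part of the patch) for the
// derived-pointer guards above.  Given, in schedule order,
//
//   s1: SafePoint
//   a : p = AddP(base, off)    // derived oop, offset != 0
//   u : some use of p
//   s2: SafePoint              // not the block-ending node
//
// the bottom-up scan performs s2->add_prec(u) when it visits u, so the use
// of the derived pointer cannot sink below the following safepoint, and
// a->add_prec(s1) when it visits s1, so the derived-pointer def cannot
// float above the preceding GC point.  Base and derived pointer thus stay
// paired across each safepoint where the oop map must record them.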
2881 2881
2882 2882 //------------------------------garbage_collect_pinch_nodes-------------------------------
2883 2883
2884 2884 // Garbage collect pinch nodes for reuse by other blocks.
2885 2885 //
2886 2886 // The block scheduler's insertion of anti-dependence
2887 2887 // edges creates many pinch nodes when the block contains
2888 2888 // 2 or more Calls. A pinch node is used to prevent a
2889 2889 // combinatorial explosion of edges. If a set of kills for a
2890 2890 // register is anti-dependent on a set of uses (or defs), rather
2891 2891 // than adding an edge in the graph between each pair of kill
2892 2892 // and use (or def), a pinch is inserted between them:
2893 2893 //
2894 2894 // use1 use2 use3
2895 2895 // \ | /
2896 2896 // \ | /
2897 2897 // pinch
2898 2898 // / | \
2899 2899 // / | \
2900 2900 // kill1 kill2 kill3
2901 2901 //
2902 2902 // One pinch node is created per register killed when
2903 2903 // the second call is encountered during a backwards pass
2904 2904 // over the block. Most of these pinch nodes are never
2905 2905 // wired into the graph because the register is never
2906 2906 // used or def'ed in the block.
2907 2907 //
2908 2908 void Scheduling::garbage_collect_pinch_nodes() {
2909 2909 #ifndef PRODUCT
2910 2910 if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
2911 2911 #endif
2912 2912 int trace_cnt = 0;
2913 2913 for (uint k = 0; k < _reg_node.Size(); k++) {
2914 2914 Node* pinch = _reg_node[k];
2915 2915 if (pinch != NULL && pinch->Opcode() == Op_Node &&
2916 2916 // no precedence input edges
2917 2917 (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
2918 2918 cleanup_pinch(pinch);
2919 2919 _pinch_free_list.push(pinch);
2920 2920 _reg_node.map(k, NULL);
2921 2921 #ifndef PRODUCT
2922 2922 if (_cfg->C->trace_opto_output()) {
2923 2923 trace_cnt++;
2924 2924 if (trace_cnt > 40) {
2925 2925 tty->print("\n");
2926 2926 trace_cnt = 0;
2927 2927 }
2928 2928 tty->print(" %d", pinch->_idx);
2929 2929 }
2930 2930 #endif
2931 2931 }
2932 2932 }
2933 2933 #ifndef PRODUCT
2934 2934 if (_cfg->C->trace_opto_output()) tty->print("\n");
2935 2935 #endif
2936 2936 }
2937 2937
2938 2938 // Clean up a pinch node for reuse.
2939 2939 void Scheduling::cleanup_pinch( Node *pinch ) {
2940 2940 assert(pinch && pinch->Opcode() == Op_Node && pinch->req() == 1, "just checking");
2941 2941
2942 2942 for (DUIterator_Last imin, i = pinch->last_outs(imin); i >= imin; ) {
2943 2943 Node* use = pinch->last_out(i);
2944 2944 uint uses_found = 0;
2945 2945 for (uint j = use->req(); j < use->len(); j++) {
2946 2946 if (use->in(j) == pinch) {
2947 2947 use->rm_prec(j);
2948 2948 uses_found++;
2949 2949 }
2950 2950 }
2951 2951 assert(uses_found > 0, "must be a precedence edge");
2952 2952 i -= uses_found; // we deleted 1 or more copies of this edge
2953 2953 }
2954 2954 // May have a later_def entry
2955 2955 pinch->set_req(0, NULL);
2956 2956 }
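// Note (illustrative): precedence edges occupy the input slots [req(), len())
// of a node, which is why the scan above starts at use->req().  The
// DUIterator_Last index is rewound by uses_found because rm_prec() may have
// removed several duplicate copies of the same out-edge from one user.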
2957 2957
2958 2958 //------------------------------print_statistics-------------------------------
2959 2959 #ifndef PRODUCT
2960 2960
2961 2961 void Scheduling::dump_available() const {
2962 2962 tty->print("#Availist ");
2963 2963 for (uint i = 0; i < _available.size(); i++)
2964 2964 tty->print(" N%d/l%d", _available[i]->_idx,_current_latency[_available[i]->_idx]);
2965 2965 tty->cr();
2966 2966 }
2967 2967
2968 2968 // Print Scheduling Statistics
2969 2969 void Scheduling::print_statistics() {
2970 2970 // Print the size added by nops for bundling
2971 2971 tty->print("Nops added %d bytes to total of %d bytes",
2972 2972 _total_nop_size, _total_method_size);
2973 2973 if (_total_method_size > 0)
2974 2974 tty->print(", for %.2f%%",
2975 2975 ((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
2976 2976 tty->print("\n");
2977 2977
2978 2978 // Print the number of branch shadows filled
2979 2979 if (Pipeline::_branch_has_delay_slot) {
2980 2980 tty->print("Of %d branches, %d had unconditional delay slots filled",
2981 2981 _total_branches, _total_unconditional_delays);
2982 2982 if (_total_branches > 0)
2983 2983 tty->print(", for %.2f%%",
2984 2984 ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
2985 2985 tty->print("\n");
2986 2986 }
2987 2987
2988 2988 uint total_instructions = 0, total_bundles = 0;
2989 2989
2990 2990 for (uint i = 1; i <= Pipeline::_max_instrs_per_cycle; i++) {
2991 2991 uint bundle_count = _total_instructions_per_bundle[i];
2992 2992 total_instructions += bundle_count * i;
2993 2993 total_bundles += bundle_count;
2994 2994 }
2995 2995
2996 2996 if (total_bundles > 0)
2997 2997 tty->print("Average ILP (excluding nops) is %.2f\n",
2998 2998 ((double)total_instructions) / ((double)total_bundles));
2999 2999 }
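// Worked example (hypothetical counts): if the histogram recorded 10
// single-instruction bundles and 5 two-instruction bundles, then
//   total_instructions = 10*1 + 5*2 = 20,  total_bundles = 15,
// and the reported average ILP is 20/15 ~= 1.33.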
3000 3000 #endif