rev 1838 : 6961690: load oops from constant table on SPARC
Summary: oops should be loaded from the constant table of an nmethod instead of materializing them with a long code sequence.
Reviewed-by:
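Context (illustrative only, not taken from this webrev; exact instruction selection is done by the AD files and the register names below are placeholders): materializing an arbitrary 64-bit oop inline on SPARC needs a multi-instruction set64-style sequence, roughly

    sethi %hh(oop), tmp
    or    tmp, %hm(oop), tmp
    sllx  tmp, 32, tmp
    sethi %lm(oop), dst
    or    dst, %lo(oop), dst
    or    dst, tmp, dst

whereas with the oop stored in the nmethod's constant table the generated code performs a single load relative to the table base register, which is set up once per method by the new MachConstantBaseNode:

    ldx   [const_table_base + offset], dst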
--- old/src/share/vm/opto/compile.cpp
+++ new/src/share/vm/opto/compile.cpp
1 1 /*
2 2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "incls/_precompiled.incl"
26 26 #include "incls/_compile.cpp.incl"
27 27
28 +
29 +// -------------------- Compile::mach_constant_base_node -----------------------
30 +// Constant table base node singleton.
31 +MachConstantBaseNode* Compile::mach_constant_base_node() {
32 + if (_mach_constant_base_node == NULL) {
33 + _mach_constant_base_node = new (C) MachConstantBaseNode();
34 + _mach_constant_base_node->set_req(0, C->root());
35 + }
36 + return _mach_constant_base_node;
37 +}
38 +
39 +
28 40 /// Support for intrinsics.
29 41
30 42 // Return the index at which m must be inserted (or already exists).
31 43 // The sort order is by the address of the ciMethod, with is_virtual as minor key.
32 44 int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
33 45 #ifdef ASSERT
34 46 for (int i = 1; i < _intrinsics->length(); i++) {
35 47 CallGenerator* cg1 = _intrinsics->at(i-1);
36 48 CallGenerator* cg2 = _intrinsics->at(i);
37 49 assert(cg1->method() != cg2->method()
38 50 ? cg1->method() < cg2->method()
39 51 : cg1->is_virtual() < cg2->is_virtual(),
40 52 "compiler intrinsics list must stay sorted");
41 53 }
42 54 #endif
43 55 // Binary search sorted list, in decreasing intervals [lo, hi].
44 56 int lo = 0, hi = _intrinsics->length()-1;
45 57 while (lo <= hi) {
46 58 int mid = (uint)(hi + lo) / 2;
47 59 ciMethod* mid_m = _intrinsics->at(mid)->method();
48 60 if (m < mid_m) {
49 61 hi = mid-1;
50 62 } else if (m > mid_m) {
51 63 lo = mid+1;
52 64 } else {
53 65 // look at minor sort key
54 66 bool mid_virt = _intrinsics->at(mid)->is_virtual();
55 67 if (is_virtual < mid_virt) {
56 68 hi = mid-1;
57 69 } else if (is_virtual > mid_virt) {
58 70 lo = mid+1;
59 71 } else {
60 72 return mid; // exact match
61 73 }
62 74 }
63 75 }
64 76 return lo; // inexact match
65 77 }
66 78
67 79 void Compile::register_intrinsic(CallGenerator* cg) {
68 80 if (_intrinsics == NULL) {
69 81 _intrinsics = new GrowableArray<CallGenerator*>(60);
70 82 }
71 83 // This code is stolen from ciObjectFactory::insert.
72 84 // Really, GrowableArray should have methods for
73 85 // insert_at, remove_at, and binary_search.
74 86 int len = _intrinsics->length();
75 87 int index = intrinsic_insertion_index(cg->method(), cg->is_virtual());
76 88 if (index == len) {
77 89 _intrinsics->append(cg);
78 90 } else {
79 91 #ifdef ASSERT
80 92 CallGenerator* oldcg = _intrinsics->at(index);
81 93 assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice");
82 94 #endif
83 95 _intrinsics->append(_intrinsics->at(len-1));
84 96 int pos;
85 97 for (pos = len-2; pos >= index; pos--) {
86 98 _intrinsics->at_put(pos+1,_intrinsics->at(pos));
87 99 }
88 100 _intrinsics->at_put(index, cg);
89 101 }
90 102 assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
91 103 }
92 104
93 105 CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
94 106 assert(m->is_loaded(), "don't try this on unloaded methods");
95 107 if (_intrinsics != NULL) {
96 108 int index = intrinsic_insertion_index(m, is_virtual);
97 109 if (index < _intrinsics->length()
98 110 && _intrinsics->at(index)->method() == m
99 111 && _intrinsics->at(index)->is_virtual() == is_virtual) {
100 112 return _intrinsics->at(index);
101 113 }
102 114 }
103 115 // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
104 116 if (m->intrinsic_id() != vmIntrinsics::_none &&
105 117 m->intrinsic_id() <= vmIntrinsics::LAST_COMPILER_INLINE) {
106 118 CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
107 119 if (cg != NULL) {
108 120 // Save it for next time:
109 121 register_intrinsic(cg);
110 122 return cg;
111 123 } else {
112 124 gather_intrinsic_statistics(m->intrinsic_id(), is_virtual, _intrinsic_disabled);
113 125 }
114 126 }
115 127 return NULL;
116 128 }
117 129
118 130 // Compile:: register_library_intrinsics and make_vm_intrinsic are defined
119 131 // in library_call.cpp.
120 132
121 133
122 134 #ifndef PRODUCT
123 135 // statistics gathering...
124 136
125 137 juint Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0};
126 138 jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0};
127 139
128 140 bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) {
129 141 assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob");
130 142 int oflags = _intrinsic_hist_flags[id];
131 143 assert(flags != 0, "what happened?");
132 144 if (is_virtual) {
133 145 flags |= _intrinsic_virtual;
134 146 }
135 147 bool changed = (flags != oflags);
136 148 if ((flags & _intrinsic_worked) != 0) {
137 149 juint count = (_intrinsic_hist_count[id] += 1);
138 150 if (count == 1) {
139 151 changed = true; // first time
140 152 }
141 153 // increment the overall count also:
142 154 _intrinsic_hist_count[vmIntrinsics::_none] += 1;
143 155 }
144 156 if (changed) {
145 157 if (((oflags ^ flags) & _intrinsic_virtual) != 0) {
146 158 // Something changed about the intrinsic's virtuality.
147 159 if ((flags & _intrinsic_virtual) != 0) {
148 160 // This is the first use of this intrinsic as a virtual call.
149 161 if (oflags != 0) {
150 162 // We already saw it as a non-virtual, so note both cases.
151 163 flags |= _intrinsic_both;
152 164 }
153 165 } else if ((oflags & _intrinsic_both) == 0) {
154 166 // This is the first use of this intrinsic as a non-virtual
155 167 flags |= _intrinsic_both;
156 168 }
157 169 }
158 170 _intrinsic_hist_flags[id] = (jubyte) (oflags | flags);
159 171 }
160 172 // update the overall flags also:
161 173 _intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) flags;
162 174 return changed;
163 175 }
164 176
165 177 static char* format_flags(int flags, char* buf) {
166 178 buf[0] = 0;
167 179 if ((flags & Compile::_intrinsic_worked) != 0) strcat(buf, ",worked");
168 180 if ((flags & Compile::_intrinsic_failed) != 0) strcat(buf, ",failed");
169 181 if ((flags & Compile::_intrinsic_disabled) != 0) strcat(buf, ",disabled");
170 182 if ((flags & Compile::_intrinsic_virtual) != 0) strcat(buf, ",virtual");
171 183 if ((flags & Compile::_intrinsic_both) != 0) strcat(buf, ",nonvirtual");
172 184 if (buf[0] == 0) strcat(buf, ",");
173 185 assert(buf[0] == ',', "must be");
174 186 return &buf[1];
175 187 }
176 188
177 189 void Compile::print_intrinsic_statistics() {
178 190 char flagsbuf[100];
179 191 ttyLocker ttyl;
180 192 if (xtty != NULL) xtty->head("statistics type='intrinsic'");
181 193 tty->print_cr("Compiler intrinsic usage:");
182 194 juint total = _intrinsic_hist_count[vmIntrinsics::_none];
183 195 if (total == 0) total = 1; // avoid div0 in case of no successes
184 196 #define PRINT_STAT_LINE(name, c, f) \
185 197 tty->print_cr(" %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f);
186 198 for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) {
187 199 vmIntrinsics::ID id = (vmIntrinsics::ID) index;
188 200 int flags = _intrinsic_hist_flags[id];
189 201 juint count = _intrinsic_hist_count[id];
190 202 if ((flags | count) != 0) {
191 203 PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf));
192 204 }
193 205 }
194 206 PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf));
195 207 if (xtty != NULL) xtty->tail("statistics");
196 208 }
197 209
198 210 void Compile::print_statistics() {
199 211 { ttyLocker ttyl;
200 212 if (xtty != NULL) xtty->head("statistics type='opto'");
201 213 Parse::print_statistics();
202 214 PhaseCCP::print_statistics();
203 215 PhaseRegAlloc::print_statistics();
204 216 Scheduling::print_statistics();
205 217 PhasePeephole::print_statistics();
206 218 PhaseIdealLoop::print_statistics();
207 219 if (xtty != NULL) xtty->tail("statistics");
208 220 }
209 221 if (_intrinsic_hist_flags[vmIntrinsics::_none] != 0) {
210 222 // put this under its own <statistics> element.
211 223 print_intrinsic_statistics();
212 224 }
213 225 }
214 226 #endif //PRODUCT
215 227
216 228 // Support for bundling info
217 229 Bundle* Compile::node_bundling(const Node *n) {
218 230 assert(valid_bundle_info(n), "oob");
219 231 return &_node_bundling_base[n->_idx];
220 232 }
221 233
222 234 bool Compile::valid_bundle_info(const Node *n) {
223 235 return (_node_bundling_limit > n->_idx);
224 236 }
225 237
226 238
227 239 void Compile::gvn_replace_by(Node* n, Node* nn) {
228 240 for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) {
229 241 Node* use = n->last_out(i);
230 242 bool is_in_table = initial_gvn()->hash_delete(use);
231 243 uint uses_found = 0;
232 244 for (uint j = 0; j < use->len(); j++) {
233 245 if (use->in(j) == n) {
234 246 if (j < use->req())
235 247 use->set_req(j, nn);
236 248 else
237 249 use->set_prec(j, nn);
238 250 uses_found++;
239 251 }
240 252 }
241 253 if (is_in_table) {
242 254 // reinsert into table
243 255 initial_gvn()->hash_find_insert(use);
244 256 }
245 257 record_for_igvn(use);
246 258 i -= uses_found; // we deleted 1 or more copies of this edge
247 259 }
248 260 }
249 261
250 262
251 263
252 264
253 265 // Identify all nodes that are reachable from below, useful.
254 266 // Use breadth-first pass that records state in a Unique_Node_List,
255 267 // recursive traversal is slower.
256 268 void Compile::identify_useful_nodes(Unique_Node_List &useful) {
257 269 int estimated_worklist_size = unique();
258 270 useful.map( estimated_worklist_size, NULL ); // preallocate space
259 271
260 272 // Initialize worklist
261 273 if (root() != NULL) { useful.push(root()); }
262 274 // If 'top' is cached, declare it useful to preserve cached node
263 275 if( cached_top_node() ) { useful.push(cached_top_node()); }
264 276
265 277 // Push all useful nodes onto the list, breadthfirst
266 278 for( uint next = 0; next < useful.size(); ++next ) {
267 279 assert( next < unique(), "Unique useful nodes < total nodes");
268 280 Node *n = useful.at(next);
269 281 uint max = n->len();
270 282 for( uint i = 0; i < max; ++i ) {
271 283 Node *m = n->in(i);
272 284 if( m == NULL ) continue;
273 285 useful.push(m);
274 286 }
275 287 }
276 288 }
277 289
278 290 // Disconnect all useless nodes by disconnecting those at the boundary.
279 291 void Compile::remove_useless_nodes(Unique_Node_List &useful) {
280 292 uint next = 0;
281 293 while( next < useful.size() ) {
282 294 Node *n = useful.at(next++);
283 295 // Use raw traversal of out edges since this code removes out edges
284 296 int max = n->outcnt();
285 297 for (int j = 0; j < max; ++j ) {
286 298 Node* child = n->raw_out(j);
287 299 if( ! useful.member(child) ) {
288 300 assert( !child->is_top() || child != top(),
289 301 "If top is cached in Compile object it is in useful list");
290 302 // Only need to remove this out-edge to the useless node
291 303 n->raw_del_out(j);
292 304 --j;
293 305 --max;
294 306 }
295 307 }
296 308 if (n->outcnt() == 1 && n->has_special_unique_user()) {
297 309 record_for_igvn( n->unique_out() );
298 310 }
299 311 }
300 312 debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
301 313 }
302 314
303 315 //------------------------------frame_size_in_words-----------------------------
304 316 // frame_slots in units of words
305 317 int Compile::frame_size_in_words() const {
306 318 // shift is 0 in LP32 and 1 in LP64
307 319 const int shift = (LogBytesPerWord - LogBytesPerInt);
308 320 int words = _frame_slots >> shift;
309 321 assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" );
310 322 return words;
311 323 }
312 324
313 325 // ============================================================================
314 326 //------------------------------CompileWrapper---------------------------------
315 327 class CompileWrapper : public StackObj {
316 328 Compile *const _compile;
317 329 public:
318 330 CompileWrapper(Compile* compile);
319 331
320 332 ~CompileWrapper();
321 333 };
322 334
323 335 CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
324 336 // the Compile* pointer is stored in the current ciEnv:
325 337 ciEnv* env = compile->env();
326 338 assert(env == ciEnv::current(), "must already be a ciEnv active");
327 339 assert(env->compiler_data() == NULL, "compile already active?");
328 340 env->set_compiler_data(compile);
329 341 assert(compile == Compile::current(), "sanity");
330 342
331 343 compile->set_type_dict(NULL);
332 344 compile->set_type_hwm(NULL);
333 345 compile->set_type_last_size(0);
334 346 compile->set_last_tf(NULL, NULL);
335 347 compile->set_indexSet_arena(NULL);
336 348 compile->set_indexSet_free_block_list(NULL);
337 349 compile->init_type_arena();
338 350 Type::Initialize(compile);
339 351 _compile->set_scratch_buffer_blob(NULL);
340 352 _compile->begin_method();
341 353 }
342 354 CompileWrapper::~CompileWrapper() {
343 355 _compile->end_method();
344 356 if (_compile->scratch_buffer_blob() != NULL)
345 357 BufferBlob::free(_compile->scratch_buffer_blob());
346 358 _compile->env()->set_compiler_data(NULL);
347 359 }
348 360
349 361
350 362 //----------------------------print_compile_messages---------------------------
351 363 void Compile::print_compile_messages() {
352 364 #ifndef PRODUCT
353 365 // Check if recompiling
354 366 if (_subsume_loads == false && PrintOpto) {
355 367 // Recompiling without allowing machine instructions to subsume loads
356 368 tty->print_cr("*********************************************************");
357 369 tty->print_cr("** Bailout: Recompile without subsuming loads **");
358 370 tty->print_cr("*********************************************************");
359 371 }
360 372 if (_do_escape_analysis != DoEscapeAnalysis && PrintOpto) {
361 373 // Recompiling without escape analysis
362 374 tty->print_cr("*********************************************************");
363 375 tty->print_cr("** Bailout: Recompile without escape analysis **");
364 376 tty->print_cr("*********************************************************");
365 377 }
366 378 if (env()->break_at_compile()) {
367 379 // Open the debugger when compiling this method.
368 380 tty->print("### Breaking when compiling: ");
369 381 method()->print_short_name();
370 382 tty->cr();
371 383 BREAKPOINT;
372 384 }
373 385
374 386 if( PrintOpto ) {
375 387 if (is_osr_compilation()) {
376 388 tty->print("[OSR]%3d", _compile_id);
377 389 } else {
378 390 tty->print("%3d", _compile_id);
379 391 }
380 392 }
381 393 #endif
382 394 }
383 395
384 396
385 -void Compile::init_scratch_buffer_blob() {
386 - if( scratch_buffer_blob() != NULL ) return;
397 +void Compile::init_scratch_buffer_blob(int const_size) {
398 + if (scratch_buffer_blob() != NULL) return;
387 399
388 400 // Construct a temporary CodeBuffer to have it construct a BufferBlob
389 401 // Cache this BufferBlob for this compile.
390 402 ResourceMark rm;
391 - int size = (MAX_inst_size + MAX_stubs_size + MAX_const_size);
403 + _scratch_const_size = const_size;
404 + int size = (MAX_inst_size + MAX_stubs_size + _scratch_const_size);
392 405 BufferBlob* blob = BufferBlob::create("Compile::scratch_buffer", size);
393 406 // Record the buffer blob for next time.
394 407 set_scratch_buffer_blob(blob);
395 408 // Have we run out of code space?
396 409 if (scratch_buffer_blob() == NULL) {
397 410 // Let CompilerBroker disable further compilations.
398 411 record_failure("Not enough space for scratch buffer in CodeCache");
399 412 return;
400 413 }
401 414
402 415 // Initialize the relocation buffers
403 416 relocInfo* locs_buf = (relocInfo*) blob->content_end() - MAX_locs_size;
404 417 set_scratch_locs_memory(locs_buf);
405 418 }
406 419
407 420
421 +void Compile::clear_scratch_buffer_blob() {
422 + assert(scratch_buffer_blob(), "no BufferBlob set");
423 + set_scratch_buffer_blob(NULL);
424 + set_scratch_locs_memory(NULL);
425 +}
426 +
427 +
408 428 //-----------------------scratch_emit_size-------------------------------------
409 429 // Helper function that computes size by emitting code
410 430 uint Compile::scratch_emit_size(const Node* n) {
431 + // Start scratch_emit_size section.
432 + set_in_scratch_emit_size(true);
433 +
411 434 // Emit into a trash buffer and count bytes emitted.
412 435 // This is a pretty expensive way to compute a size,
413 436 // but it works well enough if seldom used.
414 437 // All common fixed-size instructions are given a size
415 438 // method by the AD file.
416 439 // Note that the scratch buffer blob and locs memory are
417 440 // allocated at the beginning of the compile task, and
418 441 // may be shared by several calls to scratch_emit_size.
419 442 // The allocation of the scratch buffer blob is particularly
420 443 // expensive, since it has to grab the code cache lock.
421 444 BufferBlob* blob = this->scratch_buffer_blob();
422 445 assert(blob != NULL, "Initialize BufferBlob at start");
423 446 assert(blob->size() > MAX_inst_size, "sanity");
424 447 relocInfo* locs_buf = scratch_locs_memory();
425 448 address blob_begin = blob->content_begin();
426 449 address blob_end = (address)locs_buf;
427 450 assert(blob->content_contains(blob_end), "sanity");
428 451 CodeBuffer buf(blob_begin, blob_end - blob_begin);
429 - buf.initialize_consts_size(MAX_const_size);
452 + buf.initialize_consts_size(_scratch_const_size);
430 453 buf.initialize_stubs_size(MAX_stubs_size);
431 454 assert(locs_buf != NULL, "sanity");
432 - int lsize = MAX_locs_size / 2;
433 - buf.insts()->initialize_shared_locs(&locs_buf[0], lsize);
434 - buf.stubs()->initialize_shared_locs(&locs_buf[lsize], lsize);
455 + int lsize = MAX_locs_size / 3;
456 + buf.consts()->initialize_shared_locs(&locs_buf[lsize * 0], lsize);
457 + buf.insts()->initialize_shared_locs( &locs_buf[lsize * 1], lsize);
458 + buf.stubs()->initialize_shared_locs( &locs_buf[lsize * 2], lsize);
459 +
460 + // Do the emission.
435 461 n->emit(buf, this->regalloc());
462 +
463 + // End scratch_emit_size section.
464 + set_in_scratch_emit_size(false);
465 +
436 466 return buf.insts_size();
437 467 }
438 468
439 469
440 470 // ============================================================================
441 471 //------------------------------Compile standard-------------------------------
442 472 debug_only( int Compile::_debug_idx = 100000; )
443 473
444 474 // Compile a method. entry_bci is -1 for normal compilations and indicates
445 475 // the continuation bci for on stack replacement.
446 476
447 477
448 478 Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci, bool subsume_loads, bool do_escape_analysis )
449 479 : Phase(Compiler),
450 480 _env(ci_env),
451 481 _log(ci_env->log()),
452 482 _compile_id(ci_env->compile_id()),
453 483 _save_argument_registers(false),
454 484 _stub_name(NULL),
455 485 _stub_function(NULL),
456 486 _stub_entry_point(NULL),
457 487 _method(target),
458 488 _entry_bci(osr_bci),
459 489 _initial_gvn(NULL),
460 490 _for_igvn(NULL),
461 491 _warm_calls(NULL),
462 492 _subsume_loads(subsume_loads),
463 493 _do_escape_analysis(do_escape_analysis),
464 494 _failure_reason(NULL),
465 495 _code_buffer("Compile::Fill_buffer"),
466 496 _orig_pc_slot(0),
467 497 _orig_pc_slot_offset_in_bytes(0),
468 498 _has_method_handle_invokes(false),
499 + _mach_constant_base_node(NULL),
469 500 _node_bundling_limit(0),
470 501 _node_bundling_base(NULL),
471 502 _java_calls(0),
472 503 _inner_loops(0),
504 + _scratch_const_size(-1),
505 + _in_scratch_emit_size(false),
473 506 #ifndef PRODUCT
474 507 _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
475 508 _printer(IdealGraphPrinter::printer()),
476 509 #endif
477 510 _congraph(NULL) {
478 511 C = this;
479 512
480 513 CompileWrapper cw(this);
481 514 #ifndef PRODUCT
482 515 if (TimeCompiler2) {
483 516 tty->print(" ");
484 517 target->holder()->name()->print();
485 518 tty->print(".");
486 519 target->print_short_name();
487 520 tty->print(" ");
488 521 }
489 522 TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
490 523 TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
491 524 bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");
492 525 if (!print_opto_assembly) {
493 526 bool print_assembly = (PrintAssembly || _method->should_print_assembly());
494 527 if (print_assembly && !Disassembler::can_decode()) {
495 528 tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");
496 529 print_opto_assembly = true;
497 530 }
498 531 }
499 532 set_print_assembly(print_opto_assembly);
500 533 set_parsed_irreducible_loop(false);
501 534 #endif
502 535
503 536 if (ProfileTraps) {
504 537 // Make sure the method being compiled gets its own MDO,
505 538 // so we can at least track the decompile_count().
506 539 method()->build_method_data();
507 540 }
508 541
509 542 Init(::AliasLevel);
510 543
511 544
512 545 print_compile_messages();
513 546
514 547 if (UseOldInlining || PrintCompilation NOT_PRODUCT( || PrintOpto) )
515 548 _ilt = InlineTree::build_inline_tree_root();
516 549 else
517 550 _ilt = NULL;
518 551
519 552 // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice
520 553 assert(num_alias_types() >= AliasIdxRaw, "");
521 554
522 555 #define MINIMUM_NODE_HASH 1023
523 556 // Node list that Iterative GVN will start with
524 557 Unique_Node_List for_igvn(comp_arena());
525 558 set_for_igvn(&for_igvn);
526 559
527 560 // GVN that will be run immediately on new nodes
528 561 uint estimated_size = method()->code_size()*4+64;
529 562 estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);
530 563 PhaseGVN gvn(node_arena(), estimated_size);
531 564 set_initial_gvn(&gvn);
532 565
533 566 { // Scope for timing the parser
534 567 TracePhase t3("parse", &_t_parser, true);
535 568
536 569 // Put top into the hash table ASAP.
537 570 initial_gvn()->transform_no_reclaim(top());
538 571
539 572 // Set up tf(), start(), and find a CallGenerator.
540 573 CallGenerator* cg;
541 574 if (is_osr_compilation()) {
542 575 const TypeTuple *domain = StartOSRNode::osr_domain();
543 576 const TypeTuple *range = TypeTuple::make_range(method()->signature());
544 577 init_tf(TypeFunc::make(domain, range));
545 578 StartNode* s = new (this, 2) StartOSRNode(root(), domain);
546 579 initial_gvn()->set_type_bottom(s);
547 580 init_start(s);
548 581 cg = CallGenerator::for_osr(method(), entry_bci());
549 582 } else {
550 583 // Normal case.
551 584 init_tf(TypeFunc::make(method()));
552 585 StartNode* s = new (this, 2) StartNode(root(), tf()->domain());
553 586 initial_gvn()->set_type_bottom(s);
554 587 init_start(s);
555 588 float past_uses = method()->interpreter_invocation_count();
556 589 float expected_uses = past_uses;
557 590 cg = CallGenerator::for_inline(method(), expected_uses);
558 591 }
559 592 if (failing()) return;
560 593 if (cg == NULL) {
561 594 record_method_not_compilable_all_tiers("cannot parse method");
562 595 return;
563 596 }
564 597 JVMState* jvms = build_start_state(start(), tf());
565 598 if ((jvms = cg->generate(jvms)) == NULL) {
566 599 record_method_not_compilable("method parse failed");
567 600 return;
568 601 }
569 602 GraphKit kit(jvms);
570 603
571 604 if (!kit.stopped()) {
572 605 // Accept return values, and transfer control we know not where.
573 606 // This is done by a special, unique ReturnNode bound to root.
574 607 return_values(kit.jvms());
575 608 }
576 609
577 610 if (kit.has_exceptions()) {
578 611 // Any exceptions that escape from this call must be rethrown
579 612 // to whatever caller is dynamically above us on the stack.
580 613 // This is done by a special, unique RethrowNode bound to root.
581 614 rethrow_exceptions(kit.transfer_exceptions_into_jvms());
582 615 }
583 616
584 617 if (!failing() && has_stringbuilder()) {
585 618 {
586 619 // remove useless nodes to make the usage analysis simpler
587 620 ResourceMark rm;
588 621 PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
589 622 }
590 623
591 624 {
592 625 ResourceMark rm;
593 626 print_method("Before StringOpts", 3);
594 627 PhaseStringOpts pso(initial_gvn(), &for_igvn);
595 628 print_method("After StringOpts", 3);
596 629 }
597 630
598 631 // now inline anything that we skipped the first time around
599 632 while (_late_inlines.length() > 0) {
600 633 CallGenerator* cg = _late_inlines.pop();
601 634 cg->do_late_inline();
602 635 }
603 636 }
604 637 assert(_late_inlines.length() == 0, "should have been processed");
605 638
606 639 print_method("Before RemoveUseless", 3);
607 640
608 641 // Remove clutter produced by parsing.
609 642 if (!failing()) {
610 643 ResourceMark rm;
611 644 PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
612 645 }
613 646 }
614 647
615 648 // Note: Large methods are capped off in do_one_bytecode().
616 649 if (failing()) return;
617 650
618 651 // After parsing, node notes are no longer automagic.
619 652 // They must be propagated by register_new_node_with_optimizer(),
620 653 // clone(), or the like.
621 654 set_default_node_notes(NULL);
622 655
623 656 for (;;) {
624 657 int successes = Inline_Warm();
625 658 if (failing()) return;
626 659 if (successes == 0) break;
627 660 }
628 661
629 662 // Drain the list.
630 663 Finish_Warm();
631 664 #ifndef PRODUCT
632 665 if (_printer) {
633 666 _printer->print_inlining(this);
634 667 }
635 668 #endif
636 669
637 670 if (failing()) return;
638 671 NOT_PRODUCT( verify_graph_edges(); )
639 672
640 673 // Now optimize
641 674 Optimize();
642 675 if (failing()) return;
643 676 NOT_PRODUCT( verify_graph_edges(); )
644 677
645 678 #ifndef PRODUCT
646 679 if (PrintIdeal) {
647 680 ttyLocker ttyl; // keep the following output all in one block
648 681 // This output goes directly to the tty, not the compiler log.
649 682 // To enable tools to match it up with the compilation activity,
650 683 // be sure to tag this tty output with the compile ID.
651 684 if (xtty != NULL) {
652 685 xtty->head("ideal compile_id='%d'%s", compile_id(),
653 686 is_osr_compilation() ? " compile_kind='osr'" :
654 687 "");
655 688 }
656 689 root()->dump(9999);
657 690 if (xtty != NULL) {
658 691 xtty->tail("ideal");
659 692 }
660 693 }
661 694 #endif
662 695
663 696 // Now that we know the size of all the monitors we can add a fixed slot
664 697 // for the original deopt pc.
665 698
666 699 _orig_pc_slot = fixed_slots();
667 700 int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
668 701 set_fixed_slots(next_slot);
669 702
670 703 // Now generate code
671 704 Code_Gen();
672 705 if (failing()) return;
673 706
674 707 // Check if we want to skip execution of all compiled code.
675 708 {
676 709 #ifndef PRODUCT
677 710 if (OptoNoExecute) {
678 711 record_method_not_compilable("+OptoNoExecute"); // Flag as failed
679 712 return;
680 713 }
681 714 TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
682 715 #endif
683 716
684 717 if (is_osr_compilation()) {
685 718 _code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
686 719 _code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);
687 720 } else {
688 721 _code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);
689 722 _code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
690 723 }
691 724
692 725 env()->register_method(_method, _entry_bci,
693 726 &_code_offsets,
694 727 _orig_pc_slot_offset_in_bytes,
695 728 code_buffer(),
696 729 frame_size_in_words(), _oop_map_set,
697 730 &_handler_table, &_inc_table,
698 731 compiler,
699 732 env()->comp_level(),
700 733 true, /*has_debug_info*/
701 734 has_unsafe_access()
702 735 );
703 736 }
704 737 }
705 738
706 739 //------------------------------Compile----------------------------------------
707 740 // Compile a runtime stub
708 741 Compile::Compile( ciEnv* ci_env,
709 742 TypeFunc_generator generator,
710 743 address stub_function,
711 744 const char *stub_name,
712 745 int is_fancy_jump,
713 746 bool pass_tls,
714 747 bool save_arg_registers,
715 748 bool return_pc )
716 749 : Phase(Compiler),
717 750 _env(ci_env),
718 751 _log(ci_env->log()),
719 752 _compile_id(-1),
720 753 _save_argument_registers(save_arg_registers),
721 754 _method(NULL),
722 755 _stub_name(stub_name),
723 756 _stub_function(stub_function),
724 757 _stub_entry_point(NULL),
725 758 _entry_bci(InvocationEntryBci),
726 759 _initial_gvn(NULL),
727 760 _for_igvn(NULL),
728 761 _warm_calls(NULL),
729 762 _orig_pc_slot(0),
730 763 _orig_pc_slot_offset_in_bytes(0),
731 764 _subsume_loads(true),
732 765 _do_escape_analysis(false),
733 766 _failure_reason(NULL),
734 767 _code_buffer("Compile::Fill_buffer"),
735 768 _has_method_handle_invokes(false),
769 + _mach_constant_base_node(NULL),
736 770 _node_bundling_limit(0),
737 771 _node_bundling_base(NULL),
738 772 _java_calls(0),
739 773 _inner_loops(0),
740 774 #ifndef PRODUCT
741 775 _trace_opto_output(TraceOptoOutput),
742 776 _printer(NULL),
743 777 #endif
744 778 _congraph(NULL) {
745 779 C = this;
746 780
747 781 #ifndef PRODUCT
748 782 TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
749 783 TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
750 784 set_print_assembly(PrintFrameConverterAssembly);
751 785 set_parsed_irreducible_loop(false);
752 786 #endif
753 787 CompileWrapper cw(this);
754 788 Init(/*AliasLevel=*/ 0);
755 789 init_tf((*generator)());
756 790
757 791 {
758 792 // The following is a dummy for the sake of GraphKit::gen_stub
759 793 Unique_Node_List for_igvn(comp_arena());
760 794 set_for_igvn(&for_igvn); // not used, but some GraphKit guys push on this
761 795 PhaseGVN gvn(Thread::current()->resource_area(),255);
762 796 set_initial_gvn(&gvn); // not significant, but GraphKit guys use it pervasively
763 797 gvn.transform_no_reclaim(top());
764 798
765 799 GraphKit kit;
766 800 kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
767 801 }
768 802
769 803 NOT_PRODUCT( verify_graph_edges(); )
770 804 Code_Gen();
771 805 if (failing()) return;
772 806
773 807
774 808 // Entry point will be accessed using compile->stub_entry_point();
775 809 if (code_buffer() == NULL) {
776 810 Matcher::soft_match_failure();
777 811 } else {
778 812 if (PrintAssembly && (WizardMode || Verbose))
779 813 tty->print_cr("### Stub::%s", stub_name);
780 814
781 815 if (!failing()) {
782 816 assert(_fixed_slots == 0, "no fixed slots used for runtime stubs");
783 817
784 818 // Make the NMethod
785 819 // For now we mark the frame as never safe for profile stackwalking
786 820 RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name,
787 821 code_buffer(),
788 822 CodeOffsets::frame_never_safe,
789 823 // _code_offsets.value(CodeOffsets::Frame_Complete),
790 824 frame_size_in_words(),
791 825 _oop_map_set,
792 826 save_arg_registers);
793 827 assert(rs != NULL && rs->is_runtime_stub(), "sanity check");
794 828
795 829 _stub_entry_point = rs->entry_point();
796 830 }
797 831 }
798 832 }
799 833
800 834 #ifndef PRODUCT
801 835 void print_opto_verbose_signature( const TypeFunc *j_sig, const char *stub_name ) {
802 836 if(PrintOpto && Verbose) {
803 837 tty->print("%s ", stub_name); j_sig->print_flattened(); tty->cr();
804 838 }
805 839 }
806 840 #endif
807 841
808 842 void Compile::print_codes() {
809 843 }
810 844
811 845 //------------------------------Init-------------------------------------------
812 846 // Prepare for a single compilation
813 847 void Compile::Init(int aliaslevel) {
814 848 _unique = 0;
815 849 _regalloc = NULL;
816 850
817 851 _tf = NULL; // filled in later
818 852 _top = NULL; // cached later
819 853 _matcher = NULL; // filled in later
820 854 _cfg = NULL; // filled in later
821 855
822 856 set_24_bit_selection_and_mode(Use24BitFP, false);
823 857
824 858 _node_note_array = NULL;
825 859 _default_node_notes = NULL;
826 860
827 861 _immutable_memory = NULL; // filled in at first inquiry
828 862
829 863 // Globally visible Nodes
830 864 // First set TOP to NULL to give safe behavior during creation of RootNode
831 865 set_cached_top_node(NULL);
832 866 set_root(new (this, 3) RootNode());
833 867 // Now that you have a Root to point to, create the real TOP
834 868 set_cached_top_node( new (this, 1) ConNode(Type::TOP) );
835 869 set_recent_alloc(NULL, NULL);
836 870
837 871 // Create Debug Information Recorder to record scopes, oopmaps, etc.
838 872 env()->set_oop_recorder(new OopRecorder(comp_arena()));
839 873 env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder()));
840 874 env()->set_dependencies(new Dependencies(env()));
841 875
842 876 _fixed_slots = 0;
843 877 set_has_split_ifs(false);
844 878 set_has_loops(has_method() && method()->has_loops()); // first approximation
845 879 set_has_stringbuilder(false);
846 880 _trap_can_recompile = false; // no traps emitted yet
847 881 _major_progress = true; // start out assuming good things will happen
848 882 set_has_unsafe_access(false);
849 883 Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
850 884 set_decompile_count(0);
851 885
852 886 set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));
853 887 set_num_loop_opts(LoopOptsCount);
854 888 set_do_inlining(Inline);
855 889 set_max_inline_size(MaxInlineSize);
856 890 set_freq_inline_size(FreqInlineSize);
857 891 set_do_scheduling(OptoScheduling);
858 892 set_do_count_invocations(false);
859 893 set_do_method_data_update(false);
860 894
861 895 if (debug_info()->recording_non_safepoints()) {
862 896 set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
863 897 (comp_arena(), 8, 0, NULL));
864 898 set_default_node_notes(Node_Notes::make(this));
865 899 }
866 900
867 901 // // -- Initialize types before each compile --
868 902 // // Update cached type information
869 903 // if( _method && _method->constants() )
870 904 // Type::update_loaded_types(_method, _method->constants());
871 905
872 906 // Init alias_type map.
873 907 if (!_do_escape_analysis && aliaslevel == 3)
874 908 aliaslevel = 2; // No unique types without escape analysis
875 909 _AliasLevel = aliaslevel;
876 910 const int grow_ats = 16;
877 911 _max_alias_types = grow_ats;
878 912 _alias_types = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);
879 913 AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, grow_ats);
880 914 Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats);
881 915 {
882 916 for (int i = 0; i < grow_ats; i++) _alias_types[i] = &ats[i];
883 917 }
884 918 // Initialize the first few types.
885 919 _alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL);
886 920 _alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM);
887 921 _alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM);
888 922 _num_alias_types = AliasIdxRaw+1;
889 923 // Zero out the alias type cache.
890 924 Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache));
891 925 // A NULL adr_type hits in the cache right away. Preload the right answer.
892 926 probe_alias_cache(NULL)->_index = AliasIdxTop;
893 927
894 928 _intrinsics = NULL;
895 929 _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
896 930 _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
897 931 register_library_intrinsics();
898 932 }
899 933
900 934 //---------------------------init_start----------------------------------------
901 935 // Install the StartNode on this compile object.
902 936 void Compile::init_start(StartNode* s) {
903 937 if (failing())
904 938 return; // already failing
905 939 assert(s == start(), "");
906 940 }
907 941
908 942 StartNode* Compile::start() const {
909 943 assert(!failing(), "");
910 944 for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) {
911 945 Node* start = root()->fast_out(i);
912 946 if( start->is_Start() )
913 947 return start->as_Start();
914 948 }
915 949 ShouldNotReachHere();
916 950 return NULL;
917 951 }
918 952
919 953 //-------------------------------immutable_memory-------------------------------------
920 954 // Access immutable memory
921 955 Node* Compile::immutable_memory() {
922 956 if (_immutable_memory != NULL) {
923 957 return _immutable_memory;
924 958 }
925 959 StartNode* s = start();
926 960 for (DUIterator_Fast imax, i = s->fast_outs(imax); true; i++) {
927 961 Node *p = s->fast_out(i);
928 962 if (p != s && p->as_Proj()->_con == TypeFunc::Memory) {
929 963 _immutable_memory = p;
930 964 return _immutable_memory;
931 965 }
932 966 }
933 967 ShouldNotReachHere();
934 968 return NULL;
935 969 }
936 970
937 971 //----------------------set_cached_top_node------------------------------------
938 972 // Install the cached top node, and make sure Node::is_top works correctly.
939 973 void Compile::set_cached_top_node(Node* tn) {
940 974 if (tn != NULL) verify_top(tn);
941 975 Node* old_top = _top;
942 976 _top = tn;
943 977 // Calling Node::setup_is_top allows the nodes the chance to adjust
944 978 // their _out arrays.
945 979 if (_top != NULL) _top->setup_is_top();
946 980 if (old_top != NULL) old_top->setup_is_top();
947 981 assert(_top == NULL || top()->is_top(), "");
948 982 }
949 983
950 984 #ifndef PRODUCT
951 985 void Compile::verify_top(Node* tn) const {
952 986 if (tn != NULL) {
953 987 assert(tn->is_Con(), "top node must be a constant");
954 988 assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type");
955 989 assert(tn->in(0) != NULL, "must have live top node");
956 990 }
957 991 }
958 992 #endif
959 993
960 994
961 995 ///-------------------Managing Per-Node Debug & Profile Info-------------------
962 996
963 997 void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) {
964 998 guarantee(arr != NULL, "");
965 999 int num_blocks = arr->length();
966 1000 if (grow_by < num_blocks) grow_by = num_blocks;
967 1001 int num_notes = grow_by * _node_notes_block_size;
968 1002 Node_Notes* notes = NEW_ARENA_ARRAY(node_arena(), Node_Notes, num_notes);
969 1003 Copy::zero_to_bytes(notes, num_notes * sizeof(Node_Notes));
970 1004 while (num_notes > 0) {
971 1005 arr->append(notes);
972 1006 notes += _node_notes_block_size;
973 1007 num_notes -= _node_notes_block_size;
974 1008 }
975 1009 assert(num_notes == 0, "exact multiple, please");
976 1010 }
977 1011
978 1012 bool Compile::copy_node_notes_to(Node* dest, Node* source) {
979 1013 if (source == NULL || dest == NULL) return false;
980 1014
981 1015 if (dest->is_Con())
982 1016 return false; // Do not push debug info onto constants.
983 1017
984 1018 #ifdef ASSERT
985 1019 // Leave a bread crumb trail pointing to the original node:
986 1020 if (dest != NULL && dest != source && dest->debug_orig() == NULL) {
987 1021 dest->set_debug_orig(source);
988 1022 }
989 1023 #endif
990 1024
991 1025 if (node_note_array() == NULL)
992 1026 return false; // Not collecting any notes now.
993 1027
994 1028 // This is a copy onto a pre-existing node, which may already have notes.
995 1029 // If both nodes have notes, do not overwrite any pre-existing notes.
996 1030 Node_Notes* source_notes = node_notes_at(source->_idx);
997 1031 if (source_notes == NULL || source_notes->is_clear()) return false;
998 1032 Node_Notes* dest_notes = node_notes_at(dest->_idx);
999 1033 if (dest_notes == NULL || dest_notes->is_clear()) {
1000 1034 return set_node_notes_at(dest->_idx, source_notes);
1001 1035 }
1002 1036
1003 1037 Node_Notes merged_notes = (*source_notes);
1004 1038 // The order of operations here ensures that dest notes will win...
1005 1039 merged_notes.update_from(dest_notes);
1006 1040 return set_node_notes_at(dest->_idx, &merged_notes);
1007 1041 }
1008 1042
1009 1043
1010 1044 //--------------------------allow_range_check_smearing-------------------------
1011 1045 // Gating condition for coalescing similar range checks.
1012 1046 // Sometimes we try 'speculatively' replacing a series of a range checks by a
1013 1047 // single covering check that is at least as strong as any of them.
1014 1048 // If the optimization succeeds, the simplified (strengthened) range check
1015 1049 // will always succeed. If it fails, we will deopt, and then give up
1016 1050 // on the optimization.
1017 1051 bool Compile::allow_range_check_smearing() const {
1018 1052 // If this method has already thrown a range-check,
1019 1053 // assume it was because we already tried range smearing
1020 1054 // and it failed.
1021 1055 uint already_trapped = trap_count(Deoptimization::Reason_range_check);
1022 1056 return !already_trapped;
1023 1057 }
1024 1058
1025 1059
1026 1060 //------------------------------flatten_alias_type-----------------------------
1027 1061 const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
1028 1062 int offset = tj->offset();
1029 1063 TypePtr::PTR ptr = tj->ptr();
1030 1064
1031 1065 // Known instance (scalarizable allocation) alias only with itself.
1032 1066 bool is_known_inst = tj->isa_oopptr() != NULL &&
1033 1067 tj->is_oopptr()->is_known_instance();
1034 1068
1035 1069 // Process weird unsafe references.
1036 1070 if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {
1037 1071 assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");
1038 1072 assert(!is_known_inst, "scalarizable allocation should not have unsafe references");
1039 1073 tj = TypeOopPtr::BOTTOM;
1040 1074 ptr = tj->ptr();
1041 1075 offset = tj->offset();
1042 1076 }
1043 1077
1044 1078 // Array pointers need some flattening
1045 1079 const TypeAryPtr *ta = tj->isa_aryptr();
1046 1080 if( ta && is_known_inst ) {
1047 1081 if ( offset != Type::OffsetBot &&
1048 1082 offset > arrayOopDesc::length_offset_in_bytes() ) {
1049 1083 offset = Type::OffsetBot; // Flatten constant access into array body only
1050 1084 tj = ta = TypeAryPtr::make(ptr, ta->ary(), ta->klass(), true, offset, ta->instance_id());
1051 1085 }
1052 1086 } else if( ta && _AliasLevel >= 2 ) {
1053 1087 // For arrays indexed by constant indices, we flatten the alias
1054 1088 // space to include all of the array body. Only the header, klass
1055 1089 // and array length can be accessed un-aliased.
1056 1090 if( offset != Type::OffsetBot ) {
1057 1091 if( ta->const_oop() ) { // methodDataOop or methodOop
1058 1092 offset = Type::OffsetBot; // Flatten constant access into array body
1059 1093 tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
1060 1094 } else if( offset == arrayOopDesc::length_offset_in_bytes() ) {
1061 1095 // range is OK as-is.
1062 1096 tj = ta = TypeAryPtr::RANGE;
1063 1097 } else if( offset == oopDesc::klass_offset_in_bytes() ) {
1064 1098 tj = TypeInstPtr::KLASS; // all klass loads look alike
1065 1099 ta = TypeAryPtr::RANGE; // generic ignored junk
1066 1100 ptr = TypePtr::BotPTR;
1067 1101 } else if( offset == oopDesc::mark_offset_in_bytes() ) {
1068 1102 tj = TypeInstPtr::MARK;
1069 1103 ta = TypeAryPtr::RANGE; // generic ignored junk
1070 1104 ptr = TypePtr::BotPTR;
1071 1105 } else { // Random constant offset into array body
1072 1106 offset = Type::OffsetBot; // Flatten constant access into array body
1073 1107 tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset);
1074 1108 }
1075 1109 }
1076 1110 // Arrays of fixed size alias with arrays of unknown size.
1077 1111 if (ta->size() != TypeInt::POS) {
1078 1112 const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);
1079 1113 tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset);
1080 1114 }
1081 1115 // Arrays of known objects become arrays of unknown objects.
1082 1116 if (ta->elem()->isa_narrowoop() && ta->elem() != TypeNarrowOop::BOTTOM) {
1083 1117 const TypeAry *tary = TypeAry::make(TypeNarrowOop::BOTTOM, ta->size());
1084 1118 tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
1085 1119 }
1086 1120 if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {
1087 1121 const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());
1088 1122 tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
1089 1123 }
1090 1124 // Arrays of bytes and of booleans both use 'bastore' and 'baload' so
1091 1125 // cannot be distinguished by bytecode alone.
1092 1126 if (ta->elem() == TypeInt::BOOL) {
1093 1127 const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());
1094 1128 ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);
1095 1129 tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset);
1096 1130 }
1097 1131 // During the 2nd round of IterGVN, NotNull castings are removed.
1098 1132 // Make sure the Bottom and NotNull variants alias the same.
1099 1133 // Also, make sure exact and non-exact variants alias the same.
1100 1134 if( ptr == TypePtr::NotNull || ta->klass_is_exact() ) {
1101 1135 if (ta->const_oop()) {
1102 1136 tj = ta = TypeAryPtr::make(TypePtr::Constant,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
1103 1137 } else {
1104 1138 tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset);
1105 1139 }
1106 1140 }
1107 1141 }
1108 1142
1109 1143 // Oop pointers need some flattening
1110 1144 const TypeInstPtr *to = tj->isa_instptr();
1111 1145 if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {
1112 1146 if( ptr == TypePtr::Constant ) {
1113 1147 // No constant oop pointers (such as Strings); they alias with
1114 1148 // unknown strings.
1115 1149 assert(!is_known_inst, "not scalarizable allocation");
1116 1150 tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
1117 1151 } else if( is_known_inst ) {
1118 1152 tj = to; // Keep NotNull and klass_is_exact for instance type
1119 1153 } else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {
1120 1154 // During the 2nd round of IterGVN, NotNull castings are removed.
1121 1155 // Make sure the Bottom and NotNull variants alias the same.
1122 1156 // Also, make sure exact and non-exact variants alias the same.
1123 1157 tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
1124 1158 }
1125 1159 // Canonicalize the holder of this field
1126 1160 ciInstanceKlass *k = to->klass()->as_instance_klass();
1127 1161 if (offset >= 0 && offset < instanceOopDesc::base_offset_in_bytes()) {
1128 1162 // First handle header references such as a LoadKlassNode, even if the
1129 1163 // object's klass is unloaded at compile time (4965979).
1130 1164 if (!is_known_inst) { // Do it only for non-instance types
1131 1165 tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset);
1132 1166 }
1133 1167 } else if (offset < 0 || offset >= k->size_helper() * wordSize) {
1134 1168 to = NULL;
1135 1169 tj = TypeOopPtr::BOTTOM;
1136 1170 offset = tj->offset();
1137 1171 } else {
1138 1172 ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);
1139 1173 if (!k->equals(canonical_holder) || tj->offset() != offset) {
1140 1174 if( is_known_inst ) {
1141 1175 tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, NULL, offset, to->instance_id());
1142 1176 } else {
1143 1177 tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, NULL, offset);
1144 1178 }
1145 1179 }
1146 1180 }
1147 1181 }
1148 1182
1149 1183 // Klass pointers to object array klasses need some flattening
1150 1184 const TypeKlassPtr *tk = tj->isa_klassptr();
1151 1185 if( tk ) {
1152 1186 // If we are referencing a field within a Klass, we need
1153 1187 // to assume the worst case of an Object. Both exact and
1154 1188 // inexact types must flatten to the same alias class.
1155 1189 // Since the flattened result for a klass is defined to be
1156 1190 // precisely java.lang.Object, use a constant ptr.
1157 1191 if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {
1158 1192
1159 1193 tj = tk = TypeKlassPtr::make(TypePtr::Constant,
1160 1194 TypeKlassPtr::OBJECT->klass(),
1161 1195 offset);
1162 1196 }
1163 1197
1164 1198 ciKlass* klass = tk->klass();
1165 1199 if( klass->is_obj_array_klass() ) {
1166 1200 ciKlass* k = TypeAryPtr::OOPS->klass();
1167 1201 if( !k || !k->is_loaded() ) // Only fails for some -Xcomp runs
1168 1202 k = TypeInstPtr::BOTTOM->klass();
1169 1203 tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset );
1170 1204 }
1171 1205
1172 1206 // Check for precise loads from the primary supertype array and force them
1173 1207 // to the supertype cache alias index. Check for generic array loads from
1174 1208 // the primary supertype array and also force them to the supertype cache
1175 1209 // alias index. Since the same load can reach both, we need to merge
1176 1210 // these 2 disparate memories into the same alias class. Since the
1177 1211 // primary supertype array is read-only, there's no chance of confusion
1178 1212 // where we bypass an array load and an array store.
1179 1213 uint off2 = offset - Klass::primary_supers_offset_in_bytes();
1180 1214 if( offset == Type::OffsetBot ||
1181 1215 off2 < Klass::primary_super_limit()*wordSize ) {
1182 1216 offset = sizeof(oopDesc) +Klass::secondary_super_cache_offset_in_bytes();
1183 1217 tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
1184 1218 }
1185 1219 }
1186 1220
1187 1221 // Flatten all Raw pointers together.
1188 1222 if (tj->base() == Type::RawPtr)
1189 1223 tj = TypeRawPtr::BOTTOM;
1190 1224
1191 1225 if (tj->base() == Type::AnyPtr)
1192 1226 tj = TypePtr::BOTTOM; // An error, which the caller must check for.
1193 1227
1194 1228 // Flatten all to bottom for now
1195 1229 switch( _AliasLevel ) {
1196 1230 case 0:
1197 1231 tj = TypePtr::BOTTOM;
1198 1232 break;
1199 1233 case 1: // Flatten to: oop, static, field or array
1200 1234 switch (tj->base()) {
1201 1235 //case Type::AryPtr: tj = TypeAryPtr::RANGE; break;
1202 1236 case Type::RawPtr: tj = TypeRawPtr::BOTTOM; break;
1203 1237 case Type::AryPtr: // do not distinguish arrays at all
1204 1238 case Type::InstPtr: tj = TypeInstPtr::BOTTOM; break;
1205 1239 case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break;
1206 1240 case Type::AnyPtr: tj = TypePtr::BOTTOM; break; // caller checks it
1207 1241 default: ShouldNotReachHere();
1208 1242 }
1209 1243 break;
1210 1244 case 2: // No collapsing at level 2; keep all splits
1211 1245 case 3: // No collapsing at level 3; keep all splits
1212 1246 break;
1213 1247 default:
1214 1248 Unimplemented();
1215 1249 }
1216 1250
1217 1251 offset = tj->offset();
1218 1252 assert( offset != Type::OffsetTop, "Offset has fallen from constant" );
1219 1253
1220 1254 assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) ||
1221 1255 (offset == Type::OffsetBot && tj->base() == Type::AryPtr) ||
1222 1256 (offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) ||
1223 1257 (offset == Type::OffsetBot && tj == TypePtr::BOTTOM) ||
1224 1258 (offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) ||
1225 1259 (offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) ||
1226 1260 (offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr) ,
1227 1261 "For oops, klasses, raw offset must be constant; for arrays the offset is never known" );
1228 1262 assert( tj->ptr() != TypePtr::TopPTR &&
1229 1263 tj->ptr() != TypePtr::AnyNull &&
1230 1264 tj->ptr() != TypePtr::Null, "No imprecise addresses" );
1231 1265 // assert( tj->ptr() != TypePtr::Constant ||
1232 1266 // tj->base() == Type::RawPtr ||
1233 1267 // tj->base() == Type::KlassPtr, "No constant oop addresses" );
1234 1268
1235 1269 return tj;
1236 1270 }
1237 1271
1238 1272 void Compile::AliasType::Init(int i, const TypePtr* at) {
1239 1273 _index = i;
1240 1274 _adr_type = at;
1241 1275 _field = NULL;
1242 1276 _is_rewritable = true; // default
1243 1277 const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL;
1244 1278 if (atoop != NULL && atoop->is_known_instance()) {
1245 1279 const TypeOopPtr *gt = atoop->cast_to_instance_id(TypeOopPtr::InstanceBot);
1246 1280 _general_index = Compile::current()->get_alias_index(gt);
1247 1281 } else {
1248 1282 _general_index = 0;
1249 1283 }
1250 1284 }
1251 1285
1252 1286 //---------------------------------print_on------------------------------------
1253 1287 #ifndef PRODUCT
1254 1288 void Compile::AliasType::print_on(outputStream* st) {
1255 1289 if (index() < 10)
1256 1290 st->print("@ <%d> ", index());
1257 1291 else st->print("@ <%d>", index());
1258 1292 st->print(is_rewritable() ? " " : " RO");
1259 1293 int offset = adr_type()->offset();
1260 1294 if (offset == Type::OffsetBot)
1261 1295 st->print(" +any");
1262 1296 else st->print(" +%-3d", offset);
1263 1297 st->print(" in ");
1264 1298 adr_type()->dump_on(st);
1265 1299 const TypeOopPtr* tjp = adr_type()->isa_oopptr();
1266 1300 if (field() != NULL && tjp) {
1267 1301 if (tjp->klass() != field()->holder() ||
1268 1302 tjp->offset() != field()->offset_in_bytes()) {
1269 1303 st->print(" != ");
1270 1304 field()->print();
1271 1305 st->print(" ***");
1272 1306 }
1273 1307 }
1274 1308 }
1275 1309
1276 1310 void print_alias_types() {
1277 1311 Compile* C = Compile::current();
1278 1312 tty->print_cr("--- Alias types, AliasIdxBot .. %d", C->num_alias_types()-1);
1279 1313 for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) {
1280 1314 C->alias_type(idx)->print_on(tty);
1281 1315 tty->cr();
1282 1316 }
1283 1317 }
1284 1318 #endif
1285 1319
1286 1320
1287 1321 //----------------------------probe_alias_cache--------------------------------
1288 1322 Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) {
1289 1323 intptr_t key = (intptr_t) adr_type;
1290 1324 key ^= key >> logAliasCacheSize;
1291 1325 return &_alias_cache[key & right_n_bits(logAliasCacheSize)];
1292 1326 }
1293 1327
1294 1328
1295 1329 //-----------------------------grow_alias_types--------------------------------
1296 1330 void Compile::grow_alias_types() {
1297 1331 const int old_ats = _max_alias_types; // how many before?
1298 1332 const int new_ats = old_ats; // how many more?
1299 1333 const int grow_ats = old_ats+new_ats; // how many now?
1300 1334 _max_alias_types = grow_ats;
1301 1335 _alias_types = REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats);
1302 1336 AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats);
1303 1337 Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats);
1304 1338 for (int i = 0; i < new_ats; i++) _alias_types[old_ats+i] = &ats[i];
1305 1339 }
1306 1340
1307 1341
1308 1342 //--------------------------------find_alias_type------------------------------
1309 1343 Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create) {
1310 1344 if (_AliasLevel == 0)
1311 1345 return alias_type(AliasIdxBot);
1312 1346
1313 1347 AliasCacheEntry* ace = probe_alias_cache(adr_type);
1314 1348 if (ace->_adr_type == adr_type) {
1315 1349 return alias_type(ace->_index);
1316 1350 }
1317 1351
1318 1352 // Handle special cases.
1319 1353 if (adr_type == NULL) return alias_type(AliasIdxTop);
1320 1354 if (adr_type == TypePtr::BOTTOM) return alias_type(AliasIdxBot);
1321 1355
1322 1356 // Do it the slow way.
1323 1357 const TypePtr* flat = flatten_alias_type(adr_type);
1324 1358
1325 1359 #ifdef ASSERT
1326 1360 assert(flat == flatten_alias_type(flat), "idempotent");
1327 1361 assert(flat != TypePtr::BOTTOM, "cannot alias-analyze an untyped ptr");
1328 1362 if (flat->isa_oopptr() && !flat->isa_klassptr()) {
1329 1363 const TypeOopPtr* foop = flat->is_oopptr();
1330 1364 // Scalarizable allocations have exact klass always.
1331 1365 bool exact = !foop->klass_is_exact() || foop->is_known_instance();
1332 1366 const TypePtr* xoop = foop->cast_to_exactness(exact)->is_ptr();
1333 1367 assert(foop == flatten_alias_type(xoop), "exactness must not affect alias type");
1334 1368 }
1335 1369 assert(flat == flatten_alias_type(flat), "exact bit doesn't matter");
1336 1370 #endif
1337 1371
1338 1372 int idx = AliasIdxTop;
1339 1373 for (int i = 0; i < num_alias_types(); i++) {
1340 1374 if (alias_type(i)->adr_type() == flat) {
1341 1375 idx = i;
1342 1376 break;
1343 1377 }
1344 1378 }
1345 1379
1346 1380 if (idx == AliasIdxTop) {
1347 1381 if (no_create) return NULL;
1348 1382 // Grow the array if necessary.
1349 1383 if (_num_alias_types == _max_alias_types) grow_alias_types();
1350 1384 // Add a new alias type.
1351 1385 idx = _num_alias_types++;
1352 1386 _alias_types[idx]->Init(idx, flat);
1353 1387 if (flat == TypeInstPtr::KLASS) alias_type(idx)->set_rewritable(false);
1354 1388 if (flat == TypeAryPtr::RANGE) alias_type(idx)->set_rewritable(false);
1355 1389 if (flat->isa_instptr()) {
1356 1390 if (flat->offset() == java_lang_Class::klass_offset_in_bytes()
1357 1391 && flat->is_instptr()->klass() == env()->Class_klass())
1358 1392 alias_type(idx)->set_rewritable(false);
1359 1393 }
1360 1394 if (flat->isa_klassptr()) {
1361 1395 if (flat->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc))
1362 1396 alias_type(idx)->set_rewritable(false);
1363 1397 if (flat->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc))
1364 1398 alias_type(idx)->set_rewritable(false);
1365 1399 if (flat->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc))
1366 1400 alias_type(idx)->set_rewritable(false);
1367 1401 if (flat->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc))
1368 1402 alias_type(idx)->set_rewritable(false);
1369 1403 }
1370 1404 // %%% (We would like to finalize JavaThread::threadObj_offset(),
1371 1405 // but the base pointer type is not distinctive enough to identify
1372 1406 // references into JavaThread.)
1373 1407
1374 1408 // Check for final instance fields.
1375 1409 const TypeInstPtr* tinst = flat->isa_instptr();
1376 1410 if (tinst && tinst->offset() >= instanceOopDesc::base_offset_in_bytes()) {
1377 1411 ciInstanceKlass *k = tinst->klass()->as_instance_klass();
1378 1412 ciField* field = k->get_field_by_offset(tinst->offset(), false);
1379 1413 // Set field() and is_rewritable() attributes.
1380 1414 if (field != NULL) alias_type(idx)->set_field(field);
1381 1415 }
1382 1416 const TypeKlassPtr* tklass = flat->isa_klassptr();
1383 1417 // Check for final static fields.
1384 1418 if (tklass && tklass->klass()->is_instance_klass()) {
1385 1419 ciInstanceKlass *k = tklass->klass()->as_instance_klass();
1386 1420 ciField* field = k->get_field_by_offset(tklass->offset(), true);
1387 1421 // Set field() and is_rewritable() attributes.
1388 1422 if (field != NULL) alias_type(idx)->set_field(field);
1389 1423 }
1390 1424 }
1391 1425
1392 1426 // Fill the cache for next time.
1393 1427 ace->_adr_type = adr_type;
1394 1428 ace->_index = idx;
1395 1429 assert(alias_type(adr_type) == alias_type(idx), "type must be installed");
1396 1430
1397 1431 // Might as well try to fill the cache for the flattened version, too.
1398 1432 AliasCacheEntry* face = probe_alias_cache(flat);
1399 1433 if (face->_adr_type == NULL) {
1400 1434 face->_adr_type = flat;
1401 1435 face->_index = idx;
1402 1436 assert(alias_type(flat) == alias_type(idx), "flat type must work too");
1403 1437 }
1404 1438
1405 1439 return alias_type(idx);
1406 1440 }
1407 1441
1408 1442
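// Map a ciField to its alias category: static fields are addressed off the
// holder's klass (TypeKlassPtr), instance fields off an instance oop of the
// holder, and in both cases the field's byte offset is added before the
// lookup. Final fields are expected to land in a non-rewritable slice, which
// the assert below double-checks.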
1409 1443 Compile::AliasType* Compile::alias_type(ciField* field) {
1410 1444 const TypeOopPtr* t;
1411 1445 if (field->is_static())
1412 1446 t = TypeKlassPtr::make(field->holder());
1413 1447 else
1414 1448 t = TypeOopPtr::make_from_klass_raw(field->holder());
1415 1449 AliasType* atp = alias_type(t->add_offset(field->offset_in_bytes()));
1416 1450 assert(field->is_final() == !atp->is_rewritable(), "must get the rewritable bits correct");
1417 1451 return atp;
1418 1452 }
1419 1453
1420 1454
1421 1455 //------------------------------have_alias_type--------------------------------
1422 1456 bool Compile::have_alias_type(const TypePtr* adr_type) {
1423 1457 AliasCacheEntry* ace = probe_alias_cache(adr_type);
1424 1458 if (ace->_adr_type == adr_type) {
1425 1459 return true;
1426 1460 }
1427 1461
1428 1462 // Handle special cases.
1429 1463 if (adr_type == NULL) return true;
1430 1464 if (adr_type == TypePtr::BOTTOM) return true;
1431 1465
1432 1466 return find_alias_type(adr_type, true) != NULL;
1433 1467 }
1434 1468
1435 1469 //-----------------------------must_alias--------------------------------------
1436 1470 // True if all values of the given address type are in the given alias category.
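// (must_alias answers "definitely the same slice" and can_alias below answers
// "possibly the same slice": a query against AliasIdxBot must alias
// everything, while a query against AliasIdxTop can alias nothing.)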
1437 1471 bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) {
1438 1472 if (alias_idx == AliasIdxBot) return true; // the universal category
1439 1473 if (adr_type == NULL) return true; // NULL serves as TypePtr::TOP
1440 1474 if (alias_idx == AliasIdxTop) return false; // the empty category
1441 1475 if (adr_type->base() == Type::AnyPtr) return false; // TypePtr::BOTTOM or its twins
1442 1476
1443 1477 // the only remaining possible overlap is identity
1444 1478 int adr_idx = get_alias_index(adr_type);
1445 1479 assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
1446 1480 assert(adr_idx == alias_idx ||
1447 1481 (alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM
1448 1482 && adr_type != TypeOopPtr::BOTTOM),
1449 1483 "should not be testing for overlap with an unsafe pointer");
1450 1484 return adr_idx == alias_idx;
1451 1485 }
1452 1486
1453 1487 //------------------------------can_alias--------------------------------------
1454 1488 // True if any values of the given address type are in the given alias category.
1455 1489 bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) {
1456 1490 if (alias_idx == AliasIdxTop) return false; // the empty category
1457 1491 if (adr_type == NULL) return false; // NULL serves as TypePtr::TOP
1458 1492 if (alias_idx == AliasIdxBot) return true; // the universal category
1459 1493 if (adr_type->base() == Type::AnyPtr) return true; // TypePtr::BOTTOM or its twins
1460 1494
1461 1495 // the only remaining possible overlap is identity
1462 1496 int adr_idx = get_alias_index(adr_type);
1463 1497 assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
1464 1498 return adr_idx == alias_idx;
1465 1499 }
1466 1500
1467 1501
1468 1502
1469 1503 //---------------------------pop_warm_call-------------------------------------
1470 1504 WarmCallInfo* Compile::pop_warm_call() {
1471 1505 WarmCallInfo* wci = _warm_calls;
1472 1506 if (wci != NULL) _warm_calls = wci->remove_from(wci);
1473 1507 return wci;
1474 1508 }
1475 1509
1476 1510 //----------------------------Inline_Warm--------------------------------------
1477 1511 int Compile::Inline_Warm() {
1478 1512 // If there is room, try to inline some more warm call sites.
1479 1513 // %%% Do a graph index compaction pass when we think we're out of space?
1480 1514 if (!InlineWarmCalls) return 0;
1481 1515
1482 1516 int calls_made_hot = 0;
1483 1517 int room_to_grow = NodeCountInliningCutoff - unique();
1484 1518 int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep);
1485 1519 int amount_grown = 0;
1486 1520 WarmCallInfo* call;
1487 1521 while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) {
1488 1522 int est_size = (int)call->size();
1489 1523 if (est_size > (room_to_grow - amount_grown)) {
1490 1524 // This one won't fit anyway. Get rid of it.
1491 1525 call->make_cold();
1492 1526 continue;
1493 1527 }
1494 1528 call->make_hot();
1495 1529 calls_made_hot++;
1496 1530 amount_grown += est_size;
1497 1531 amount_to_grow -= est_size;
1498 1532 }
1499 1533
1500 1534 if (calls_made_hot > 0) set_major_progress();
1501 1535 return calls_made_hot;
1502 1536 }
1503 1537
1504 1538
1505 1539 //----------------------------Finish_Warm--------------------------------------
1506 1540 void Compile::Finish_Warm() {
1507 1541 if (!InlineWarmCalls) return;
1508 1542 if (failing()) return;
1509 1543 if (warm_calls() == NULL) return;
1510 1544
1511 1545 // Clean up loose ends, if we are out of space for inlining.
1512 1546 WarmCallInfo* call;
1513 1547 while ((call = pop_warm_call()) != NULL) {
1514 1548 call->make_cold();
1515 1549 }
1516 1550 }
1517 1551
1518 1552 //---------------------cleanup_loop_predicates-----------------------
1519 1553 // Remove the opaque nodes that protect the predicates so that all unused
1520 1554 // checks and uncommon_traps will be eliminated from the ideal graph
1521 1555 void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) {
1522 1556 if (predicate_count()==0) return;
1523 1557 for (int i = predicate_count(); i > 0; i--) {
1524 1558 Node * n = predicate_opaque1_node(i-1);
1525 1559 assert(n->Opcode() == Op_Opaque1, "must be");
1526 1560 igvn.replace_node(n, n->in(1));
1527 1561 }
1528 1562 assert(predicate_count()==0, "should be clean!");
1529 1563 igvn.optimize();
1530 1564 }
1531 1565
1532 1566 //------------------------------Optimize---------------------------------------
1533 1567 // Given a graph, optimize it.
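// The pass order below is roughly: iterative GVN, optional escape analysis,
// several rounds of loop optimizations (range check elimination, peeling,
// unrolling, predication), conditional constant propagation, another round of
// iterative GVN and loop optimizations, macro expansion, and finally
// final_graph_reshaping(). Most steps check failing() and bail out early.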
1534 1568 void Compile::Optimize() {
1535 1569 TracePhase t1("optimizer", &_t_optimizer, true);
1536 1570
1537 1571 #ifndef PRODUCT
1538 1572 if (env()->break_at_compile()) {
1539 1573 BREAKPOINT;
1540 1574 }
1541 1575
1542 1576 #endif
1543 1577
1544 1578 ResourceMark rm;
1545 1579 int loop_opts_cnt;
1546 1580
1547 1581 NOT_PRODUCT( verify_graph_edges(); )
1548 1582
1549 1583 print_method("After Parsing");
1550 1584
1551 1585 {
1552 1586 // Iterative Global Value Numbering, including ideal transforms
1553 1587 // Initialize IterGVN with types and values from parse-time GVN
1554 1588 PhaseIterGVN igvn(initial_gvn());
1555 1589 {
1556 1590 NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
1557 1591 igvn.optimize();
1558 1592 }
1559 1593
1560 1594 print_method("Iter GVN 1", 2);
1561 1595
1562 1596 if (failing()) return;
1563 1597
1564 1598 // Perform escape analysis
1565 1599 if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
1566 1600 TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, true);
1567 1601 ConnectionGraph::do_analysis(this, &igvn);
1568 1602
1569 1603 if (failing()) return;
1570 1604
1571 1605 igvn.optimize();
1572 1606 print_method("Iter GVN 3", 2);
1573 1607
1574 1608 if (failing()) return;
1575 1609
1576 1610 }
1577 1611
1578 1612 // Loop transforms on the ideal graph. Range Check Elimination,
1579 1613 // peeling, unrolling, etc.
1580 1614
1581 1615 // Set loop opts counter
1582 1616 loop_opts_cnt = num_loop_opts();
1583 1617 if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
1584 1618 {
1585 1619 TracePhase t2("idealLoop", &_t_idealLoop, true);
1586 1620 PhaseIdealLoop ideal_loop( igvn, true, UseLoopPredicate);
1587 1621 loop_opts_cnt--;
1588 1622 if (major_progress()) print_method("PhaseIdealLoop 1", 2);
1589 1623 if (failing()) return;
1590 1624 }
1591 1625 // Loop opts pass if partial peeling occurred in previous pass
1592 1626 if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
1593 1627 TracePhase t3("idealLoop", &_t_idealLoop, true);
1594 1628 PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate);
1595 1629 loop_opts_cnt--;
1596 1630 if (major_progress()) print_method("PhaseIdealLoop 2", 2);
1597 1631 if (failing()) return;
1598 1632 }
1599 1633 // Loop opts pass for loop-unrolling before CCP
1600 1634 if(major_progress() && (loop_opts_cnt > 0)) {
1601 1635 TracePhase t4("idealLoop", &_t_idealLoop, true);
1602 1636 PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate);
1603 1637 loop_opts_cnt--;
1604 1638 if (major_progress()) print_method("PhaseIdealLoop 3", 2);
1605 1639 }
1606 1640 if (!failing()) {
1607 1641 // Verify that last round of loop opts produced a valid graph
1608 1642 NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
1609 1643 PhaseIdealLoop::verify(igvn);
1610 1644 }
1611 1645 }
1612 1646 if (failing()) return;
1613 1647
1614 1648 // Conditional Constant Propagation;
1615 1649 PhaseCCP ccp( &igvn );
1616 1650 assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
1617 1651 {
1618 1652 TracePhase t2("ccp", &_t_ccp, true);
1619 1653 ccp.do_transform();
1620 1654 }
1621 1655 print_method("PhaseCPP 1", 2);
1622 1656
1623 1657 assert( true, "Break here to ccp.dump_old2new_map()");
1624 1658
1625 1659 // Iterative Global Value Numbering, including ideal transforms
1626 1660 {
1627 1661 NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )
1628 1662 igvn = ccp;
1629 1663 igvn.optimize();
1630 1664 }
1631 1665
1632 1666 print_method("Iter GVN 2", 2);
1633 1667
1634 1668 if (failing()) return;
1635 1669
1636 1670 // Loop transforms on the ideal graph. Range Check Elimination,
1637 1671 // peeling, unrolling, etc.
1638 1672 if(loop_opts_cnt > 0) {
1639 1673 debug_only( int cnt = 0; );
1640 1674 bool loop_predication = UseLoopPredicate;
1641 1675 while(major_progress() && (loop_opts_cnt > 0)) {
1642 1676 TracePhase t2("idealLoop", &_t_idealLoop, true);
1643 1677 assert( cnt++ < 40, "infinite cycle in loop optimization" );
1644 1678 PhaseIdealLoop ideal_loop( igvn, true, loop_predication);
1645 1679 loop_opts_cnt--;
1646 1680 if (major_progress()) print_method("PhaseIdealLoop iterations", 2);
1647 1681 if (failing()) return;
1648 1682 // Perform loop predication optimization during first iteration after CCP.
1649 1683 // After that switch it off and cleanup unused loop predicates.
1650 1684 if (loop_predication) {
1651 1685 loop_predication = false;
1652 1686 cleanup_loop_predicates(igvn);
1653 1687 if (failing()) return;
1654 1688 }
1655 1689 }
1656 1690 }
1657 1691
1658 1692 {
1659 1693 // Verify that all previous optimizations produced a valid graph
1660 1694 // at least to this point, even if no loop optimizations were done.
1661 1695 NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
1662 1696 PhaseIdealLoop::verify(igvn);
1663 1697 }
1664 1698
1665 1699 {
1666 1700 NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
1667 1701 PhaseMacroExpand mex(igvn);
1668 1702 if (mex.expand_macro_nodes()) {
1669 1703 assert(failing(), "must bail out w/ explicit message");
1670 1704 return;
1671 1705 }
1672 1706 }
1673 1707
1674 1708 } // (End scope of igvn; run destructor if necessary for asserts.)
1675 1709
1676 1710 // A method with only infinite loops has no edges entering loops from root
1677 1711 {
1678 1712 NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
1679 1713 if (final_graph_reshaping()) {
1680 1714 assert(failing(), "must bail out w/ explicit message");
1681 1715 return;
1682 1716 }
1683 1717 }
1684 1718
1685 1719 print_method("Optimize finished", 2);
1686 1720 }
1687 1721
1688 1722
1689 1723 //------------------------------Code_Gen---------------------------------------
1690 1724 // Given a graph, generate code for it
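// Roughly: instruction selection (Matcher), CFG construction with dominators,
// block frequency estimation and global code motion (PhaseCFG), register
// allocation (PhaseChaitin), empty-block removal and block layout, an
// optional peephole pass, and finally Output() to emit the instruction bits.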
1691 1725 void Compile::Code_Gen() {
1692 1726 if (failing()) return;
1693 1727
1694 1728 // Perform instruction selection. You might think we could reclaim Matcher
1695 1729 // memory PDQ, but actually the Matcher is used in generating spill code.
1696 1730 // Internals of the Matcher (including some VectorSets) must remain live
1697 1731 // for a while - thus I cannot reclaim Matcher memory lest a VectorSet usage
1698 1732 // set a bit in reclaimed memory.
1699 1733
1700 1734 // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
1701 1735 // nodes. Mapping is only valid at the root of each matched subtree.
1702 1736 NOT_PRODUCT( verify_graph_edges(); )
1703 1737
1704 1738 Node_List proj_list;
1705 1739 Matcher m(proj_list);
1706 1740 _matcher = &m;
1707 1741 {
1708 1742 TracePhase t2("matcher", &_t_matcher, true);
1709 1743 m.match();
1710 1744 }
1711 1745 // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
1712 1746 // nodes. Mapping is only valid at the root of each matched subtree.
1713 1747 NOT_PRODUCT( verify_graph_edges(); )
1714 1748
1715 1749 // If you have too many nodes, or if matching has failed, bail out
1716 1750 check_node_count(0, "out of nodes matching instructions");
1717 1751 if (failing()) return;
1718 1752
1719 1753 // Build a proper-looking CFG
1720 1754 PhaseCFG cfg(node_arena(), root(), m);
1721 1755 _cfg = &cfg;
1722 1756 {
1723 1757 NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
1724 1758 cfg.Dominators();
1725 1759 if (failing()) return;
1726 1760
1727 1761 NOT_PRODUCT( verify_graph_edges(); )
1728 1762
1729 1763 cfg.Estimate_Block_Frequency();
1730 1764 cfg.GlobalCodeMotion(m,unique(),proj_list);
1731 1765
1732 1766 print_method("Global code motion", 2);
1733 1767
1734 1768 if (failing()) return;
1735 1769 NOT_PRODUCT( verify_graph_edges(); )
1736 1770
1737 1771 debug_only( cfg.verify(); )
1738 1772 }
1739 1773 NOT_PRODUCT( verify_graph_edges(); )
1740 1774
1741 1775 PhaseChaitin regalloc(unique(),cfg,m);
1742 1776 _regalloc = &regalloc;
1743 1777 {
1744 1778 TracePhase t2("regalloc", &_t_registerAllocation, true);
1745 1779 // Perform any platform dependent preallocation actions. This is used,
1746 1780 // for example, to avoid taking an implicit null pointer exception
1747 1781 // using the frame pointer on win95.
1748 1782 _regalloc->pd_preallocate_hook();
1749 1783
1750 1784 // Perform register allocation. After Chaitin, use-def chains are
1751 1785 // no longer accurate (at spill code) and so must be ignored.
1752 1786 // Node->LRG->reg mappings are still accurate.
1753 1787 _regalloc->Register_Allocate();
1754 1788
1755 1789 // Bail out if the allocator builds too many nodes
1756 1790 if (failing()) return;
1757 1791 }
1758 1792
1759 1793 // Prior to register allocation we kept empty basic blocks in case the
1760 1794 // the allocator needed a place to spill. After register allocation we
1761 1795 // are not adding any new instructions. If any basic block is empty, we
1762 1796 // can now safely remove it.
1763 1797 {
1764 1798 NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
1765 1799 cfg.remove_empty();
1766 1800 if (do_freq_based_layout()) {
1767 1801 PhaseBlockLayout layout(cfg);
1768 1802 } else {
1769 1803 cfg.set_loop_alignment();
1770 1804 }
1771 1805 cfg.fixup_flow();
1772 1806 }
1773 1807
1774 1808 // Perform any platform dependent postallocation verifications.
1775 1809 debug_only( _regalloc->pd_postallocate_verify_hook(); )
1776 1810
1777 1811 // Apply peephole optimizations
1778 1812 if( OptoPeephole ) {
1779 1813 NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
1780 1814 PhasePeephole peep( _regalloc, cfg);
1781 1815 peep.do_transform();
1782 1816 }
1783 1817
1784 1818 // Convert Nodes to instruction bits in a buffer
1785 1819 {
1786 1820 // %%%% workspace merge brought two timers together for one job
1787 1821 TracePhase t2a("output", &_t_output, true);
1788 1822 NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
1789 1823 Output();
1790 1824 }
1791 1825
1792 1826 print_method("Final Code");
1793 1827
1794 1828 // He's dead, Jim.
1795 1829 _cfg = (PhaseCFG*)0xdeadbeef;
1796 1830 _regalloc = (PhaseChaitin*)0xdeadbeef;
1797 1831 }
1798 1832
1799 1833
1800 1834 //------------------------------dump_asm---------------------------------------
1801 1835 // Dump formatted assembly
1802 1836 #ifndef PRODUCT
1803 1837 void Compile::dump_asm(int *pcs, uint pc_limit) {
1804 1838 bool cut_short = false;
1805 1839 tty->print_cr("#");
1806 1840 tty->print("# "); _tf->dump(); tty->cr();
1807 1841 tty->print_cr("#");
1808 1842
1809 1843 // For all blocks
1810 1844 int pc = 0x0; // Program counter
1811 1845 char starts_bundle = ' ';
1812 1846 _regalloc->dump_frame();
1813 1847
1814 1848 Node *n = NULL;
1815 1849 for( uint i=0; i<_cfg->_num_blocks; i++ ) {
1816 1850 if (VMThread::should_terminate()) { cut_short = true; break; }
1817 1851 Block *b = _cfg->_blocks[i];
1818 1852 if (b->is_connector() && !Verbose) continue;
1819 1853 n = b->_nodes[0];
1820 1854 if (pcs && n->_idx < pc_limit)
1821 1855 tty->print("%3.3x ", pcs[n->_idx]);
1822 1856 else
1823 1857 tty->print(" ");
1824 1858 b->dump_head( &_cfg->_bbs );
1825 1859 if (b->is_connector()) {
1826 1860 tty->print_cr(" # Empty connector block");
1827 1861 } else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
1828 1862 tty->print_cr(" # Block is sole successor of call");
1829 1863 }
1830 1864
1831 1865 // For all instructions
1832 1866 Node *delay = NULL;
1833 1867 for( uint j = 0; j<b->_nodes.size(); j++ ) {
1834 1868 if (VMThread::should_terminate()) { cut_short = true; break; }
1835 1869 n = b->_nodes[j];
1836 1870 if (valid_bundle_info(n)) {
1837 1871 Bundle *bundle = node_bundling(n);
1838 1872 if (bundle->used_in_unconditional_delay()) {
1839 1873 delay = n;
1840 1874 continue;
1841 1875 }
1842 1876 if (bundle->starts_bundle())
1843 1877 starts_bundle = '+';
1844 1878 }
1845 1879
1846 1880 if (WizardMode) n->dump();
1847 1881
1848 1882 if( !n->is_Region() && // Don't print in the Assembly
1849 1883 !n->is_Phi() && // a few noisily useless nodes
1850 1884 !n->is_Proj() &&
1851 1885 !n->is_MachTemp() &&
1852 1886 !n->is_SafePointScalarObject() &&
1853 1887 !n->is_Catch() && // Would be nice to print exception table targets
1854 1888 !n->is_MergeMem() && // Not very interesting
1855 1889 !n->is_top() && // Debug info table constants
1856 1890 !(n->is_Con() && !n->is_Mach())// Debug info table constants
1857 1891 ) {
1858 1892 if (pcs && n->_idx < pc_limit)
1859 1893 tty->print("%3.3x", pcs[n->_idx]);
1860 1894 else
1861 1895 tty->print(" ");
1862 1896 tty->print(" %c ", starts_bundle);
1863 1897 starts_bundle = ' ';
1864 1898 tty->print("\t");
1865 1899 n->format(_regalloc, tty);
1866 1900 tty->cr();
1867 1901 }
1868 1902
1869 1903 // If we have an instruction with a delay slot, and have seen a delay,
1870 1904 // then back up and print it
1871 1905 if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
1872 1906 assert(delay != NULL, "no unconditional delay instruction");
1873 1907 if (WizardMode) delay->dump();
1874 1908
1875 1909 if (node_bundling(delay)->starts_bundle())
1876 1910 starts_bundle = '+';
1877 1911 if (pcs && n->_idx < pc_limit)
1878 1912 tty->print("%3.3x", pcs[n->_idx]);
1879 1913 else
1880 1914 tty->print(" ");
1881 1915 tty->print(" %c ", starts_bundle);
1882 1916 starts_bundle = ' ';
1883 1917 tty->print("\t");
1884 1918 delay->format(_regalloc, tty);
1885 1919 tty->print_cr("");
1886 1920 delay = NULL;
1887 1921 }
1888 1922
1889 1923 // Dump the exception table as well
1890 1924 if( n->is_Catch() && (Verbose || WizardMode) ) {
1891 1925 // Print the exception table for this offset
1892 1926 _handler_table.print_subtable_for(pc);
1893 1927 }
1894 1928 }
1895 1929
1896 1930 if (pcs && n->_idx < pc_limit)
1897 1931 tty->print_cr("%3.3x", pcs[n->_idx]);
1898 1932 else
1899 1933 tty->print_cr("");
1900 1934
1901 1935 assert(cut_short || delay == NULL, "no unconditional delay branch");
1902 1936
1903 1937 } // End of per-block dump
1904 1938 tty->print_cr("");
1905 1939
1906 1940 if (cut_short) tty->print_cr("*** disassembly is cut short ***");
1907 1941 }
1908 1942 #endif
1909 1943
1910 1944 //------------------------------Final_Reshape_Counts---------------------------
1911 1945 // This class defines counters to help identify when a method
1912 1946 // may/must be executed using hardware with only 24-bit precision.
1913 1947 struct Final_Reshape_Counts : public StackObj {
1914 1948 int _call_count; // count non-inlined 'common' calls
1915 1949 int _float_count; // count float ops requiring 24-bit precision
1916 1950 int _double_count; // count double ops requiring more precision
1917 1951 int _java_call_count; // count non-inlined 'java' calls
1918 1952 int _inner_loop_count; // count loops which need alignment
1919 1953 VectorSet _visited; // Visitation flags
1920 1954 Node_List _tests; // Set of IfNodes & PCTableNodes
1921 1955
1922 1956 Final_Reshape_Counts() :
1923 1957 _call_count(0), _float_count(0), _double_count(0),
1924 1958 _java_call_count(0), _inner_loop_count(0),
1925 1959 _visited( Thread::current()->resource_area() ) { }
1926 1960
1927 1961 void inc_call_count () { _call_count ++; }
1928 1962 void inc_float_count () { _float_count ++; }
1929 1963 void inc_double_count() { _double_count++; }
1930 1964 void inc_java_call_count() { _java_call_count++; }
1931 1965 void inc_inner_loop_count() { _inner_loop_count++; }
1932 1966
1933 1967 int get_call_count () const { return _call_count ; }
1934 1968 int get_float_count () const { return _float_count ; }
1935 1969 int get_double_count() const { return _double_count; }
1936 1970 int get_java_call_count() const { return _java_call_count; }
1937 1971 int get_inner_loop_count() const { return _inner_loop_count; }
1938 1972 };
1939 1973
1940 1974 static bool oop_offset_is_sane(const TypeInstPtr* tp) {
1941 1975 ciInstanceKlass *k = tp->klass()->as_instance_klass();
1942 1976 // Make sure the offset goes inside the instance layout.
1943 1977 return k->contains_field_offset(tp->offset());
1944 1978 // Note that OffsetBot and OffsetTop are very negative.
1945 1979 }
1946 1980
1947 1981 //------------------------------final_graph_reshaping_impl----------------------
1948 1982 // Implement items 1-5 from final_graph_reshaping below.
1949 1983 static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
1950 1984
1951 1985 if ( n->outcnt() == 0 ) return; // dead node
1952 1986 uint nop = n->Opcode();
1953 1987
1954 1988 // Check for 2-input instruction with "last use" on right input.
1955 1989 // Swap to left input. Implements item (2).
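// For example (illustrative): in t = AddI(x, y), if y has this add as its
// only use but x is used elsewhere, swapping gives t = AddI(y, x), so a
// two-address machine can overwrite the last-use operand in place.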
1956 1990 if( n->req() == 3 && // two-input instruction
1957 1991 n->in(1)->outcnt() > 1 && // left use is NOT a last use
1958 1992 (!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop
1959 1993 n->in(2)->outcnt() == 1 && // right use IS a last use
1960 1994 !n->in(2)->is_Con() ) { // right use is not a constant
1961 1995 // Check for commutative opcode
1962 1996 switch( nop ) {
1963 1997 case Op_AddI: case Op_AddF: case Op_AddD: case Op_AddL:
1964 1998 case Op_MaxI: case Op_MinI:
1965 1999 case Op_MulI: case Op_MulF: case Op_MulD: case Op_MulL:
1966 2000 case Op_AndL: case Op_XorL: case Op_OrL:
1967 2001 case Op_AndI: case Op_XorI: case Op_OrI: {
1968 2002 // Move "last use" input to left by swapping inputs
1969 2003 n->swap_edges(1, 2);
1970 2004 break;
1971 2005 }
1972 2006 default:
1973 2007 break;
1974 2008 }
1975 2009 }
1976 2010
1977 2011 #ifdef ASSERT
1978 2012 if( n->is_Mem() ) {
1979 2013 Compile* C = Compile::current();
1980 2014 int alias_idx = C->get_alias_index(n->as_Mem()->adr_type());
1981 2015 assert( n->in(0) != NULL || alias_idx != Compile::AliasIdxRaw ||
1982 2016 // oop will be recorded in oop map if load crosses safepoint
1983 2017 n->is_Load() && (n->as_Load()->bottom_type()->isa_oopptr() ||
1984 2018 LoadNode::is_immutable_value(n->in(MemNode::Address))),
1985 2019 "raw memory operations should have control edge");
1986 2020 }
1987 2021 #endif
1988 2022 // Count FPU ops and common calls, implements item (3)
1989 2023 switch( nop ) {
1990 2024 // Count all float operations that may use FPU
1991 2025 case Op_AddF:
1992 2026 case Op_SubF:
1993 2027 case Op_MulF:
1994 2028 case Op_DivF:
1995 2029 case Op_NegF:
1996 2030 case Op_ModF:
1997 2031 case Op_ConvI2F:
1998 2032 case Op_ConF:
1999 2033 case Op_CmpF:
2000 2034 case Op_CmpF3:
2001 2035 // case Op_ConvL2F: // longs are split into 32-bit halves
2002 2036 frc.inc_float_count();
2003 2037 break;
2004 2038
2005 2039 case Op_ConvF2D:
2006 2040 case Op_ConvD2F:
2007 2041 frc.inc_float_count();
2008 2042 frc.inc_double_count();
2009 2043 break;
2010 2044
2011 2045 // Count all double operations that may use FPU
2012 2046 case Op_AddD:
2013 2047 case Op_SubD:
2014 2048 case Op_MulD:
2015 2049 case Op_DivD:
2016 2050 case Op_NegD:
2017 2051 case Op_ModD:
2018 2052 case Op_ConvI2D:
2019 2053 case Op_ConvD2I:
2020 2054 // case Op_ConvL2D: // handled by leaf call
2021 2055 // case Op_ConvD2L: // handled by leaf call
2022 2056 case Op_ConD:
2023 2057 case Op_CmpD:
2024 2058 case Op_CmpD3:
2025 2059 frc.inc_double_count();
2026 2060 break;
2027 2061 case Op_Opaque1: // Remove Opaque Nodes before matching
2028 2062 case Op_Opaque2: // Remove Opaque Nodes before matching
2029 2063 n->subsume_by(n->in(1));
2030 2064 break;
2031 2065 case Op_CallStaticJava:
2032 2066 case Op_CallJava:
2033 2067 case Op_CallDynamicJava:
2034 2068 frc.inc_java_call_count(); // Count java call site;
2035 2069 case Op_CallRuntime:
2036 2070 case Op_CallLeaf:
2037 2071 case Op_CallLeafNoFP: {
2038 2072 assert( n->is_Call(), "" );
2039 2073 CallNode *call = n->as_Call();
2040 2074 // Count call sites where the FP mode bit would have to be flipped.
2041 2075 // Do not count uncommon runtime calls:
2042 2076 // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
2043 2077 // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
2044 2078 if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
2045 2079 frc.inc_call_count(); // Count the call site
2046 2080 } else { // See if uncommon argument is shared
2047 2081 Node *n = call->in(TypeFunc::Parms);
2048 2082 int nop = n->Opcode();
2049 2083 // Clone shared simple arguments to uncommon calls, item (1).
2050 2084 if( n->outcnt() > 1 &&
2051 2085 !n->is_Proj() &&
2052 2086 nop != Op_CreateEx &&
2053 2087 nop != Op_CheckCastPP &&
2054 2088 nop != Op_DecodeN &&
2055 2089 !n->is_Mem() ) {
2056 2090 Node *x = n->clone();
2057 2091 call->set_req( TypeFunc::Parms, x );
2058 2092 }
2059 2093 }
2060 2094 break;
2061 2095 }
2062 2096
2063 2097 case Op_StoreD:
2064 2098 case Op_LoadD:
2065 2099 case Op_LoadD_unaligned:
2066 2100 frc.inc_double_count();
2067 2101 goto handle_mem;
2068 2102 case Op_StoreF:
2069 2103 case Op_LoadF:
2070 2104 frc.inc_float_count();
2071 2105 goto handle_mem;
2072 2106
2073 2107 case Op_StoreB:
2074 2108 case Op_StoreC:
2075 2109 case Op_StoreCM:
2076 2110 case Op_StorePConditional:
2077 2111 case Op_StoreI:
2078 2112 case Op_StoreL:
2079 2113 case Op_StoreIConditional:
2080 2114 case Op_StoreLConditional:
2081 2115 case Op_CompareAndSwapI:
2082 2116 case Op_CompareAndSwapL:
2083 2117 case Op_CompareAndSwapP:
2084 2118 case Op_CompareAndSwapN:
2085 2119 case Op_StoreP:
2086 2120 case Op_StoreN:
2087 2121 case Op_LoadB:
2088 2122 case Op_LoadUB:
2089 2123 case Op_LoadUS:
2090 2124 case Op_LoadI:
2091 2125 case Op_LoadUI2L:
2092 2126 case Op_LoadKlass:
2093 2127 case Op_LoadNKlass:
2094 2128 case Op_LoadL:
2095 2129 case Op_LoadL_unaligned:
2096 2130 case Op_LoadPLocked:
2097 2131 case Op_LoadLLocked:
2098 2132 case Op_LoadP:
2099 2133 case Op_LoadN:
2100 2134 case Op_LoadRange:
2101 2135 case Op_LoadS: {
2102 2136 handle_mem:
2103 2137 #ifdef ASSERT
2104 2138 if( VerifyOptoOopOffsets ) {
2105 2139 assert( n->is_Mem(), "" );
2106 2140 MemNode *mem = (MemNode*)n;
2107 2141 // Check to see if address types have grounded out somehow.
2108 2142 const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
2109 2143 assert( !tp || oop_offset_is_sane(tp), "" );
2110 2144 }
2111 2145 #endif
2112 2146 break;
2113 2147 }
2114 2148
2115 2149 case Op_AddP: { // Assert sane base pointers
2116 2150 Node *addp = n->in(AddPNode::Address);
2117 2151 assert( !addp->is_AddP() ||
2118 2152 addp->in(AddPNode::Base)->is_top() || // Top OK for allocation
2119 2153 addp->in(AddPNode::Base) == n->in(AddPNode::Base),
2120 2154 "Base pointers must match" );
2121 2155 #ifdef _LP64
2122 2156 if (UseCompressedOops &&
2123 2157 addp->Opcode() == Op_ConP &&
2124 2158 addp == n->in(AddPNode::Base) &&
2125 2159 n->in(AddPNode::Offset)->is_Con()) {
2126 2160 // Use addressing with narrow klass to load with offset on x86.
2127 2161 // On SPARC, loading a 32-bit constant and decoding it takes fewer
2128 2162 // instructions (4) than loading a 64-bit constant (7).
2129 2163 // Do this transformation here since IGVN will convert ConN back to ConP.
2130 2164 const Type* t = addp->bottom_type();
2131 2165 if (t->isa_oopptr()) {
2132 2166 Node* nn = NULL;
2133 2167
2134 2168 // Look for existing ConN node of the same exact type.
2135 2169 Compile* C = Compile::current();
2136 2170 Node* r = C->root();
2137 2171 uint cnt = r->outcnt();
2138 2172 for (uint i = 0; i < cnt; i++) {
2139 2173 Node* m = r->raw_out(i);
2140 2174 if (m!= NULL && m->Opcode() == Op_ConN &&
2141 2175 m->bottom_type()->make_ptr() == t) {
2142 2176 nn = m;
2143 2177 break;
2144 2178 }
2145 2179 }
2146 2180 if (nn != NULL) {
2147 2181 // Decode a narrow oop to match address
2148 2182 // [R12 + narrow_oop_reg<<3 + offset]
2149 2183 nn = new (C, 2) DecodeNNode(nn, t);
2150 2184 n->set_req(AddPNode::Base, nn);
2151 2185 n->set_req(AddPNode::Address, nn);
2152 2186 if (addp->outcnt() == 0) {
2153 2187 addp->disconnect_inputs(NULL);
2154 2188 }
2155 2189 }
2156 2190 }
2157 2191 }
2158 2192 #endif
2159 2193 break;
2160 2194 }
2161 2195
2162 2196 #ifdef _LP64
2163 2197 case Op_CastPP:
2164 2198 if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {
2165 2199 Compile* C = Compile::current();
2166 2200 Node* in1 = n->in(1);
2167 2201 const Type* t = n->bottom_type();
2168 2202 Node* new_in1 = in1->clone();
2169 2203 new_in1->as_DecodeN()->set_type(t);
2170 2204
2171 2205 if (!Matcher::narrow_oop_use_complex_address()) {
2172 2206 //
2173 2207 // x86, ARM and friends can handle 2 adds in addressing mode
2174 2208 // and Matcher can fold a DecodeN node into address by using
2175 2209 // a narrow oop directly and do implicit NULL check in address:
2176 2210 //
2177 2211 // [R12 + narrow_oop_reg<<3 + offset]
2178 2212 // NullCheck narrow_oop_reg
2179 2213 //
2180 2214 // On other platforms (Sparc) we have to keep new DecodeN node and
2181 2215 // use it to do implicit NULL check in address:
2182 2216 //
2183 2217 // decode_not_null narrow_oop_reg, base_reg
2184 2218 // [base_reg + offset]
2185 2219 // NullCheck base_reg
2186 2220 //
2187 2221 // Pin the new DecodeN node to the non-null path on these platforms (Sparc)
2188 2222 // to record which NULL check the new DecodeN node corresponds to,
2189 2223 // so it can be used as the value in implicit_null_check().
2190 2224 //
2191 2225 new_in1->set_req(0, n->in(0));
2192 2226 }
2193 2227
2194 2228 n->subsume_by(new_in1);
2195 2229 if (in1->outcnt() == 0) {
2196 2230 in1->disconnect_inputs(NULL);
2197 2231 }
2198 2232 }
2199 2233 break;
2200 2234
2201 2235 case Op_CmpP:
2202 2236 // Do this transformation here to preserve CmpPNode::sub() and
2203 2237 // other TypePtr related Ideal optimizations (for example, ptr nullness).
2204 2238 if (n->in(1)->is_DecodeN() || n->in(2)->is_DecodeN()) {
2205 2239 Node* in1 = n->in(1);
2206 2240 Node* in2 = n->in(2);
2207 2241 if (!in1->is_DecodeN()) {
2208 2242 in2 = in1;
2209 2243 in1 = n->in(2);
2210 2244 }
2211 2245 assert(in1->is_DecodeN(), "sanity");
2212 2246
2213 2247 Compile* C = Compile::current();
2214 2248 Node* new_in2 = NULL;
2215 2249 if (in2->is_DecodeN()) {
2216 2250 new_in2 = in2->in(1);
2217 2251 } else if (in2->Opcode() == Op_ConP) {
2218 2252 const Type* t = in2->bottom_type();
2219 2253 if (t == TypePtr::NULL_PTR) {
2220 2254 // Don't convert a CmpP null check into CmpN if the compressed
2221 2255 // oops implicit null check is not generated;
2222 2256 // this allows a normal oop implicit null check to be generated.
2223 2257 if (Matcher::gen_narrow_oop_implicit_null_checks())
2224 2258 new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
2225 2259 //
2226 2260 // This transformation together with CastPP transformation above
2227 2261 // will generate code for implicit NULL checks for compressed oops.
2228 2262 //
2229 2263 // The original code after Optimize()
2230 2264 //
2231 2265 // LoadN memory, narrow_oop_reg
2232 2266 // decode narrow_oop_reg, base_reg
2233 2267 // CmpP base_reg, NULL
2234 2268 // CastPP base_reg // NotNull
2235 2269 // Load [base_reg + offset], val_reg
2236 2270 //
2237 2271 // after these transformations will be
2238 2272 //
2239 2273 // LoadN memory, narrow_oop_reg
2240 2274 // CmpN narrow_oop_reg, NULL
2241 2275 // decode_not_null narrow_oop_reg, base_reg
2242 2276 // Load [base_reg + offset], val_reg
2243 2277 //
2244 2278 // and the uncommon path (== NULL) will use narrow_oop_reg directly
2245 2279 // since narrow oops can be used in debug info now (see the code in
2246 2280 // final_graph_reshaping_walk()).
2247 2281 //
2248 2282 // At the end the code will be matched to
2249 2283 // on x86:
2250 2284 //
2251 2285 // Load_narrow_oop memory, narrow_oop_reg
2252 2286 // Load [R12 + narrow_oop_reg<<3 + offset], val_reg
2253 2287 // NullCheck narrow_oop_reg
2254 2288 //
2255 2289 // and on sparc:
2256 2290 //
2257 2291 // Load_narrow_oop memory, narrow_oop_reg
2258 2292 // decode_not_null narrow_oop_reg, base_reg
2259 2293 // Load [base_reg + offset], val_reg
2260 2294 // NullCheck base_reg
2261 2295 //
2262 2296 } else if (t->isa_oopptr()) {
2263 2297 new_in2 = ConNode::make(C, t->make_narrowoop());
2264 2298 }
2265 2299 }
2266 2300 if (new_in2 != NULL) {
2267 2301 Node* cmpN = new (C, 3) CmpNNode(in1->in(1), new_in2);
2268 2302 n->subsume_by( cmpN );
2269 2303 if (in1->outcnt() == 0) {
2270 2304 in1->disconnect_inputs(NULL);
2271 2305 }
2272 2306 if (in2->outcnt() == 0) {
2273 2307 in2->disconnect_inputs(NULL);
2274 2308 }
2275 2309 }
2276 2310 }
2277 2311 break;
2278 2312
2279 2313 case Op_DecodeN:
2280 2314 assert(!n->in(1)->is_EncodeP(), "should be optimized out");
2281 2315 // DecodeN could be pinned when it can't be folded into
2282 2316 // an address expression, see the code for Op_CastPP above.
2283 2317 assert(n->in(0) == NULL || !Matcher::narrow_oop_use_complex_address(), "no control");
2284 2318 break;
2285 2319
2286 2320 case Op_EncodeP: {
2287 2321 Node* in1 = n->in(1);
2288 2322 if (in1->is_DecodeN()) {
2289 2323 n->subsume_by(in1->in(1));
2290 2324 } else if (in1->Opcode() == Op_ConP) {
2291 2325 Compile* C = Compile::current();
2292 2326 const Type* t = in1->bottom_type();
2293 2327 if (t == TypePtr::NULL_PTR) {
2294 2328 n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR));
2295 2329 } else if (t->isa_oopptr()) {
2296 2330 n->subsume_by(ConNode::make(C, t->make_narrowoop()));
2297 2331 }
2298 2332 }
2299 2333 if (in1->outcnt() == 0) {
2300 2334 in1->disconnect_inputs(NULL);
2301 2335 }
2302 2336 break;
2303 2337 }
2304 2338
2305 2339 case Op_Proj: {
2306 2340 if (OptimizeStringConcat) {
2307 2341 ProjNode* p = n->as_Proj();
2308 2342 if (p->_is_io_use) {
2309 2343 // Separate projections were used for the exception path which
2310 2344 // are normally removed by a late inline. If it wasn't inlined
2311 2345 // then they will hang around and should just be replaced with
2312 2346 // the original one.
2313 2347 Node* proj = NULL;
2314 2348 // Replace with just one
2315 2349 for (SimpleDUIterator i(p->in(0)); i.has_next(); i.next()) {
2316 2350 Node *use = i.get();
2317 2351 if (use->is_Proj() && p != use && use->as_Proj()->_con == p->_con) {
2318 2352 proj = use;
2319 2353 break;
2320 2354 }
2321 2355 }
2322 2356 assert(proj != NULL, "must be found");
2323 2357 p->subsume_by(proj);
2324 2358 }
2325 2359 }
2326 2360 break;
2327 2361 }
2328 2362
2329 2363 case Op_Phi:
2330 2364 if (n->as_Phi()->bottom_type()->isa_narrowoop()) {
2331 2365 // The EncodeP optimization may create a Phi with the same edges
2332 2366 // for all paths, which is not handled well by the Register Allocator.
2333 2367 Node* unique_in = n->in(1);
2334 2368 assert(unique_in != NULL, "");
2335 2369 uint cnt = n->req();
2336 2370 for (uint i = 2; i < cnt; i++) {
2337 2371 Node* m = n->in(i);
2338 2372 assert(m != NULL, "");
2339 2373 if (unique_in != m)
2340 2374 unique_in = NULL;
2341 2375 }
2342 2376 if (unique_in != NULL) {
2343 2377 n->subsume_by(unique_in);
2344 2378 }
2345 2379 }
2346 2380 break;
2347 2381
2348 2382 #endif
2349 2383
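// For Op_ModI/Op_ModL below: when the matching division also exists, either
// fuse the pair into a single DivMod node (if the platform has a match rule
// for it) or strength-reduce the remainder, e.g. (illustrative)
//   r = a % b; q = a / b;  ==>  q = a / b; r = a - q*b;
// so that only one divide instruction is emitted.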
2350 2384 case Op_ModI:
2351 2385 if (UseDivMod) {
2352 2386 // Check if a%b and a/b both exist
2353 2387 Node* d = n->find_similar(Op_DivI);
2354 2388 if (d) {
2355 2389 // Replace them with a fused divmod if supported
2356 2390 Compile* C = Compile::current();
2357 2391 if (Matcher::has_match_rule(Op_DivModI)) {
2358 2392 DivModINode* divmod = DivModINode::make(C, n);
2359 2393 d->subsume_by(divmod->div_proj());
2360 2394 n->subsume_by(divmod->mod_proj());
2361 2395 } else {
2362 2396 // replace a%b with a-((a/b)*b)
2363 2397 Node* mult = new (C, 3) MulINode(d, d->in(2));
2364 2398 Node* sub = new (C, 3) SubINode(d->in(1), mult);
2365 2399 n->subsume_by( sub );
2366 2400 }
2367 2401 }
2368 2402 }
2369 2403 break;
2370 2404
2371 2405 case Op_ModL:
2372 2406 if (UseDivMod) {
2373 2407 // Check if a%b and a/b both exist
2374 2408 Node* d = n->find_similar(Op_DivL);
2375 2409 if (d) {
2376 2410 // Replace them with a fused divmod if supported
2377 2411 Compile* C = Compile::current();
2378 2412 if (Matcher::has_match_rule(Op_DivModL)) {
2379 2413 DivModLNode* divmod = DivModLNode::make(C, n);
2380 2414 d->subsume_by(divmod->div_proj());
2381 2415 n->subsume_by(divmod->mod_proj());
2382 2416 } else {
2383 2417 // replace a%b with a-((a/b)*b)
2384 2418 Node* mult = new (C, 3) MulLNode(d, d->in(2));
2385 2419 Node* sub = new (C, 3) SubLNode(d->in(1), mult);
2386 2420 n->subsume_by( sub );
2387 2421 }
2388 2422 }
2389 2423 }
2390 2424 break;
2391 2425
2392 2426 case Op_Load16B:
2393 2427 case Op_Load8B:
2394 2428 case Op_Load4B:
2395 2429 case Op_Load8S:
2396 2430 case Op_Load4S:
2397 2431 case Op_Load2S:
2398 2432 case Op_Load8C:
2399 2433 case Op_Load4C:
2400 2434 case Op_Load2C:
2401 2435 case Op_Load4I:
2402 2436 case Op_Load2I:
2403 2437 case Op_Load2L:
2404 2438 case Op_Load4F:
2405 2439 case Op_Load2F:
2406 2440 case Op_Load2D:
2407 2441 case Op_Store16B:
2408 2442 case Op_Store8B:
2409 2443 case Op_Store4B:
2410 2444 case Op_Store8C:
2411 2445 case Op_Store4C:
2412 2446 case Op_Store2C:
2413 2447 case Op_Store4I:
2414 2448 case Op_Store2I:
2415 2449 case Op_Store2L:
2416 2450 case Op_Store4F:
2417 2451 case Op_Store2F:
2418 2452 case Op_Store2D:
2419 2453 break;
2420 2454
2421 2455 case Op_PackB:
2422 2456 case Op_PackS:
2423 2457 case Op_PackC:
2424 2458 case Op_PackI:
2425 2459 case Op_PackF:
2426 2460 case Op_PackL:
2427 2461 case Op_PackD:
2428 2462 if (n->req()-1 > 2) {
2429 2463 // Replace many operand PackNodes with a binary tree for matching
2430 2464 PackNode* p = (PackNode*) n;
2431 2465 Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
2432 2466 n->subsume_by(btp);
2433 2467 }
2434 2468 break;
2435 2469 case Op_Loop:
2436 2470 case Op_CountedLoop:
2437 2471 if (n->as_Loop()->is_inner_loop()) {
2438 2472 frc.inc_inner_loop_count();
2439 2473 }
2440 2474 break;
2441 2475 default:
2442 2476 assert( !n->is_Call(), "" );
2443 2477 assert( !n->is_Mem(), "" );
2444 2478 break;
2445 2479 }
2446 2480
2447 2481 // Collect CFG split points
2448 2482 if (n->is_MultiBranch())
2449 2483 frc._tests.push(n);
2450 2484 }
2451 2485
2452 2486 //------------------------------final_graph_reshaping_walk---------------------
2453 2487 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
2454 2488 // requires that the walk visits a node's inputs before visiting the node.
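// The walk below is iterative: an explicit Node_Stack holds (parent, input
// index) pairs, and each node is handed to final_graph_reshaping_impl() only
// after all of its inputs have been visited (a post-order traversal).
// SafePoints that carry JVM state are collected on the side for the
// debug-edge DecodeN skipping performed afterwards.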
2455 2489 static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {
2456 2490 ResourceArea *area = Thread::current()->resource_area();
2457 2491 Unique_Node_List sfpt(area);
2458 2492
2459 2493 frc._visited.set(root->_idx); // first, mark node as visited
2460 2494 uint cnt = root->req();
2461 2495 Node *n = root;
2462 2496 uint i = 0;
2463 2497 while (true) {
2464 2498 if (i < cnt) {
2465 2499 // Place all non-visited non-null inputs onto stack
2466 2500 Node* m = n->in(i);
2467 2501 ++i;
2468 2502 if (m != NULL && !frc._visited.test_set(m->_idx)) {
2469 2503 if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL)
2470 2504 sfpt.push(m);
2471 2505 cnt = m->req();
2472 2506 nstack.push(n, i); // put on stack parent and next input's index
2473 2507 n = m;
2474 2508 i = 0;
2475 2509 }
2476 2510 } else {
2477 2511 // Now do post-visit work
2478 2512 final_graph_reshaping_impl( n, frc );
2479 2513 if (nstack.is_empty())
2480 2514 break; // finished
2481 2515 n = nstack.node(); // Get node from stack
2482 2516 cnt = n->req();
2483 2517 i = nstack.index();
2484 2518 nstack.pop(); // Shift to the next node on stack
2485 2519 }
2486 2520 }
2487 2521
2488 2522 // Skip next transformation if compressed oops are not used.
2489 2523 if (!UseCompressedOops || !Matcher::gen_narrow_oop_implicit_null_checks())
2490 2524 return;
2491 2525
2492 2526 // Go over safepoint nodes to skip DecodeN nodes for debug edges.
2493 2527 // It can be done for uncommon traps or any safepoints/calls
2494 2528 // if the DecodeN node is referenced only in debug info.
2495 2529 while (sfpt.size() > 0) {
2496 2530 n = sfpt.pop();
2497 2531 JVMState *jvms = n->as_SafePoint()->jvms();
2498 2532 assert(jvms != NULL, "sanity");
2499 2533 int start = jvms->debug_start();
2500 2534 int end = n->req();
2501 2535 bool is_uncommon = (n->is_CallStaticJava() &&
2502 2536 n->as_CallStaticJava()->uncommon_trap_request() != 0);
2503 2537 for (int j = start; j < end; j++) {
2504 2538 Node* in = n->in(j);
2505 2539 if (in->is_DecodeN()) {
2506 2540 bool safe_to_skip = true;
2507 2541 if (!is_uncommon ) {
2508 2542 // Is it safe to skip?
2509 2543 for (uint i = 0; i < in->outcnt(); i++) {
2510 2544 Node* u = in->raw_out(i);
2511 2545 if (!u->is_SafePoint() ||
2512 2546 u->is_Call() && u->as_Call()->has_non_debug_use(n)) {
2513 2547 safe_to_skip = false;
2514 2548 }
2515 2549 }
2516 2550 }
2517 2551 if (safe_to_skip) {
2518 2552 n->set_req(j, in->in(1));
2519 2553 }
2520 2554 if (in->outcnt() == 0) {
2521 2555 in->disconnect_inputs(NULL);
2522 2556 }
2523 2557 }
2524 2558 }
2525 2559 }
2526 2560 }
2527 2561
2528 2562 //------------------------------final_graph_reshaping--------------------------
2529 2563 // Final Graph Reshaping.
2530 2564 //
2531 2565 // (1) Clone simple inputs to uncommon calls, so they can be scheduled late
2532 2566 // and not commoned up and forced early. Must come after regular
2533 2567 // optimizations to avoid GVN undoing the cloning. Clone constant
2534 2568 // inputs to Loop Phis; these will be split by the allocator anyway.
2535 2569 // Remove Opaque nodes.
2536 2570 // (2) Move last-uses by commutative operations to the left input to encourage
2537 2571 // Intel update-in-place two-address operations and better register usage
2538 2572 // on RISCs. Must come after regular optimizations to avoid GVN Ideal
2539 2573 // calls canonicalizing them back.
2540 2574 // (3) Count the number of double-precision FP ops, single-precision FP ops
2541 2575 // and call sites. On Intel, we can get correct rounding either by
2542 2576 // forcing singles to memory (requires extra stores and loads after each
2543 2577 // FP bytecode) or we can set a rounding mode bit (requires setting and
2544 2578 // clearing the mode bit around call sites). The mode bit is only used
2545 2579 // if the relative frequency of single FP ops to calls is low enough.
2546 2580 // This is a key transform for SPEC mpeg_audio.
2547 2581 // (4) Detect infinite loops; blobs of code reachable from above but not
2548 2582 // below. Several of the Code_Gen algorithms fail on such code shapes,
2549 2583 // so we simply bail out. Happens a lot in ZKM.jar, but also happens
2550 2584 // from time to time in other code (such as -Xcomp finalizer loops, etc.).
2551 2585 // Detection is by looking for IfNodes where only 1 projection is
2552 2586 // reachable from below or CatchNodes missing some targets.
2553 2587 // (5) Assert for insane oop offsets in debug mode.
2554 2588
2555 2589 bool Compile::final_graph_reshaping() {
2556 2590 // an infinite loop may have been eliminated by the optimizer,
2557 2591 // in which case the graph will be empty.
2558 2592 if (root()->req() == 1) {
2559 2593 record_method_not_compilable("trivial infinite loop");
2560 2594 return true;
2561 2595 }
2562 2596
2563 2597 Final_Reshape_Counts frc;
2564 2598
2565 2599 // Visit everybody reachable!
2566 2600 // Allocate stack of size C->unique()/2 to avoid frequent realloc
2567 2601 Node_Stack nstack(unique() >> 1);
2568 2602 final_graph_reshaping_walk(nstack, root(), frc);
2569 2603
2570 2604 // Check for unreachable (from below) code (i.e., infinite loops).
2571 2605 for( uint i = 0; i < frc._tests.size(); i++ ) {
2572 2606 MultiBranchNode *n = frc._tests[i]->as_MultiBranch();
2573 2607 // Get number of CFG targets.
2574 2608 // Note that PCTables include exception targets after calls.
2575 2609 uint required_outcnt = n->required_outcnt();
2576 2610 if (n->outcnt() != required_outcnt) {
2577 2611 // Check for a few special cases. Rethrow Nodes never take the
2578 2612 // 'fall-thru' path, so expected kids is 1 less.
2579 2613 if (n->is_PCTable() && n->in(0) && n->in(0)->in(0)) {
2580 2614 if (n->in(0)->in(0)->is_Call()) {
2581 2615 CallNode *call = n->in(0)->in(0)->as_Call();
2582 2616 if (call->entry_point() == OptoRuntime::rethrow_stub()) {
2583 2617 required_outcnt--; // Rethrow always has 1 less kid
2584 2618 } else if (call->req() > TypeFunc::Parms &&
2585 2619 call->is_CallDynamicJava()) {
2586 2620 // Check for null receiver. In that case, the optimizer has
2587 2621 // detected that the virtual call will always result in a null
2588 2622 // pointer exception. The fall-through projection of this CatchNode
2589 2623 // will not be populated.
2590 2624 Node *arg0 = call->in(TypeFunc::Parms);
2591 2625 if (arg0->is_Type() &&
2592 2626 arg0->as_Type()->type()->higher_equal(TypePtr::NULL_PTR)) {
2593 2627 required_outcnt--;
2594 2628 }
2595 2629 } else if (call->entry_point() == OptoRuntime::new_array_Java() &&
2596 2630 call->req() > TypeFunc::Parms+1 &&
2597 2631 call->is_CallStaticJava()) {
2598 2632 // Check for negative array length. In that case, the optimizer has
2599 2633 // detected that the allocation attempt will always result in an
2600 2634 // exception. There is no fall-through projection of this CatchNode.
2601 2635 Node *arg1 = call->in(TypeFunc::Parms+1);
2602 2636 if (arg1->is_Type() &&
2603 2637 arg1->as_Type()->type()->join(TypeInt::POS)->empty()) {
2604 2638 required_outcnt--;
2605 2639 }
2606 2640 }
2607 2641 }
2608 2642 }
2609 2643 // Recheck with a better notion of 'required_outcnt'
2610 2644 if (n->outcnt() != required_outcnt) {
2611 2645 record_method_not_compilable("malformed control flow");
2612 2646 return true; // Not all targets reachable!
2613 2647 }
2614 2648 }
2615 2649 // Check that I actually visited all kids. Unreached kids
2616 2650 // must be infinite loops.
2617 2651 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++)
2618 2652 if (!frc._visited.test(n->fast_out(j)->_idx)) {
2619 2653 record_method_not_compilable("infinite loop");
2620 2654 return true; // Found unvisited kid; must be unreach
2621 2655 }
2622 2656 }
2623 2657
2624 2658 // If original bytecodes contained a mixture of floats and doubles
2625 2659 // check if the optimizer has made it homogeneous, item (3).
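// The mode bit is only worth setting when the method is float-heavy and
// call-poor: more than 32 single-precision ops, no double-precision ops,
// and fewer than one mode-flipping call site per ten float ops.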
2626 2660 if( Use24BitFPMode && Use24BitFP && UseSSE == 0 &&
2627 2661 frc.get_float_count() > 32 &&
2628 2662 frc.get_double_count() == 0 &&
2629 2663 (10 * frc.get_call_count() < frc.get_float_count()) ) {
2630 2664 set_24_bit_selection_and_mode( false, true );
2631 2665 }
2632 2666
2633 2667 set_java_calls(frc.get_java_call_count());
2634 2668 set_inner_loops(frc.get_inner_loop_count());
2635 2669
2636 2670 // No infinite loops, no reason to bail out.
2637 2671 return false;
2638 2672 }
2639 2673
2640 2674 //-----------------------------too_many_traps----------------------------------
2641 2675 // Report if there are too many traps at the current method and bci.
2642 2676 // Return true if there was a trap, and/or PerMethodTrapLimit is exceeded.
2643 2677 bool Compile::too_many_traps(ciMethod* method,
2644 2678 int bci,
2645 2679 Deoptimization::DeoptReason reason) {
2646 2680 ciMethodData* md = method->method_data();
2647 2681 if (md->is_empty()) {
2648 2682 // Assume the trap has not occurred, or that it occurred only
2649 2683 // because of a transient condition during start-up in the interpreter.
2650 2684 return false;
2651 2685 }
2652 2686 if (md->has_trap_at(bci, reason) != 0) {
2653 2687 // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
2654 2688 // Also, if there are multiple reasons, or if there is no per-BCI record,
2655 2689 // assume the worst.
2656 2690 if (log())
2657 2691 log()->elem("observe trap='%s' count='%d'",
2658 2692 Deoptimization::trap_reason_name(reason),
2659 2693 md->trap_count(reason));
2660 2694 return true;
2661 2695 } else {
2662 2696 // Ignore method/bci and see if there have been too many globally.
2663 2697 return too_many_traps(reason, md);
2664 2698 }
2665 2699 }
2666 2700
2667 2701 // Less-accurate variant which does not require a method and bci.
2668 2702 bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
2669 2703 ciMethodData* logmd) {
2670 2704 if (trap_count(reason) >= (uint)PerMethodTrapLimit) {
2671 2705 // Too many traps globally.
2672 2706 // Note that we use cumulative trap_count, not just md->trap_count.
2673 2707 if (log()) {
2674 2708 int mcount = (logmd == NULL)? -1: (int)logmd->trap_count(reason);
2675 2709 log()->elem("observe trap='%s' count='0' mcount='%d' ccount='%d'",
2676 2710 Deoptimization::trap_reason_name(reason),
2677 2711 mcount, trap_count(reason));
2678 2712 }
2679 2713 return true;
2680 2714 } else {
2681 2715 // The coast is clear.
2682 2716 return false;
2683 2717 }
2684 2718 }
2685 2719
2686 2720 //--------------------------too_many_recompiles--------------------------------
2687 2721 // Report if there are too many recompiles at the current method and bci.
2688 2722 // Consults PerBytecodeRecompilationCutoff and PerMethodRecompilationCutoff.
2689 2723 // Is not eager to return true, since this will cause the compiler to use
2690 2724 // Action_none for a trap point, to avoid too many recompilations.
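// Two independent cutoffs are consulted: a per-bytecode one (an eighth of
// PerBytecodeRecompilationCutoff, compared against the overflow recompile
// count) and a per-method one (half of PerMethodRecompilationCutoff plus one,
// compared against the cumulative decompile count).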
2691 2725 bool Compile::too_many_recompiles(ciMethod* method,
2692 2726 int bci,
2693 2727 Deoptimization::DeoptReason reason) {
2694 2728 ciMethodData* md = method->method_data();
2695 2729 if (md->is_empty()) {
2696 2730 // Assume the trap has not occurred, or that it occurred only
2697 2731 // because of a transient condition during start-up in the interpreter.
2698 2732 return false;
2699 2733 }
2700 2734 // Pick a cutoff point well within PerBytecodeRecompilationCutoff.
2701 2735 uint bc_cutoff = (uint) PerBytecodeRecompilationCutoff / 8;
2702 2736 uint m_cutoff = (uint) PerMethodRecompilationCutoff / 2 + 1; // not zero
2703 2737 Deoptimization::DeoptReason per_bc_reason
2704 2738 = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
2705 2739 if ((per_bc_reason == Deoptimization::Reason_none
2706 2740 || md->has_trap_at(bci, reason) != 0)
2707 2741 // The trap frequency measure we care about is the recompile count:
2708 2742 && md->trap_recompiled_at(bci)
2709 2743 && md->overflow_recompile_count() >= bc_cutoff) {
2710 2744 // Do not emit a trap here if it has already caused recompilations.
2711 2745 // Also, if there are multiple reasons, or if there is no per-BCI record,
2712 2746 // assume the worst.
2713 2747 if (log())
2714 2748 log()->elem("observe trap='%s recompiled' count='%d' recompiles2='%d'",
2715 2749 Deoptimization::trap_reason_name(reason),
2716 2750 md->trap_count(reason),
2717 2751 md->overflow_recompile_count());
2718 2752 return true;
2719 2753 } else if (trap_count(reason) != 0
2720 2754 && decompile_count() >= m_cutoff) {
2721 2755 // Too many recompiles globally, and we have seen this sort of trap.
2722 2756 // Use cumulative decompile_count, not just md->decompile_count.
2723 2757 if (log())
2724 2758 log()->elem("observe trap='%s' count='%d' mcount='%d' decompiles='%d' mdecompiles='%d'",
2725 2759 Deoptimization::trap_reason_name(reason),
2726 2760 md->trap_count(reason), trap_count(reason),
2727 2761 md->decompile_count(), decompile_count());
2728 2762 return true;
2729 2763 } else {
2730 2764 // The coast is clear.
2731 2765 return false;
2732 2766 }
2733 2767 }
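// Worked example of the cutoffs above, assuming the default flag values
// (PerBytecodeRecompilationCutoff = 200, PerMethodRecompilationCutoff = 400;
// both are tunable, so the numbers are illustrative only):
//
//   bc_cutoff = 200 / 8     = 25   recompiles charged to a single bci
//   m_cutoff  = 400 / 2 + 1 = 201  decompiles for the method as a whole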
2734 2768
2735 2769
2736 2770 #ifndef PRODUCT
2737 2771 //------------------------------verify_graph_edges---------------------------
2738 2772 // Walk the Graph and verify that there is a one-to-one correspondence
2739 2773 // between Use-Def edges and Def-Use edges in the graph.
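// The invariant checked: for every input edge use->in(i) == def there must
// be exactly one matching output edge def->raw_out(j) == use.  The walk is
// guarded by the debug-only VerifyGraphEdges flag, so it costs nothing in
// product builds; in a debug build it is typically enabled with
// -XX:+VerifyGraphEdges.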
2740 2774 void Compile::verify_graph_edges(bool no_dead_code) {
2741 2775 if (VerifyGraphEdges) {
2742 2776 ResourceArea *area = Thread::current()->resource_area();
2743 2777 Unique_Node_List visited(area);
2744 2778 // Call recursive graph walk to check edges
2745 2779 _root->verify_edges(visited);
2746 2780 if (no_dead_code) {
2747 2781 // Now make sure that no visited node is used by an unvisited node.
2748 2782       int dead_nodes = 0;
2749 2783 Unique_Node_List checked(area);
2750 2784 while (visited.size() > 0) {
2751 2785 Node* n = visited.pop();
2752 2786 checked.push(n);
2753 2787 for (uint i = 0; i < n->outcnt(); i++) {
2754 2788 Node* use = n->raw_out(i);
2755 2789 if (checked.member(use)) continue; // already checked
2756 2790 if (visited.member(use)) continue; // already in the graph
2757 2791 if (use->is_Con()) continue; // a dead ConNode is OK
2758 2792 // At this point, we have found a dead node which is DU-reachable.
2759 2793 if (dead_nodes++ == 0)
2760 2794 tty->print_cr("*** Dead nodes reachable via DU edges:");
2761 2795 use->dump(2);
2762 2796 tty->print_cr("---");
2763 2797 checked.push(use); // No repeats; pretend it is now checked.
2764 2798 }
2765 2799 }
2766 2800 assert(dead_nodes == 0, "using nodes must be reachable from root");
2767 2801 }
2768 2802 }
2769 2803 }
2770 2804 #endif
2771 2805
2772 2806 // The Compile object keeps track of failure reasons separately from the ciEnv.
2773 2807 // This is required because there is not quite a 1-1 relation between the
2774 2808 // ciEnv (and its compilation task) and the Compile object. Note that one
2775 2809 // ciEnv might use two Compile objects, if C2Compiler::compile_method decides
2776 2810 // to backtrack and retry without subsuming loads. Other than this backtracking
2777 2811 // behavior, the Compile's failure reason is quietly copied up to the ciEnv
2778 2812 // by the logic in C2Compiler.
2779 2813 void Compile::record_failure(const char* reason) {
2780 2814 if (log() != NULL) {
2781 2815 log()->elem("failure reason='%s' phase='compile'", reason);
2782 2816 }
2783 2817 if (_failure_reason == NULL) {
2784 2818 // Record the first failure reason.
2785 2819 _failure_reason = reason;
2786 2820 }
2787 2821 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
2788 2822 C->print_method(_failure_reason);
2789 2823 }
2790 2824 _root = NULL; // flush the graph, too
2791 2825 }
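// The resulting compile log entry looks roughly like the following (the
// reason string is just an example):
//
//   <failure reason='retry without subsuming loads' phase='compile'/>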
2792 2826
2793 2827 Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
2794 2828 : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false)
2795 2829 {
2796 2830 if (dolog) {
2797 2831 C = Compile::current();
2798 2832 _log = C->log();
2799 2833 } else {
2800 2834 C = NULL;
2801 2835 _log = NULL;
2802 2836 }
2803 2837 if (_log != NULL) {
2804 2838 _log->begin_head("phase name='%s' nodes='%d'", name, C->unique());
2805 2839 _log->stamp();
2806 2840 _log->end_head();
2807 2841 }
2808 2842 }
2809 2843
2810 2844 Compile::TracePhase::~TracePhase() {
2811 2845 if (_log != NULL) {
2812 2846 _log->done("phase nodes='%d'", C->unique());
2813 2847 }
2814 2848 }
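// A typical RAII usage sketch (the phase name and timer are illustrative;
// real call sites pass their own):
//
//   {
//     TracePhase t("optimizer", &_t_optimizer, true);
//     // ... work in this scope is timed and, when logging is on, bracketed
//     // by phase begin/done log elements carrying the node counts ...
//   }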
2069 lines elided