hotspot Wdiff src/share/vm/opto/loopopts.cpp

Print this page

Split	Split	Close
Expand all
Collapse all

          --- old/src/share/vm/opto/loopopts.cpp
          +++ new/src/share/vm/opto/loopopts.cpp

   1    1  /*
   2    2   * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
   3    3   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4    4   *
   5    5   * This code is free software; you can redistribute it and/or modify it
   6    6   * under the terms of the GNU General Public License version 2 only, as
   7    7   * published by the Free Software Foundation.
   8    8   *
   9    9   * This code is distributed in the hope that it will be useful, but WITHOUT
  10   10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11   11   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12   12   * version 2 for more details (a copy is included in the LICENSE file that
  13   13   * accompanied this code).
  14   14   *
  15   15   * You should have received a copy of the GNU General Public License version
  16   16   * 2 along with this work; if not, write to the Free Software Foundation,
  17   17   * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18   18   *
  19   19   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20   20   * or visit www.oracle.com if you need additional information or have any
  21   21   * questions.
  22   22   *
  23   23   */
  24   24  
  25   25  #include "precompiled.hpp"
  26   26  #include "memory/allocation.inline.hpp"
  27   27  #include "opto/addnode.hpp"
  28   28  #include "opto/connode.hpp"
  29   29  #include "opto/divnode.hpp"
  30   30  #include "opto/loopnode.hpp"
  31   31  #include "opto/matcher.hpp"
  32   32  #include "opto/mulnode.hpp"
  33   33  #include "opto/rootnode.hpp"
  34   34  #include "opto/subnode.hpp"
  35   35  
  36   36  //=============================================================================
  37   37  //------------------------------split_thru_phi---------------------------------
  38   38  // Split Node 'n' through merge point if there is enough win.
  39   39  Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
  40   40    if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
  41   41      // ConvI2L may have type information on it which is unsafe to push up
  42   42      // so disable this for now
  43   43      return NULL;
  44   44    }
  45   45    int wins = 0;
  46   46    assert(!n->is_CFG(), "");
  47   47    assert(region->is_Region(), "");
  48   48  
  49   49    const Type* type = n->bottom_type();
  50   50    const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
  51   51    Node *phi;
  52   52    if (t_oop != NULL && t_oop->is_known_instance_field()) {
  53   53      int iid    = t_oop->instance_id();
  54   54      int index  = C->get_alias_index(t_oop);
  55   55      int offset = t_oop->offset();
  56   56      phi = new (C) PhiNode(region, type, NULL, iid, index, offset);
  57   57    } else {
  58   58      phi = PhiNode::make_blank(region, n);
  59   59    }
  60   60    uint old_unique = C->unique();
  61   61    for (uint i = 1; i < region->req(); i++) {
  62   62      Node *x;
  63   63      Node* the_clone = NULL;
  64   64      if (region->in(i) == C->top()) {
  65   65        x = C->top();             // Dead path?  Use a dead data op
  66   66      } else {
  67   67        x = n->clone();           // Else clone up the data op
  68   68        the_clone = x;            // Remember for possible deletion.
  69   69        // Alter data node to use pre-phi inputs
  70   70        if (n->in(0) == region)
  71   71          x->set_req( 0, region->in(i) );
  72   72        for (uint j = 1; j < n->req(); j++) {
  73   73          Node *in = n->in(j);
  74   74          if (in->is_Phi() && in->in(0) == region)
  75   75            x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
  76   76        }
  77   77      }
  78   78      // Check for a 'win' on some paths
  79   79      const Type *t = x->Value(&_igvn);
  80   80  
  81   81      bool singleton = t->singleton();
  82   82  
  83   83      // A TOP singleton indicates that there are no possible values incoming
  84   84      // along a particular edge. In most cases, this is OK, and the Phi will
  85   85      // be eliminated later in an Ideal call. However, we can't allow this to
  86   86      // happen if the singleton occurs on loop entry, as the elimination of
  87   87      // the PhiNode may cause the resulting node to migrate back to a previous
  88   88      // loop iteration.
  89   89      if (singleton && t == Type::TOP) {
  90   90        // Is_Loop() == false does not confirm the absence of a loop (e.g., an
  91   91        // irreducible loop may not be indicated by an affirmative is_Loop());
  92   92        // therefore, the only top we can split thru a phi is on a backedge of
  93   93        // a loop.
  94   94        singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
  95   95      }
  96   96  
  97   97      if (singleton) {
  98   98        wins++;
  99   99        x = ((PhaseGVN&)_igvn).makecon(t);
 100  100      } else {
 101  101        // We now call Identity to try to simplify the cloned node.
 102  102        // Note that some Identity methods call phase->type(this).
 103  103        // Make sure that the type array is big enough for
 104  104        // our new node, even though we may throw the node away.
 105  105        // (Note: This tweaking with igvn only works because x is a new node.)
 106  106        _igvn.set_type(x, t);
 107  107        // If x is a TypeNode, capture any more-precise type permanently into Node
 108  108        // otherwise it will be not updated during igvn->transform since
 109  109        // igvn->type(x) is set to x->Value() already.
 110  110        x->raise_bottom_type(t);
 111  111        Node *y = x->Identity(&_igvn);
 112  112        if (y != x) {
 113  113          wins++;
 114  114          x = y;
 115  115        } else {
 116  116          y = _igvn.hash_find(x);
 117  117          if (y) {
 118  118            wins++;
 119  119            x = y;
 120  120          } else {
 121  121            // Else x is a new node we are keeping
 122  122            // We do not need register_new_node_with_optimizer
 123  123            // because set_type has already been called.
 124  124            _igvn._worklist.push(x);
 125  125          }
 126  126        }
 127  127      }
 128  128      if (x != the_clone && the_clone != NULL)
 129  129        _igvn.remove_dead_node(the_clone);
 130  130      phi->set_req( i, x );
 131  131    }
 132  132    // Too few wins?
 133  133    if (wins <= policy) {
 134  134      _igvn.remove_dead_node(phi);
 135  135      return NULL;
 136  136    }
 137  137  
 138  138    // Record Phi
 139  139    register_new_node( phi, region );
 140  140  
 141  141    for (uint i2 = 1; i2 < phi->req(); i2++) {
 142  142      Node *x = phi->in(i2);
 143  143      // If we commoned up the cloned 'x' with another existing Node,
 144  144      // the existing Node picks up a new use.  We need to make the
 145  145      // existing Node occur higher up so it dominates its uses.
 146  146      Node *old_ctrl;
 147  147      IdealLoopTree *old_loop;
 148  148  
 149  149      if (x->is_Con()) {
 150  150        // Constant's control is always root.
 151  151        set_ctrl(x, C->root());
 152  152        continue;
 153  153      }
 154  154      // The occasional new node
 155  155      if (x->_idx >= old_unique) {     // Found a new, unplaced node?
 156  156        old_ctrl = NULL;
 157  157        old_loop = NULL;               // Not in any prior loop
 158  158      } else {
 159  159        old_ctrl = get_ctrl(x);
 160  160        old_loop = get_loop(old_ctrl); // Get prior loop
 161  161      }
 162  162      // New late point must dominate new use
 163  163      Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
 164  164      if (new_ctrl == old_ctrl) // Nothing is changed
 165  165        continue;
 166  166  
 167  167      IdealLoopTree *new_loop = get_loop(new_ctrl);
 168  168  
 169  169      // Don't move x into a loop if its uses are
 170  170      // outside of loop. Otherwise x will be cloned
 171  171      // for each use outside of this loop.
 172  172      IdealLoopTree *use_loop = get_loop(region);
 173  173      if (!new_loop->is_member(use_loop) &&
 174  174          (old_loop == NULL || !new_loop->is_member(old_loop))) {
 175  175        // Take early control, later control will be recalculated
 176  176        // during next iteration of loop optimizations.
 177  177        new_ctrl = get_early_ctrl(x);
 178  178        new_loop = get_loop(new_ctrl);
 179  179      }
 180  180      // Set new location
 181  181      set_ctrl(x, new_ctrl);
 182  182      // If changing loop bodies, see if we need to collect into new body
 183  183      if (old_loop != new_loop) {
 184  184        if (old_loop && !old_loop->_child)
 185  185          old_loop->_body.yank(x);
 186  186        if (!new_loop->_child)
 187  187          new_loop->_body.push(x);  // Collect body info
 188  188      }
 189  189    }
 190  190  
 191  191    return phi;
 192  192  }
 193  193  
 194  194  //------------------------------dominated_by------------------------------------
 195  195  // Replace the dominated test with an obvious true or false.  Place it on the
 196  196  // IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
 197  197  // live path up to the dominating control.
 198  198  void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
 199  199  #ifndef PRODUCT
 200  200    if (VerifyLoopOptimizations && PrintOpto) tty->print_cr("dominating test");
 201  201  #endif
 202  202  
 203  203  
 204  204    // prevdom is the dominating projection of the dominating test.
 205  205    assert( iff->is_If(), "" );
 206  206    assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
 207  207    int pop = prevdom->Opcode();
 208  208    assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
 209  209    if (flip) {
 210  210      if (pop == Op_IfTrue)
 211  211        pop = Op_IfFalse;
 212  212      else
 213  213        pop = Op_IfTrue;
 214  214    }
 215  215    // 'con' is set to true or false to kill the dominated test.
 216  216    Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
 217  217    set_ctrl(con, C->root()); // Constant gets a new use
 218  218    // Hack the dominated test
 219  219    _igvn.replace_input_of(iff, 1, con);
 220  220  
 221  221    // If I dont have a reachable TRUE and FALSE path following the IfNode then
 222  222    // I can assume this path reaches an infinite loop.  In this case it's not
 223  223    // important to optimize the data Nodes - either the whole compilation will
 224  224    // be tossed or this path (and all data Nodes) will go dead.
 225  225    if (iff->outcnt() != 2) return;
 226  226  
 227  227    // Make control-dependent data Nodes on the live path (path that will remain
 228  228    // once the dominated IF is removed) become control-dependent on the
 229  229    // dominating projection.
 230  230    Node* dp = iff->as_If()->proj_out(pop == Op_IfTrue);
 231  231  
 232  232    // Loop predicates may have depending checks which should not
 233  233    // be skipped. For example, range check predicate has two checks
 234  234    // for lower and upper bounds.
 235  235    ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp->as_Proj()->_con)->as_Proj();
 236  236    if (exclude_loop_predicate &&
 237  237        is_uncommon_trap_proj(unc_proj, Deoptimization::Reason_predicate))
 238  238      return; // Let IGVN transformation change control dependence.
 239  239  
 240  240    IdealLoopTree *old_loop = get_loop(dp);
 241  241  
 242  242    for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
 243  243      Node* cd = dp->fast_out(i); // Control-dependent node
 244  244      if (cd->depends_only_on_test()) {
 245  245        assert(cd->in(0) == dp, "");
 246  246        _igvn.replace_input_of(cd, 0, prevdom);
 247  247        set_early_ctrl(cd);
 248  248        IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
 249  249        if (old_loop != new_loop) {
 250  250          if (!old_loop->_child) old_loop->_body.yank(cd);
 251  251          if (!new_loop->_child) new_loop->_body.push(cd);
 252  252        }
 253  253        --i;
 254  254        --imax;
 255  255      }
 256  256    }
 257  257  }
 258  258  
 259  259  //------------------------------has_local_phi_input----------------------------
 260  260  // Return TRUE if 'n' has Phi inputs from its local block and no other
 261  261  // block-local inputs (all non-local-phi inputs come from earlier blocks)
 262  262  Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
 263  263    Node *n_ctrl = get_ctrl(n);
 264  264    // See if some inputs come from a Phi in this block, or from before
 265  265    // this block.
 266  266    uint i;
 267  267    for( i = 1; i < n->req(); i++ ) {
 268  268      Node *phi = n->in(i);
 269  269      if( phi->is_Phi() && phi->in(0) == n_ctrl )
 270  270        break;
 271  271    }
 272  272    if( i >= n->req() )
 273  273      return NULL;                // No Phi inputs; nowhere to clone thru
 274  274  
 275  275    // Check for inputs created between 'n' and the Phi input.  These
 276  276    // must split as well; they have already been given the chance
 277  277    // (courtesy of a post-order visit) and since they did not we must
 278  278    // recover the 'cost' of splitting them by being very profitable
 279  279    // when splitting 'n'.  Since this is unlikely we simply give up.
 280  280    for( i = 1; i < n->req(); i++ ) {
 281  281      Node *m = n->in(i);
 282  282      if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
 283  283        // We allow the special case of AddP's with no local inputs.
 284  284        // This allows us to split-up address expressions.
 285  285        if (m->is_AddP() &&
 286  286            get_ctrl(m->in(2)) != n_ctrl &&
 287  287            get_ctrl(m->in(3)) != n_ctrl) {
 288  288          // Move the AddP up to dominating point
 289  289          set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
 290  290          continue;
 291  291        }
 292  292        return NULL;
 293  293      }
 294  294    }
 295  295  
 296  296    return n_ctrl;
 297  297  }
 298  298  
 299  299  //------------------------------remix_address_expressions----------------------
 300  300  // Rework addressing expressions to get the most loop-invariant stuff
 301  301  // moved out.  We'd like to do all associative operators, but it's especially
 302  302  // important (common) to do address expressions.
 303  303  Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
 304  304    if (!has_ctrl(n))  return NULL;
 305  305    Node *n_ctrl = get_ctrl(n);
 306  306    IdealLoopTree *n_loop = get_loop(n_ctrl);
 307  307  
 308  308    // See if 'n' mixes loop-varying and loop-invariant inputs and
 309  309    // itself is loop-varying.
 310  310  
 311  311    // Only interested in binary ops (and AddP)
 312  312    if( n->req() < 3 || n->req() > 4 ) return NULL;
 313  313  
 314  314    Node *n1_ctrl = get_ctrl(n->in(                    1));
 315  315    Node *n2_ctrl = get_ctrl(n->in(                    2));
 316  316    Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
 317  317    IdealLoopTree *n1_loop = get_loop( n1_ctrl );
 318  318    IdealLoopTree *n2_loop = get_loop( n2_ctrl );
 319  319    IdealLoopTree *n3_loop = get_loop( n3_ctrl );
 320  320  
 321  321    // Does one of my inputs spin in a tighter loop than self?
 322  322    if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
 323  323        (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
 324  324        (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
 325  325      return NULL;                // Leave well enough alone
 326  326  
 327  327    // Is at least one of my inputs loop-invariant?
 328  328    if( n1_loop == n_loop &&
 329  329        n2_loop == n_loop &&
 330  330        n3_loop == n_loop )
 331  331      return NULL;                // No loop-invariant inputs
 332  332  
 333  333  
 334  334    int n_op = n->Opcode();
 335  335  
 336  336    // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
 337  337    if( n_op == Op_LShiftI ) {
 338  338      // Scale is loop invariant
 339  339      Node *scale = n->in(2);
 340  340      Node *scale_ctrl = get_ctrl(scale);
 341  341      IdealLoopTree *scale_loop = get_loop(scale_ctrl );
 342  342      if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
 343  343        return NULL;
 344  344      const TypeInt *scale_t = scale->bottom_type()->isa_int();
 345  345      if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
 346  346        return NULL;              // Dont bother with byte/short masking
 347  347      // Add must vary with loop (else shift would be loop-invariant)
 348  348      Node *add = n->in(1);
 349  349      Node *add_ctrl = get_ctrl(add);
 350  350      IdealLoopTree *add_loop = get_loop(add_ctrl);
 351  351      //assert( n_loop == add_loop, "" );
 352  352      if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
 353  353  
 354  354      // Convert I-V into I+ (0-V); same for V-I
 355  355      if( add->Opcode() == Op_SubI &&
 356  356          _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
 357  357        Node *zero = _igvn.intcon(0);
 358  358        set_ctrl(zero, C->root());
 359  359        Node *neg = new (C) SubINode( _igvn.intcon(0), add->in(2) );
 360  360        register_new_node( neg, get_ctrl(add->in(2) ) );
 361  361        add = new (C) AddINode( add->in(1), neg );
 362  362        register_new_node( add, add_ctrl );
 363  363      }
 364  364      if( add->Opcode() != Op_AddI ) return NULL;
 365  365      // See if one add input is loop invariant
 366  366      Node *add_var = add->in(1);
 367  367      Node *add_var_ctrl = get_ctrl(add_var);
 368  368      IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
 369  369      Node *add_invar = add->in(2);
 370  370      Node *add_invar_ctrl = get_ctrl(add_invar);
 371  371      IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
 372  372      if( add_var_loop == n_loop ) {
 373  373      } else if( add_invar_loop == n_loop ) {
 374  374        // Swap to find the invariant part
 375  375        add_invar = add_var;
 376  376        add_invar_ctrl = add_var_ctrl;
 377  377        add_invar_loop = add_var_loop;
 378  378        add_var = add->in(2);
 379  379        Node *add_var_ctrl = get_ctrl(add_var);
 380  380        IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
 381  381      } else                      // Else neither input is loop invariant
 382  382        return NULL;
 383  383      if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
 384  384        return NULL;              // No invariant part of the add?
 385  385  
 386  386      // Yes!  Reshape address expression!
 387  387      Node *inv_scale = new (C) LShiftINode( add_invar, scale );
 388  388      Node *inv_scale_ctrl =
 389  389        dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
 390  390        add_invar_ctrl : scale_ctrl;
 391  391      register_new_node( inv_scale, inv_scale_ctrl );
 392  392      Node *var_scale = new (C) LShiftINode( add_var, scale );
 393  393      register_new_node( var_scale, n_ctrl );
 394  394      Node *var_add = new (C) AddINode( var_scale, inv_scale );
 395  395      register_new_node( var_add, n_ctrl );
 396  396      _igvn.replace_node( n, var_add );
 397  397      return var_add;
 398  398    }
 399  399  
 400  400    // Replace (I+V) with (V+I)
 401  401    if( n_op == Op_AddI ||
 402  402        n_op == Op_AddL ||
 403  403        n_op == Op_AddF ||
 404  404        n_op == Op_AddD ||
 405  405        n_op == Op_MulI ||
 406  406        n_op == Op_MulL ||
 407  407        n_op == Op_MulF ||
 408  408        n_op == Op_MulD ) {
 409  409      if( n2_loop == n_loop ) {
 410  410        assert( n1_loop != n_loop, "" );
 411  411        n->swap_edges(1, 2);
 412  412      }
 413  413    }
 414  414  
 415  415    // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
 416  416    // but not if I2 is a constant.
 417  417    if( n_op == Op_AddP ) {
 418  418      if( n2_loop == n_loop && n3_loop != n_loop ) {
 419  419        if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
 420  420          Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
 421  421          Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
 422  422          IdealLoopTree *n22loop = get_loop( n22_ctrl );
 423  423          IdealLoopTree *n23_loop = get_loop( n23_ctrl );
 424  424          if( n22loop != n_loop && n22loop->is_member(n_loop) &&
 425  425              n23_loop == n_loop ) {
 426  426            Node *add1 = new (C) AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
 427  427            // Stuff new AddP in the loop preheader
 428  428            register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
 429  429            Node *add2 = new (C) AddPNode( n->in(1), add1, n->in(2)->in(3) );
 430  430            register_new_node( add2, n_ctrl );
 431  431            _igvn.replace_node( n, add2 );
 432  432            return add2;
 433  433          }
 434  434        }
 435  435      }
 436  436  
 437  437      // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
 438  438      if( n2_loop != n_loop && n3_loop == n_loop ) {
 439  439        if( n->in(3)->Opcode() == Op_AddI ) {
 440  440          Node *V = n->in(3)->in(1);
 441  441          Node *I = n->in(3)->in(2);
 442  442          if( is_member(n_loop,get_ctrl(V)) ) {
 443  443          } else {
 444  444            Node *tmp = V; V = I; I = tmp;
 445  445          }
 446  446          if( !is_member(n_loop,get_ctrl(I)) ) {
 447  447            Node *add1 = new (C) AddPNode( n->in(1), n->in(2), I );
 448  448            // Stuff new AddP in the loop preheader
 449  449            register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
 450  450            Node *add2 = new (C) AddPNode( n->in(1), add1, V );
 451  451            register_new_node( add2, n_ctrl );
 452  452            _igvn.replace_node( n, add2 );
 453  453            return add2;
 454  454          }
 455  455        }
 456  456      }
 457  457    }
 458  458  
 459  459    return NULL;
 460  460  }
 461  461  
 462  462  //------------------------------conditional_move-------------------------------
 463  463  // Attempt to replace a Phi with a conditional move.  We have some pretty
 464  464  // strict profitability requirements.  All Phis at the merge point must
 465  465  // be converted, so we can remove the control flow.  We need to limit the
 466  466  // number of c-moves to a small handful.  All code that was in the side-arms
 467  467  // of the CFG diamond is now speculatively executed.  This code has to be
 468  468  // "cheap enough".  We are pretty much limited to CFG diamonds that merge
 469  469  // 1 or 2 items with a total of 1 or 2 ops executed speculatively.
 470  470  Node *PhaseIdealLoop::conditional_move( Node *region ) {
 471  471  
 472  472    assert(region->is_Region(), "sanity check");
 473  473    if (region->req() != 3) return NULL;
 474  474  
 475  475    // Check for CFG diamond
 476  476    Node *lp = region->in(1);
 477  477    Node *rp = region->in(2);
 478  478    if (!lp || !rp) return NULL;
 479  479    Node *lp_c = lp->in(0);
 480  480    if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
 481  481    IfNode *iff = lp_c->as_If();
 482  482  
 483  483    // Check for ops pinned in an arm of the diamond.
 484  484    // Can't remove the control flow in this case
 485  485    if (lp->outcnt() > 1) return NULL;
 486  486    if (rp->outcnt() > 1) return NULL;
 487  487  
 488  488    IdealLoopTree* r_loop = get_loop(region);
 489  489    assert(r_loop == get_loop(iff), "sanity");
 490  490    // Always convert to CMOVE if all results are used only outside this loop.
 491  491    bool used_inside_loop = (r_loop == _ltree_root);
 492  492  
 493  493    // Check profitability
 494  494    int cost = 0;
 495  495    int phis = 0;
 496  496    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 497  497      Node *out = region->fast_out(i);
 498  498      if (!out->is_Phi()) continue; // Ignore other control edges, etc
 499  499      phis++;
 500  500      PhiNode* phi = out->as_Phi();
 501  501      BasicType bt = phi->type()->basic_type();
 502  502      switch (bt) {
 503  503      case T_FLOAT:
 504  504      case T_DOUBLE: {
 505  505        cost += Matcher::float_cmove_cost(); // Could be very expensive
 506  506        break;
 507  507      }
 508  508      case T_LONG: {
 509  509        cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
 510  510      }
 511  511      case T_INT:                 // These all CMOV fine
 512  512      case T_ADDRESS: {           // (RawPtr)
 513  513        cost++;
 514  514        break;
 515  515      }
 516  516      case T_NARROWOOP: // Fall through
 517  517      case T_OBJECT: {            // Base oops are OK, but not derived oops
 518  518        const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
 519  519        // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
 520  520        // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
 521  521        // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
 522  522        // have a Phi for the base here that we convert to a CMOVE all is well
 523  523        // and good.  But if the base is dead, we'll not make a CMOVE.  Later
 524  524        // the allocator will have to produce a base by creating a CMOVE of the
 525  525        // relevant bases.  This puts the allocator in the business of
 526  526        // manufacturing expensive instructions, generally a bad plan.
 527  527        // Just Say No to Conditionally-Moved Derived Pointers.
 528  528        if (tp && tp->offset() != 0)
 529  529          return NULL;
 530  530        cost++;
 531  531        break;
 532  532      }
 533  533      default:
 534  534        return NULL;              // In particular, can't do memory or I/O
 535  535      }
 536  536      // Add in cost any speculative ops
 537  537      for (uint j = 1; j < region->req(); j++) {
 538  538        Node *proj = region->in(j);
 539  539        Node *inp = phi->in(j);
 540  540        if (get_ctrl(inp) == proj) { // Found local op
 541  541          cost++;
 542  542          // Check for a chain of dependent ops; these will all become
 543  543          // speculative in a CMOV.
 544  544          for (uint k = 1; k < inp->req(); k++)
 545  545            if (get_ctrl(inp->in(k)) == proj)
 546  546              cost += ConditionalMoveLimit; // Too much speculative goo
 547  547        }
 548  548      }
 549  549      // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
 550  550      // This will likely Split-If, a higher-payoff operation.
 551  551      for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
 552  552        Node* use = phi->fast_out(k);
 553  553        if (use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP())
 554  554          cost += ConditionalMoveLimit;
 555  555        // Is there a use inside the loop?
 556  556        // Note: check only basic types since CMoveP is pinned.
 557  557        if (!used_inside_loop && is_java_primitive(bt)) {
 558  558          IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
 559  559          if (r_loop == u_loop || r_loop->is_member(u_loop)) {
 560  560            used_inside_loop = true;
 561  561          }
 562  562        }
 563  563      }
 564  564    }
 565  565    Node* bol = iff->in(1);
 566  566    assert(bol->Opcode() == Op_Bool, "");
 567  567    int cmp_op = bol->in(1)->Opcode();
 568  568    // It is expensive to generate flags from a float compare.
 569  569    // Avoid duplicated float compare.
 570  570    if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
 571  571  
 572  572    float infrequent_prob = PROB_UNLIKELY_MAG(3);
 573  573    // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
 574  574    if (used_inside_loop) {
 575  575      if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
 576  576  
 577  577      // BlockLayoutByFrequency optimization moves infrequent branch
 578  578      // from hot path. No point in CMOV'ing in such case (110 is used
 579  579      // instead of 100 to take into account not exactness of float value).
 580  580      if (BlockLayoutByFrequency) {
 581  581        infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
 582  582      }
 583  583    }
 584  584    // Check for highly predictable branch.  No point in CMOV'ing if
 585  585    // we are going to predict accurately all the time.
 586  586    if (iff->_prob < infrequent_prob ||
 587  587        iff->_prob > (1.0f - infrequent_prob))
 588  588      return NULL;
 589  589  
 590  590    // --------------
 591  591    // Now replace all Phis with CMOV's
 592  592    Node *cmov_ctrl = iff->in(0);
 593  593    uint flip = (lp->Opcode() == Op_IfTrue);
 594  594    while (1) {
 595  595      PhiNode* phi = NULL;
 596  596      for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 597  597        Node *out = region->fast_out(i);
 598  598        if (out->is_Phi()) {
 599  599          phi = out->as_Phi();
 600  600          break;
 601  601        }
 602  602      }
 603  603      if (phi == NULL)  break;
 604  604  #ifndef PRODUCT
 605  605      if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
 606  606  #endif
 607  607      // Move speculative ops
 608  608      for (uint j = 1; j < region->req(); j++) {
 609  609        Node *proj = region->in(j);
 610  610        Node *inp = phi->in(j);
 611  611        if (get_ctrl(inp) == proj) { // Found local op
 612  612  #ifndef PRODUCT
 613  613          if (PrintOpto && VerifyLoopOptimizations) {
 614  614            tty->print("  speculate: ");
 615  615            inp->dump();
 616  616          }
 617  617  #endif
 618  618          set_ctrl(inp, cmov_ctrl);
 619  619        }
 620  620      }
 621  621      Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
 622  622      register_new_node( cmov, cmov_ctrl );
 623  623      _igvn.replace_node( phi, cmov );
 624  624  #ifndef PRODUCT
 625  625      if (TraceLoopOpts) {
 626  626        tty->print("CMOV  ");
 627  627        r_loop->dump_head();
 628  628        if (Verbose) {
 629  629          bol->in(1)->dump(1);
 630  630          cmov->dump(1);
 631  631        }
 632  632      }
 633  633      if (VerifyLoopOptimizations) verify();
 634  634  #endif
 635  635    }
 636  636  
 637  637    // The useless CFG diamond will fold up later; see the optimization in
 638  638    // RegionNode::Ideal.
 639  639    _igvn._worklist.push(region);
 640  640  
 641  641    return iff->in(1);
 642  642  }
 643  643  
 644  644  //------------------------------split_if_with_blocks_pre-----------------------
 645  645  // Do the real work in a non-recursive function.  Data nodes want to be
 646  646  // cloned in the pre-order so they can feed each other nicely.
 647  647  Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
 648  648    // Cloning these guys is unlikely to win
 649  649    int n_op = n->Opcode();
 650  650    if( n_op == Op_MergeMem ) return n;
 651  651    if( n->is_Proj() ) return n;
 652  652    // Do not clone-up CmpFXXX variations, as these are always
 653  653    // followed by a CmpI
 654  654    if( n->is_Cmp() ) return n;
 655  655    // Attempt to use a conditional move instead of a phi/branch
 656  656    if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
 657  657      Node *cmov = conditional_move( n );
 658  658      if( cmov ) return cmov;
 659  659    }
 660  660    if( n->is_CFG() || n->is_LoadStore() )
 661  661      return n;
 662  662    if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
 663  663        n_op == Op_Opaque2 ) {
 664  664      if( !C->major_progress() )   // If chance of no more loop opts...
 665  665        _igvn._worklist.push(n);  // maybe we'll remove them
 666  666      return n;
 667  667    }
 668  668  
 669  669    if( n->is_Con() ) return n;   // No cloning for Con nodes
 670  670  
 671  671    Node *n_ctrl = get_ctrl(n);
 672  672    if( !n_ctrl ) return n;       // Dead node
 673  673  
 674  674    // Attempt to remix address expressions for loop invariants
 675  675    Node *m = remix_address_expressions( n );
 676  676    if( m ) return m;
 677  677  
 678  678    // Determine if the Node has inputs from some local Phi.
 679  679    // Returns the block to clone thru.
 680  680    Node *n_blk = has_local_phi_input( n );
 681  681    if( !n_blk ) return n;
 682  682    // Do not clone the trip counter through on a CountedLoop
 683  683    // (messes up the canonical shape).
 684  684    if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
 685  685  
 686  686    // Check for having no control input; not pinned.  Allow
 687  687    // dominating control.
 688  688    if( n->in(0) ) {
 689  689      Node *dom = idom(n_blk);
 690  690      if( dom_lca( n->in(0), dom ) != n->in(0) )
 691  691        return n;
 692  692    }
 693  693    // Policy: when is it profitable.  You must get more wins than
 694  694    // policy before it is considered profitable.  Policy is usually 0,
 695  695    // so 1 win is considered profitable.  Big merges will require big
 696  696    // cloning, so get a larger policy.
 697  697    int policy = n_blk->req() >> 2;
 698  698  
 699  699    // If the loop is a candidate for range check elimination,
 700  700    // delay splitting through it's phi until a later loop optimization
 701  701    if (n_blk->is_CountedLoop()) {
 702  702      IdealLoopTree *lp = get_loop(n_blk);
 703  703      if (lp && lp->_rce_candidate) {
 704  704        return n;
 705  705      }
 706  706    }
 707  707  
 708  708    // Use same limit as split_if_with_blocks_post
 709  709    if( C->unique() > 35000 ) return n; // Method too big
 710  710  
 711  711    // Split 'n' through the merge point if it is profitable
 712  712    Node *phi = split_thru_phi( n, n_blk, policy );
 713  713    if (!phi) return n;
 714  714  
 715  715    // Found a Phi to split thru!
 716  716    // Replace 'n' with the new phi
 717  717    _igvn.replace_node( n, phi );
 718  718    // Moved a load around the loop, 'en-registering' something.
 719  719    if (n_blk->is_Loop() && n->is_Load() &&
 720  720        !phi->in(LoopNode::LoopBackControl)->is_Load())
 721  721      C->set_major_progress();
 722  722  
 723  723    return phi;
 724  724  }
 725  725  
 726  726  static bool merge_point_too_heavy(Compile* C, Node* region) {
 727  727    // Bail out if the region and its phis have too many users.
 728  728    int weight = 0;
 729  729    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 730  730      weight += region->fast_out(i)->outcnt();
 731  731    }
 732  732    int nodes_left = MaxNodeLimit - C->live_nodes();
 733  733    if (weight * 8 > nodes_left) {
 734  734  #ifndef PRODUCT
 735  735      if (PrintOpto)
 736  736        tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
 737  737  #endif
 738  738      return true;
 739  739    } else {
 740  740      return false;
 741  741    }
 742  742  }
 743  743  
 744  744  static bool merge_point_safe(Node* region) {
 745  745    // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
 746  746    // having a PhiNode input. This sidesteps the dangerous case where the split
 747  747    // ConvI2LNode may become TOP if the input Value() does not
 748  748    // overlap the ConvI2L range, leaving a node which may not dominate its
 749  749    // uses.
 750  750    // A better fix for this problem can be found in the BugTraq entry, but
 751  751    // expediency for Mantis demands this hack.
 752  752    // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
 753  753    // split_if_with_blocks from splitting a block because we could not move around
 754  754    // the FastLockNode.
 755  755    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 756  756      Node* n = region->fast_out(i);
 757  757      if (n->is_Phi()) {
 758  758        for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
 759  759          Node* m = n->fast_out(j);
 760  760          if (m->is_FastLock())
 761  761            return false;
 762  762  #ifdef _LP64
 763  763          if (m->Opcode() == Op_ConvI2L)
 764  764            return false;
 765  765  #endif
 766  766        }
 767  767      }
 768  768    }
 769  769    return true;
 770  770  }
 771  771  
 772  772  
 773  773  //------------------------------place_near_use---------------------------------
 774  774  // Place some computation next to use but not inside inner loops.
 775  775  // For inner loop uses move it to the preheader area.
 776  776  Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
 777  777    IdealLoopTree *u_loop = get_loop( useblock );
 778  778    return (u_loop->_irreducible || u_loop->_child)
 779  779      ? useblock
 780  780      : u_loop->_head->in(LoopNode::EntryControl);
 781  781  }
 782  782  
 783  783  
 784  784  //------------------------------split_if_with_blocks_post----------------------
 785  785  // Do the real work in a non-recursive function.  CFG hackery wants to be
 786  786  // in the post-order, so it can dirty the I-DOM info and not use the dirtied
 787  787  // info.
 788  788  void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
 789  789  
 790  790    // Cloning Cmp through Phi's involves the split-if transform.
 791  791    // FastLock is not used by an If
 792  792    if( n->is_Cmp() && !n->is_FastLock() ) {
 793  793      if( C->unique() > 35000 ) return; // Method too big
 794  794  
 795  795      // Do not do 'split-if' if irreducible loops are present.
 796  796      if( _has_irreducible_loops )
 797  797        return;
 798  798  
 799  799      Node *n_ctrl = get_ctrl(n);
 800  800      // Determine if the Node has inputs from some local Phi.
 801  801      // Returns the block to clone thru.
 802  802      Node *n_blk = has_local_phi_input( n );
 803  803      if( n_blk != n_ctrl ) return;
 804  804  
 805  805      if( merge_point_too_heavy(C, n_ctrl) )
 806  806        return;
 807  807  
 808  808      if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
 809  809      Node *bol = n->unique_out();
 810  810      assert( bol->is_Bool(), "expect a bool here" );
 811  811      if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
 812  812      Node *iff = bol->unique_out();
 813  813  
 814  814      // Check some safety conditions
 815  815      if( iff->is_If() ) {        // Classic split-if?
 816  816        if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
 817  817      } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
 818  818        // Can't split CMove with different control edge.
 819  819        if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
 820  820        if( get_ctrl(iff->in(2)) == n_ctrl ||
 821  821            get_ctrl(iff->in(3)) == n_ctrl )
 822  822          return;                 // Inputs not yet split-up
 823  823        if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
 824  824          return;                 // Loop-invar test gates loop-varying CMOVE
 825  825        }
 826  826      } else {
 827  827        return;  // some other kind of node, such as an Allocate
 828  828      }
 829  829  
 830  830      // Do not do 'split-if' if some paths are dead.  First do dead code
 831  831      // elimination and then see if its still profitable.
 832  832      for( uint i = 1; i < n_ctrl->req(); i++ )
 833  833        if( n_ctrl->in(i) == C->top() )
 834  834          return;
 835  835  
 836  836      // When is split-if profitable?  Every 'win' on means some control flow
 837  837      // goes dead, so it's almost always a win.
 838  838      int policy = 0;
 839  839      // If trying to do a 'Split-If' at the loop head, it is only
 840  840      // profitable if the cmp folds up on BOTH paths.  Otherwise we
 841  841      // risk peeling a loop forever.
 842  842  
 843  843      // CNC - Disabled for now.  Requires careful handling of loop
 844  844      // body selection for the cloned code.  Also, make sure we check
 845  845      // for any input path not being in the same loop as n_ctrl.  For
 846  846      // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
 847  847      // because the alternative loop entry points won't be converted
 848  848      // into LoopNodes.
 849  849      IdealLoopTree *n_loop = get_loop(n_ctrl);
 850  850      for( uint j = 1; j < n_ctrl->req(); j++ )
 851  851        if( get_loop(n_ctrl->in(j)) != n_loop )
 852  852          return;
 853  853  
 854  854      // Check for safety of the merge point.
 855  855      if( !merge_point_safe(n_ctrl) ) {
 856  856        return;
 857  857      }
 858  858  
 859  859      // Split compare 'n' through the merge point if it is profitable
 860  860      Node *phi = split_thru_phi( n, n_ctrl, policy );
 861  861      if( !phi ) return;
 862  862  
 863  863      // Found a Phi to split thru!
 864  864      // Replace 'n' with the new phi
 865  865      _igvn.replace_node( n, phi );
 866  866  
 867  867      // Now split the bool up thru the phi
 868  868      Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
 869  869      _igvn.replace_node( bol, bolphi );
 870  870      assert( iff->in(1) == bolphi, "" );
 871  871      if( bolphi->Value(&_igvn)->singleton() )
 872  872        return;
 873  873  
 874  874      // Conditional-move?  Must split up now
 875  875      if( !iff->is_If() ) {
 876  876        Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
 877  877        _igvn.replace_node( iff, cmovphi );
 878  878        return;
 879  879      }
 880  880  
 881  881      // Now split the IF
 882  882      do_split_if( iff );
 883  883      return;
 884  884    }
 885  885  
 886  886    // Check for an IF ready to split; one that has its
 887  887    // condition codes input coming from a Phi at the block start.
 888  888    int n_op = n->Opcode();
 889  889  
 890  890    // Check for an IF being dominated by another IF same test
 891  891    if( n_op == Op_If ) {
 892  892      Node *bol = n->in(1);
 893  893      uint max = bol->outcnt();
 894  894      // Check for same test used more than once?
 895  895      if( n_op == Op_If && max > 1 && bol->is_Bool() ) {
 896  896        // Search up IDOMs to see if this IF is dominated.
 897  897        Node *cutoff = get_ctrl(bol);
 898  898  
 899  899        // Now search up IDOMs till cutoff, looking for a dominating test
 900  900        Node *prevdom = n;
 901  901        Node *dom = idom(prevdom);
 902  902        while( dom != cutoff ) {
 903  903          if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) {
 904  904            // Replace the dominated test with an obvious true or false.
 905  905            // Place it on the IGVN worklist for later cleanup.
 906  906            C->set_major_progress();
 907  907            dominated_by( prevdom, n, false, true );
 908  908  #ifndef PRODUCT
 909  909            if( VerifyLoopOptimizations ) verify();
 910  910  #endif
 911  911            return;
 912  912          }
 913  913          prevdom = dom;
 914  914          dom = idom(prevdom);
 915  915        }
 916  916      }
 917  917    }
 918  918  
 919  919    // See if a shared loop-varying computation has no loop-varying uses.
 920  920    // Happens if something is only used for JVM state in uncommon trap exits,
 921  921    // like various versions of induction variable+offset.  Clone the
 922  922    // computation per usage to allow it to sink out of the loop.
 923  923    if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
 924  924      Node *n_ctrl = get_ctrl(n);
 925  925      IdealLoopTree *n_loop = get_loop(n_ctrl);
 926  926      if( n_loop != _ltree_root ) {
 927  927        DUIterator_Fast imax, i = n->fast_outs(imax);
 928  928        for (; i < imax; i++) {
 929  929          Node* u = n->fast_out(i);
 930  930          if( !has_ctrl(u) )     break; // Found control user
 931  931          IdealLoopTree *u_loop = get_loop(get_ctrl(u));
 932  932          if( u_loop == n_loop ) break; // Found loop-varying use
 933  933          if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
 934  934          if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
 935  935        }
 936  936        bool did_break = (i < imax);  // Did we break out of the previous loop?
 937  937        if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
 938  938          Node *late_load_ctrl = NULL;
 939  939          if (n->is_Load()) {
 940  940            // If n is a load, get and save the result from get_late_ctrl(),
 941  941            // to be later used in calculating the control for n's clones.
 942  942            clear_dom_lca_tags();
 943  943            late_load_ctrl = get_late_ctrl(n, n_ctrl);
 944  944          }
 945  945          // If n is a load, and the late control is the same as the current
 946  946          // control, then the cloning of n is a pointless exercise, because
 947  947          // GVN will ensure that we end up where we started.
 948  948          if (!n->is_Load() || late_load_ctrl != n_ctrl) {
 949  949            for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
 950  950              Node *u = n->last_out(j); // Clone private computation per use
 951  951              _igvn.rehash_node_delayed(u);
 952  952              Node *x = n->clone(); // Clone computation
 953  953              Node *x_ctrl = NULL;
 954  954              if( u->is_Phi() ) {
 955  955                // Replace all uses of normal nodes.  Replace Phi uses
 956  956                // individually, so the separate Nodes can sink down
 957  957                // different paths.
 958  958                uint k = 1;
 959  959                while( u->in(k) != n ) k++;
 960  960                u->set_req( k, x );
 961  961                // x goes next to Phi input path
 962  962                x_ctrl = u->in(0)->in(k);
 963  963                --j;
 964  964              } else {              // Normal use
 965  965                // Replace all uses
 966  966                for( uint k = 0; k < u->req(); k++ ) {
 967  967                  if( u->in(k) == n ) {
 968  968                    u->set_req( k, x );
 969  969                    --j;
 970  970                  }
 971  971                }
 972  972                x_ctrl = get_ctrl(u);
 973  973              }
 974  974  
 975  975              // Find control for 'x' next to use but not inside inner loops.
 976  976              // For inner loop uses get the preheader area.
 977  977              x_ctrl = place_near_use(x_ctrl);
 978  978  
 979  979              if (n->is_Load()) {
 980  980                // For loads, add a control edge to a CFG node outside of the loop
 981  981                // to force them to not combine and return back inside the loop
 982  982                // during GVN optimization (4641526).
 983  983                //
 984  984                // Because we are setting the actual control input, factor in
 985  985                // the result from get_late_ctrl() so we respect any
 986  986                // anti-dependences. (6233005).
 987  987                x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
 988  988  
 989  989                // Don't allow the control input to be a CFG splitting node.
 990  990                // Such nodes should only have ProjNodes as outs, e.g. IfNode
 991  991                // should only have IfTrueNode and IfFalseNode (4985384).
 992  992                x_ctrl = find_non_split_ctrl(x_ctrl);
 993  993                assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
 994  994  
 995  995                x->set_req(0, x_ctrl);
 996  996              }
 997  997              register_new_node(x, x_ctrl);
 998  998  
 999  999              // Some institutional knowledge is needed here: 'x' is
1000 1000              // yanked because if the optimizer runs GVN on it all the
1001 1001              // cloned x's will common up and undo this optimization and
1002 1002              // be forced back in the loop.  This is annoying because it
1003 1003              // makes +VerifyOpto report false-positives on progress.  I
1004 1004              // tried setting control edges on the x's to force them to
1005 1005              // not combine, but the matching gets worried when it tries
1006 1006              // to fold a StoreP and an AddP together (as part of an
1007 1007              // address expression) and the AddP and StoreP have
1008 1008              // different controls.
1009 1009              if( !x->is_Load() && !x->is_DecodeN() ) _igvn._worklist.yank(x);
1010 1010            }
1011 1011            _igvn.remove_dead_node(n);
1012 1012          }
1013 1013        }
1014 1014      }
1015 1015    }
1016 1016  
1017 1017    // Check for Opaque2's who's loop has disappeared - who's input is in the
1018 1018    // same loop nest as their output.  Remove 'em, they are no longer useful.
1019 1019    if( n_op == Op_Opaque2 &&
1020 1020        n->in(1) != NULL &&
1021 1021        get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1022 1022      _igvn.replace_node( n, n->in(1) );
1023 1023    }
1024 1024  }
1025 1025  
1026 1026  //------------------------------split_if_with_blocks---------------------------
1027 1027  // Check for aggressive application of 'split-if' optimization,
1028 1028  // using basic block level info.
1029 1029  void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
1030 1030    Node *n = C->root();
1031 1031    visited.set(n->_idx); // first, mark node as visited
1032 1032    // Do pre-visit work for root
1033 1033    n = split_if_with_blocks_pre( n );
1034 1034    uint cnt = n->outcnt();
1035 1035    uint i   = 0;
1036 1036    while (true) {
1037 1037      // Visit all children
1038 1038      if (i < cnt) {
1039 1039        Node* use = n->raw_out(i);
1040 1040        ++i;
1041 1041        if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1042 1042          // Now do pre-visit work for this use
1043 1043          use = split_if_with_blocks_pre( use );
1044 1044          nstack.push(n, i); // Save parent and next use's index.
1045 1045          n   = use;         // Process all children of current use.
1046 1046          cnt = use->outcnt();
1047 1047          i   = 0;
1048 1048        }
1049 1049      }
1050 1050      else {
1051 1051        // All of n's children have been processed, complete post-processing.
1052 1052        if (cnt != 0 && !n->is_Con()) {
1053 1053          assert(has_node(n), "no dead nodes");
1054 1054          split_if_with_blocks_post( n );
1055 1055        }
1056 1056        if (nstack.is_empty()) {
1057 1057          // Finished all nodes on stack.
1058 1058          break;
1059 1059        }
1060 1060        // Get saved parent node and next use's index. Visit the rest of uses.
1061 1061        n   = nstack.node();
1062 1062        cnt = n->outcnt();
1063 1063        i   = nstack.index();
1064 1064        nstack.pop();
1065 1065      }
1066 1066    }
1067 1067  }
1068 1068  
1069 1069  
1070 1070  //=============================================================================
1071 1071  //
1072 1072  //                   C L O N E   A   L O O P   B O D Y
1073 1073  //
1074 1074  
1075 1075  //------------------------------clone_iff--------------------------------------
1076 1076  // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1077 1077  // "Nearly" because all Nodes have been cloned from the original in the loop,
1078 1078  // but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1079 1079  // through the Phi recursively, and return a Bool.
1080 1080  BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
1081 1081  
1082 1082    // Convert this Phi into a Phi merging Bools
1083 1083    uint i;
1084 1084    for( i = 1; i < phi->req(); i++ ) {
1085 1085      Node *b = phi->in(i);
1086 1086      if( b->is_Phi() ) {
1087 1087        _igvn.replace_input_of(phi, i, clone_iff( b->as_Phi(), loop ));
1088 1088      } else {
1089 1089        assert( b->is_Bool(), "" );
1090 1090      }
1091 1091    }
1092 1092  
1093 1093    Node *sample_bool = phi->in(1);
1094 1094    Node *sample_cmp  = sample_bool->in(1);
1095 1095  
1096 1096    // Make Phis to merge the Cmp's inputs.
1097 1097    PhiNode *phi1 = new (C) PhiNode( phi->in(0), Type::TOP );
1098 1098    PhiNode *phi2 = new (C) PhiNode( phi->in(0), Type::TOP );
1099 1099    for( i = 1; i < phi->req(); i++ ) {
1100 1100      Node *n1 = phi->in(i)->in(1)->in(1);
1101 1101      Node *n2 = phi->in(i)->in(1)->in(2);
1102 1102      phi1->set_req( i, n1 );
1103 1103      phi2->set_req( i, n2 );
1104 1104      phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
1105 1105      phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
1106 1106    }
1107 1107    // See if these Phis have been made before.
1108 1108    // Register with optimizer
1109 1109    Node *hit1 = _igvn.hash_find_insert(phi1);
1110 1110    if( hit1 ) {                  // Hit, toss just made Phi
1111 1111      _igvn.remove_dead_node(phi1); // Remove new phi
1112 1112      assert( hit1->is_Phi(), "" );
1113 1113      phi1 = (PhiNode*)hit1;      // Use existing phi
1114 1114    } else {                      // Miss
1115 1115      _igvn.register_new_node_with_optimizer(phi1);
1116 1116    }
1117 1117    Node *hit2 = _igvn.hash_find_insert(phi2);
1118 1118    if( hit2 ) {                  // Hit, toss just made Phi
1119 1119      _igvn.remove_dead_node(phi2); // Remove new phi
1120 1120      assert( hit2->is_Phi(), "" );
1121 1121      phi2 = (PhiNode*)hit2;      // Use existing phi
1122 1122    } else {                      // Miss
1123 1123      _igvn.register_new_node_with_optimizer(phi2);
1124 1124    }
1125 1125    // Register Phis with loop/block info
1126 1126    set_ctrl(phi1, phi->in(0));
1127 1127    set_ctrl(phi2, phi->in(0));
1128 1128    // Make a new Cmp
1129 1129    Node *cmp = sample_cmp->clone();
1130 1130    cmp->set_req( 1, phi1 );
1131 1131    cmp->set_req( 2, phi2 );
1132 1132    _igvn.register_new_node_with_optimizer(cmp);
1133 1133    set_ctrl(cmp, phi->in(0));
1134 1134  
1135 1135    // Make a new Bool
1136 1136    Node *b = sample_bool->clone();
1137 1137    b->set_req(1,cmp);
1138 1138    _igvn.register_new_node_with_optimizer(b);
1139 1139    set_ctrl(b, phi->in(0));
1140 1140  
1141 1141    assert( b->is_Bool(), "" );
1142 1142    return (BoolNode*)b;
1143 1143  }
1144 1144  
1145 1145  //------------------------------clone_bool-------------------------------------
1146 1146  // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1147 1147  // "Nearly" because all Nodes have been cloned from the original in the loop,
1148 1148  // but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1149 1149  // through the Phi recursively, and return a Bool.
1150 1150  CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
1151 1151    uint i;
1152 1152    // Convert this Phi into a Phi merging Bools
1153 1153    for( i = 1; i < phi->req(); i++ ) {
1154 1154      Node *b = phi->in(i);
1155 1155      if( b->is_Phi() ) {
1156 1156        _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
1157 1157      } else {
1158 1158        assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
1159 1159      }
1160 1160    }
1161 1161  
1162 1162    Node *sample_cmp = phi->in(1);
1163 1163  
1164 1164    // Make Phis to merge the Cmp's inputs.
1165 1165    PhiNode *phi1 = new (C) PhiNode( phi->in(0), Type::TOP );
1166 1166    PhiNode *phi2 = new (C) PhiNode( phi->in(0), Type::TOP );
1167 1167    for( uint j = 1; j < phi->req(); j++ ) {
1168 1168      Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
1169 1169      Node *n1, *n2;
1170 1170      if( cmp_top->is_Cmp() ) {
1171 1171        n1 = cmp_top->in(1);
1172 1172        n2 = cmp_top->in(2);
1173 1173      } else {
1174 1174        n1 = n2 = cmp_top;
1175 1175      }
1176 1176      phi1->set_req( j, n1 );
1177 1177      phi2->set_req( j, n2 );
1178 1178      phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
1179 1179      phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
1180 1180    }
1181 1181  
1182 1182    // See if these Phis have been made before.
1183 1183    // Register with optimizer
1184 1184    Node *hit1 = _igvn.hash_find_insert(phi1);
1185 1185    if( hit1 ) {                  // Hit, toss just made Phi
1186 1186      _igvn.remove_dead_node(phi1); // Remove new phi
1187 1187      assert( hit1->is_Phi(), "" );
1188 1188      phi1 = (PhiNode*)hit1;      // Use existing phi
1189 1189    } else {                      // Miss
1190 1190      _igvn.register_new_node_with_optimizer(phi1);
1191 1191    }
1192 1192    Node *hit2 = _igvn.hash_find_insert(phi2);
1193 1193    if( hit2 ) {                  // Hit, toss just made Phi
1194 1194      _igvn.remove_dead_node(phi2); // Remove new phi
1195 1195      assert( hit2->is_Phi(), "" );
1196 1196      phi2 = (PhiNode*)hit2;      // Use existing phi
1197 1197    } else {                      // Miss
1198 1198      _igvn.register_new_node_with_optimizer(phi2);
1199 1199    }
1200 1200    // Register Phis with loop/block info
1201 1201    set_ctrl(phi1, phi->in(0));
1202 1202    set_ctrl(phi2, phi->in(0));
1203 1203    // Make a new Cmp
1204 1204    Node *cmp = sample_cmp->clone();
1205 1205    cmp->set_req( 1, phi1 );
1206 1206    cmp->set_req( 2, phi2 );
1207 1207    _igvn.register_new_node_with_optimizer(cmp);
1208 1208    set_ctrl(cmp, phi->in(0));
1209 1209  
1210 1210    assert( cmp->is_Cmp(), "" );
1211 1211    return (CmpNode*)cmp;
1212 1212  }
1213 1213  
1214 1214  //------------------------------sink_use---------------------------------------
1215 1215  // If 'use' was in the loop-exit block, it now needs to be sunk
1216 1216  // below the post-loop merge point.
1217 1217  void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1218 1218    if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1219 1219      set_ctrl(use, post_loop);
1220 1220      for (DUIterator j = use->outs(); use->has_out(j); j++)
1221 1221        sink_use(use->out(j), post_loop);
1222 1222    }
1223 1223  }
1224 1224  
1225 1225  //------------------------------clone_loop-------------------------------------
1226 1226  //
1227 1227  //                   C L O N E   A   L O O P   B O D Y
1228 1228  //
1229 1229  // This is the basic building block of the loop optimizations.  It clones an
1230 1230  // entire loop body.  It makes an old_new loop body mapping; with this mapping
1231 1231  // you can find the new-loop equivalent to an old-loop node.  All new-loop
1232 1232  // nodes are exactly equal to their old-loop counterparts, all edges are the
1233 1233  // same.  All exits from the old-loop now have a RegionNode that merges the
1234 1234  // equivalent new-loop path.  This is true even for the normal "loop-exit"
1235 1235  // condition.  All uses of loop-invariant old-loop values now come from (one
1236 1236  // or more) Phis that merge their new-loop equivalents.
1237 1237  //
1238 1238  // This operation leaves the graph in an illegal state: there are two valid
1239 1239  // control edges coming from the loop pre-header to both loop bodies.  I'll
1240 1240  // definitely have to hack the graph after running this transform.
1241 1241  //
1242 1242  // From this building block I will further edit edges to perform loop peeling
1243 1243  // or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
1244 1244  //
1245 1245  // Parameter side_by_size_idom:
1246 1246  //   When side_by_size_idom is NULL, the dominator tree is constructed for
1247 1247  //      the clone loop to dominate the original.  Used in construction of
1248 1248  //      pre-main-post loop sequence.
1249 1249  //   When nonnull, the clone and original are side-by-side, both are
1250 1250  //      dominated by the side_by_side_idom node.  Used in construction of
1251 1251  //      unswitched loops.
1252 1252  void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
1253 1253                                   Node* side_by_side_idom) {
1254 1254  
1255 1255    // Step 1: Clone the loop body.  Make the old->new mapping.
1256 1256    uint i;
1257 1257    for( i = 0; i < loop->_body.size(); i++ ) {
1258 1258      Node *old = loop->_body.at(i);
1259 1259      Node *nnn = old->clone();
1260 1260      old_new.map( old->_idx, nnn );
1261 1261      _igvn.register_new_node_with_optimizer(nnn);
1262 1262    }
1263 1263  
1264 1264  
1265 1265    // Step 2: Fix the edges in the new body.  If the old input is outside the
1266 1266    // loop use it.  If the old input is INside the loop, use the corresponding
1267 1267    // new node instead.
1268 1268    for( i = 0; i < loop->_body.size(); i++ ) {
1269 1269      Node *old = loop->_body.at(i);
1270 1270      Node *nnn = old_new[old->_idx];
1271 1271      // Fix CFG/Loop controlling the new node
1272 1272      if (has_ctrl(old)) {
1273 1273        set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
1274 1274      } else {
1275 1275        set_loop(nnn, loop->_parent);
1276 1276        if (old->outcnt() > 0) {
1277 1277          set_idom( nnn, old_new[idom(old)->_idx], dd );
1278 1278        }
1279 1279      }
1280 1280      // Correct edges to the new node
1281 1281      for( uint j = 0; j < nnn->req(); j++ ) {
1282 1282          Node *n = nnn->in(j);
1283 1283          if( n ) {
1284 1284            IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
1285 1285            if( loop->is_member( old_in_loop ) )
1286 1286              nnn->set_req(j, old_new[n->_idx]);
1287 1287          }
1288 1288      }
1289 1289      _igvn.hash_find_insert(nnn);
1290 1290    }
1291 1291    Node *newhead = old_new[loop->_head->_idx];
1292 1292    set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1293 1293  
1294 1294  
1295 1295    // Step 3: Now fix control uses.  Loop varying control uses have already
1296 1296    // been fixed up (as part of all input edges in Step 2).  Loop invariant
1297 1297    // control uses must be either an IfFalse or an IfTrue.  Make a merge
1298 1298    // point to merge the old and new IfFalse/IfTrue nodes; make the use
1299 1299    // refer to this.
1300 1300    ResourceArea *area = Thread::current()->resource_area();
1301 1301    Node_List worklist(area);
1302 1302    uint new_counter = C->unique();
1303 1303    for( i = 0; i < loop->_body.size(); i++ ) {
1304 1304      Node* old = loop->_body.at(i);
1305 1305      if( !old->is_CFG() ) continue;
1306 1306      Node* nnn = old_new[old->_idx];
1307 1307  
1308 1308      // Copy uses to a worklist, so I can munge the def-use info
1309 1309      // with impunity.
1310 1310      for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1311 1311        worklist.push(old->fast_out(j));
1312 1312  
1313 1313      while( worklist.size() ) {  // Visit all uses
1314 1314        Node *use = worklist.pop();
1315 1315        if (!has_node(use))  continue; // Ignore dead nodes
1316 1316        IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1317 1317        if( !loop->is_member( use_loop ) && use->is_CFG() ) {
1318 1318          // Both OLD and USE are CFG nodes here.
1319 1319          assert( use->is_Proj(), "" );
1320 1320  
1321 1321          // Clone the loop exit control projection
1322 1322          Node *newuse = use->clone();
1323 1323          newuse->set_req(0,nnn);
1324 1324          _igvn.register_new_node_with_optimizer(newuse);
1325 1325          set_loop(newuse, use_loop);
1326 1326          set_idom(newuse, nnn, dom_depth(nnn) + 1 );
1327 1327  
1328 1328          // We need a Region to merge the exit from the peeled body and the
1329 1329          // exit from the old loop body.
1330 1330          RegionNode *r = new (C) RegionNode(3);
1331 1331          // Map the old use to the new merge point
1332 1332          old_new.map( use->_idx, r );
1333 1333          uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
1334 1334          assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
1335 1335  
1336 1336          // The original user of 'use' uses 'r' instead.
1337 1337          for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
1338 1338            Node* useuse = use->last_out(l);
1339 1339            _igvn.rehash_node_delayed(useuse);
1340 1340            uint uses_found = 0;
1341 1341            if( useuse->in(0) == use ) {
1342 1342              useuse->set_req(0, r);
1343 1343              uses_found++;
1344 1344              if( useuse->is_CFG() ) {
1345 1345                assert( dom_depth(useuse) > dd_r, "" );
1346 1346                set_idom(useuse, r, dom_depth(useuse));
1347 1347              }
1348 1348            }
1349 1349            for( uint k = 1; k < useuse->req(); k++ ) {
1350 1350              if( useuse->in(k) == use ) {
1351 1351                useuse->set_req(k, r);
1352 1352                uses_found++;
1353 1353              }
1354 1354            }
1355 1355            l -= uses_found;    // we deleted 1 or more copies of this edge
1356 1356          }
1357 1357  
1358 1358          // Now finish up 'r'
1359 1359          r->set_req( 1, newuse );
1360 1360          r->set_req( 2,    use );
1361 1361          _igvn.register_new_node_with_optimizer(r);
1362 1362          set_loop(r, use_loop);
1363 1363          set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
1364 1364        } // End of if a loop-exit test
1365 1365      }
1366 1366    }
1367 1367  
1368 1368    // Step 4: If loop-invariant use is not control, it must be dominated by a
1369 1369    // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
1370 1370    // there if needed.  Make a Phi there merging old and new used values.
1371 1371    Node_List *split_if_set = NULL;
1372 1372    Node_List *split_bool_set = NULL;
1373 1373    Node_List *split_cex_set = NULL;
1374 1374    for( i = 0; i < loop->_body.size(); i++ ) {
1375 1375      Node* old = loop->_body.at(i);
1376 1376      Node* nnn = old_new[old->_idx];
1377 1377      // Copy uses to a worklist, so I can munge the def-use info
1378 1378      // with impunity.
1379 1379      for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1380 1380        worklist.push(old->fast_out(j));
1381 1381  
1382 1382      while( worklist.size() ) {
1383 1383        Node *use = worklist.pop();
1384 1384        if (!has_node(use))  continue; // Ignore dead nodes
1385 1385        if (use->in(0) == C->top())  continue;

↓ open down ↓

1385 lines elided

↑ open up ↑

1386 1386        IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1387 1387        // Check for data-use outside of loop - at least one of OLD or USE
1388 1388        // must not be a CFG node.
1389 1389        if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
1390 1390  
1391 1391          // If the Data use is an IF, that means we have an IF outside of the
1392 1392          // loop that is switching on a condition that is set inside of the
1393 1393          // loop.  Happens if people set a loop-exit flag; then test the flag
1394 1394          // in the loop to break the loop, then test is again outside of the
1395 1395          // loop to determine which way the loop exited.
1396      -        if( use->is_If() || use->is_CMove() ) {
     1396 +        // Loop predicate If node connects to Bool node through Opaque1 node.
     1397 +        if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use)) {
1397 1398            // Since this code is highly unlikely, we lazily build the worklist
1398 1399            // of such Nodes to go split.
1399 1400            if( !split_if_set )
1400 1401              split_if_set = new Node_List(area);
1401 1402            split_if_set->push(use);
1402 1403          }
1403 1404          if( use->is_Bool() ) {
1404 1405            if( !split_bool_set )
1405 1406              split_bool_set = new Node_List(area);
1406 1407            split_bool_set->push(use);

1407 1408          }
1408 1409          if( use->Opcode() == Op_CreateEx ) {
1409 1410            if( !split_cex_set )
1410 1411              split_cex_set = new Node_List(area);
1411 1412            split_cex_set->push(use);
1412 1413          }
1413 1414  
1414 1415  
1415 1416          // Get "block" use is in
1416 1417          uint idx = 0;
1417 1418          while( use->in(idx) != old ) idx++;
1418 1419          Node *prev = use->is_CFG() ? use : get_ctrl(use);
1419 1420          assert( !loop->is_member( get_loop( prev ) ), "" );
1420 1421          Node *cfg = prev->_idx >= new_counter
1421 1422            ? prev->in(2)
1422 1423            : idom(prev);
1423 1424          if( use->is_Phi() )     // Phi use is in prior block
1424 1425            cfg = prev->in(idx);  // NOT in block of Phi itself
1425 1426          if (cfg->is_top()) {    // Use is dead?
1426 1427            _igvn.replace_input_of(use, idx, C->top());
1427 1428            continue;
1428 1429          }
1429 1430  
1430 1431          while( !loop->is_member( get_loop( cfg ) ) ) {
1431 1432            prev = cfg;
1432 1433            cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
1433 1434          }
1434 1435          // If the use occurs after merging several exits from the loop, then
1435 1436          // old value must have dominated all those exits.  Since the same old
1436 1437          // value was used on all those exits we did not need a Phi at this
1437 1438          // merge point.  NOW we do need a Phi here.  Each loop exit value
1438 1439          // is now merged with the peeled body exit; each exit gets its own
1439 1440          // private Phi and those Phis need to be merged here.
1440 1441          Node *phi;
1441 1442          if( prev->is_Region() ) {
1442 1443            if( idx == 0 ) {      // Updating control edge?
1443 1444              phi = prev;         // Just use existing control
1444 1445            } else {              // Else need a new Phi
1445 1446              phi = PhiNode::make( prev, old );
1446 1447              // Now recursively fix up the new uses of old!
1447 1448              for( uint i = 1; i < prev->req(); i++ ) {
1448 1449                worklist.push(phi); // Onto worklist once for each 'old' input
1449 1450              }
1450 1451            }
1451 1452          } else {
1452 1453            // Get new RegionNode merging old and new loop exits
1453 1454            prev = old_new[prev->_idx];
1454 1455            assert( prev, "just made this in step 7" );
1455 1456            if( idx == 0 ) {      // Updating control edge?
1456 1457              phi = prev;         // Just use existing control
1457 1458            } else {              // Else need a new Phi
1458 1459              // Make a new Phi merging data values properly
1459 1460              phi = PhiNode::make( prev, old );
1460 1461              phi->set_req( 1, nnn );
1461 1462            }
1462 1463          }
1463 1464          // If inserting a new Phi, check for prior hits
1464 1465          if( idx != 0 ) {
1465 1466            Node *hit = _igvn.hash_find_insert(phi);
1466 1467            if( hit == NULL ) {
1467 1468             _igvn.register_new_node_with_optimizer(phi); // Register new phi
1468 1469            } else {                                      // or
1469 1470              // Remove the new phi from the graph and use the hit
1470 1471              _igvn.remove_dead_node(phi);
1471 1472              phi = hit;                                  // Use existing phi
1472 1473            }
1473 1474            set_ctrl(phi, prev);
1474 1475          }
1475 1476          // Make 'use' use the Phi instead of the old loop body exit value
1476 1477          _igvn.replace_input_of(use, idx, phi);
1477 1478          if( use->_idx >= new_counter ) { // If updating new phis
1478 1479            // Not needed for correctness, but prevents a weak assert
1479 1480            // in AddPNode from tripping (when we end up with different
1480 1481            // base & derived Phis that will become the same after
1481 1482            // IGVN does CSE).
1482 1483            Node *hit = _igvn.hash_find_insert(use);
1483 1484            if( hit )             // Go ahead and re-hash for hits.
1484 1485              _igvn.replace_node( use, hit );
1485 1486          }
1486 1487  
1487 1488          // If 'use' was in the loop-exit block, it now needs to be sunk
1488 1489          // below the post-loop merge point.
1489 1490          sink_use( use, prev );
1490 1491        }
1491 1492      }
1492 1493    }
1493 1494  
1494 1495    // Check for IFs that need splitting/cloning.  Happens if an IF outside of
1495 1496    // the loop uses a condition set in the loop.  The original IF probably
1496 1497    // takes control from one or more OLD Regions (which in turn get from NEW
1497 1498    // Regions).  In any case, there will be a set of Phis for each merge point
1498 1499    // from the IF up to where the original BOOL def exists the loop.
1499 1500    if( split_if_set ) {
1500 1501      while( split_if_set->size() ) {
1501 1502        Node *iff = split_if_set->pop();
1502 1503        if( iff->in(1)->is_Phi() ) {
1503 1504          BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
1504 1505          _igvn.replace_input_of(iff, 1, b);
1505 1506        }
1506 1507      }
1507 1508    }
1508 1509    if( split_bool_set ) {
1509 1510      while( split_bool_set->size() ) {
1510 1511        Node *b = split_bool_set->pop();
1511 1512        Node *phi = b->in(1);
1512 1513        assert( phi->is_Phi(), "" );
1513 1514        CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
1514 1515        _igvn.replace_input_of(b, 1, cmp);
1515 1516      }
1516 1517    }
1517 1518    if( split_cex_set ) {
1518 1519      while( split_cex_set->size() ) {
1519 1520        Node *b = split_cex_set->pop();
1520 1521        assert( b->in(0)->is_Region(), "" );
1521 1522        assert( b->in(1)->is_Phi(), "" );
1522 1523        assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
1523 1524        split_up( b, b->in(0), NULL );
1524 1525      }
1525 1526    }
1526 1527  
1527 1528  }
1528 1529  
1529 1530  
1530 1531  //---------------------- stride_of_possible_iv -------------------------------------
1531 1532  // Looks for an iff/bool/comp with one operand of the compare
1532 1533  // being a cycle involving an add and a phi,
1533 1534  // with an optional truncation (left-shift followed by a right-shift)
1534 1535  // of the add. Returns zero if not an iv.
1535 1536  int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
1536 1537    Node* trunc1 = NULL;
1537 1538    Node* trunc2 = NULL;
1538 1539    const TypeInt* ttype = NULL;
1539 1540    if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
1540 1541      return 0;
1541 1542    }
1542 1543    BoolNode* bl = iff->in(1)->as_Bool();
1543 1544    Node* cmp = bl->in(1);
1544 1545    if (!cmp || cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU) {
1545 1546      return 0;
1546 1547    }
1547 1548    // Must have an invariant operand
1548 1549    if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
1549 1550      return 0;
1550 1551    }
1551 1552    Node* add2 = NULL;
1552 1553    Node* cmp1 = cmp->in(1);
1553 1554    if (cmp1->is_Phi()) {
1554 1555      // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
1555 1556      Node* phi = cmp1;
1556 1557      for (uint i = 1; i < phi->req(); i++) {
1557 1558        Node* in = phi->in(i);
1558 1559        Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
1559 1560                                  &trunc1, &trunc2, &ttype);
1560 1561        if (add && add->in(1) == phi) {
1561 1562          add2 = add->in(2);
1562 1563          break;
1563 1564        }
1564 1565      }
1565 1566    } else {
1566 1567      // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
1567 1568      Node* addtrunc = cmp1;
1568 1569      Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
1569 1570                                  &trunc1, &trunc2, &ttype);
1570 1571      if (add && add->in(1)->is_Phi()) {
1571 1572        Node* phi = add->in(1);
1572 1573        for (uint i = 1; i < phi->req(); i++) {
1573 1574          if (phi->in(i) == addtrunc) {
1574 1575            add2 = add->in(2);
1575 1576            break;
1576 1577          }
1577 1578        }
1578 1579      }
1579 1580    }
1580 1581    if (add2 != NULL) {
1581 1582      const TypeInt* add2t = _igvn.type(add2)->is_int();
1582 1583      if (add2t->is_con()) {
1583 1584        return add2t->get_con();
1584 1585      }
1585 1586    }
1586 1587    return 0;
1587 1588  }
1588 1589  
1589 1590  
1590 1591  //---------------------- stay_in_loop -------------------------------------
1591 1592  // Return the (unique) control output node that's in the loop (if it exists.)
1592 1593  Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
1593 1594    Node* unique = NULL;
1594 1595    if (!n) return NULL;
1595 1596    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1596 1597      Node* use = n->fast_out(i);
1597 1598      if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
1598 1599        if (unique != NULL) {
1599 1600          return NULL;
1600 1601        }
1601 1602        unique = use;
1602 1603      }
1603 1604    }
1604 1605    return unique;
1605 1606  }
1606 1607  
1607 1608  //------------------------------ register_node -------------------------------------
1608 1609  // Utility to register node "n" with PhaseIdealLoop
1609 1610  void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
1610 1611    _igvn.register_new_node_with_optimizer(n);
1611 1612    loop->_body.push(n);
1612 1613    if (n->is_CFG()) {
1613 1614      set_loop(n, loop);
1614 1615      set_idom(n, pred, ddepth);
1615 1616    } else {
1616 1617      set_ctrl(n, pred);
1617 1618    }
1618 1619  }
1619 1620  
1620 1621  //------------------------------ proj_clone -------------------------------------
1621 1622  // Utility to create an if-projection
1622 1623  ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
1623 1624    ProjNode* c = p->clone()->as_Proj();
1624 1625    c->set_req(0, iff);
1625 1626    return c;
1626 1627  }
1627 1628  
1628 1629  //------------------------------ short_circuit_if -------------------------------------
1629 1630  // Force the iff control output to be the live_proj
1630 1631  Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
1631 1632    int proj_con = live_proj->_con;
1632 1633    assert(proj_con == 0 || proj_con == 1, "false or true projection");
1633 1634    Node *con = _igvn.intcon(proj_con);
1634 1635    set_ctrl(con, C->root());
1635 1636    if (iff) {
1636 1637      iff->set_req(1, con);
1637 1638    }
1638 1639    return con;
1639 1640  }
1640 1641  
1641 1642  //------------------------------ insert_if_before_proj -------------------------------------
1642 1643  // Insert a new if before an if projection (* - new node)
1643 1644  //
1644 1645  // before
1645 1646  //           if(test)
1646 1647  //           /     \
1647 1648  //          v       v
1648 1649  //    other-proj   proj (arg)
1649 1650  //
1650 1651  // after
1651 1652  //           if(test)
1652 1653  //           /     \
1653 1654  //          /       v
1654 1655  //         |      * proj-clone
1655 1656  //         v          |
1656 1657  //    other-proj      v
1657 1658  //                * new_if(relop(cmp[IU](left,right)))
1658 1659  //                  /  \
1659 1660  //                 v    v
1660 1661  //         * new-proj  proj
1661 1662  //         (returned)
1662 1663  //
1663 1664  ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
1664 1665    IfNode* iff = proj->in(0)->as_If();
1665 1666    IdealLoopTree *loop = get_loop(proj);
1666 1667    ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1667 1668    int ddepth = dom_depth(proj);
1668 1669  
1669 1670    _igvn.rehash_node_delayed(iff);
1670 1671    _igvn.rehash_node_delayed(proj);
1671 1672  
1672 1673    proj->set_req(0, NULL);  // temporary disconnect
1673 1674    ProjNode* proj2 = proj_clone(proj, iff);
1674 1675    register_node(proj2, loop, iff, ddepth);
1675 1676  
1676 1677    Node* cmp = Signed ? (Node*) new (C)CmpINode(left, right) : (Node*) new (C)CmpUNode(left, right);
1677 1678    register_node(cmp, loop, proj2, ddepth);
1678 1679  
1679 1680    BoolNode* bol = new (C)BoolNode(cmp, relop);
1680 1681    register_node(bol, loop, proj2, ddepth);
1681 1682  
1682 1683    IfNode* new_if = new (C)IfNode(proj2, bol, iff->_prob, iff->_fcnt);
1683 1684    register_node(new_if, loop, proj2, ddepth);
1684 1685  
1685 1686    proj->set_req(0, new_if); // reattach
1686 1687    set_idom(proj, new_if, ddepth);
1687 1688  
1688 1689    ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
1689 1690    register_node(new_exit, get_loop(other_proj), new_if, ddepth);
1690 1691  
1691 1692    return new_exit;
1692 1693  }
1693 1694  
1694 1695  //------------------------------ insert_region_before_proj -------------------------------------
1695 1696  // Insert a region before an if projection (* - new node)
1696 1697  //
1697 1698  // before
1698 1699  //           if(test)
1699 1700  //          /      |
1700 1701  //         v       |
1701 1702  //       proj      v
1702 1703  //               other-proj
1703 1704  //
1704 1705  // after
1705 1706  //           if(test)
1706 1707  //          /      |
1707 1708  //         v       |
1708 1709  // * proj-clone    v
1709 1710  //         |     other-proj
1710 1711  //         v
1711 1712  // * new-region
1712 1713  //         |
1713 1714  //         v
1714 1715  // *      dum_if
1715 1716  //       /     \
1716 1717  //      v       \
1717 1718  // * dum-proj    v
1718 1719  //              proj
1719 1720  //
1720 1721  RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
1721 1722    IfNode* iff = proj->in(0)->as_If();
1722 1723    IdealLoopTree *loop = get_loop(proj);
1723 1724    ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1724 1725    int ddepth = dom_depth(proj);
1725 1726  
1726 1727    _igvn.rehash_node_delayed(iff);
1727 1728    _igvn.rehash_node_delayed(proj);
1728 1729  
1729 1730    proj->set_req(0, NULL);  // temporary disconnect
1730 1731    ProjNode* proj2 = proj_clone(proj, iff);
1731 1732    register_node(proj2, loop, iff, ddepth);
1732 1733  
1733 1734    RegionNode* reg = new (C)RegionNode(2);
1734 1735    reg->set_req(1, proj2);
1735 1736    register_node(reg, loop, iff, ddepth);
1736 1737  
1737 1738    IfNode* dum_if = new (C)IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
1738 1739    register_node(dum_if, loop, reg, ddepth);
1739 1740  
1740 1741    proj->set_req(0, dum_if); // reattach
1741 1742    set_idom(proj, dum_if, ddepth);
1742 1743  
1743 1744    ProjNode* dum_proj = proj_clone(other_proj, dum_if);
1744 1745    register_node(dum_proj, loop, dum_if, ddepth);
1745 1746  
1746 1747    return reg;
1747 1748  }
1748 1749  
1749 1750  //------------------------------ insert_cmpi_loop_exit -------------------------------------
1750 1751  // Clone a signed compare loop exit from an unsigned compare and
1751 1752  // insert it before the unsigned cmp on the stay-in-loop path.
1752 1753  // All new nodes inserted in the dominator tree between the original
1753 1754  // if and it's projections.  The original if test is replaced with
1754 1755  // a constant to force the stay-in-loop path.
1755 1756  //
1756 1757  // This is done to make sure that the original if and it's projections
1757 1758  // still dominate the same set of control nodes, that the ctrl() relation
1758 1759  // from data nodes to them is preserved, and that their loop nesting is
1759 1760  // preserved.
1760 1761  //
1761 1762  // before
1762 1763  //          if(i <u limit)    unsigned compare loop exit
1763 1764  //         /       |
1764 1765  //        v        v
1765 1766  //   exit-proj   stay-in-loop-proj
1766 1767  //
1767 1768  // after
1768 1769  //          if(stay-in-loop-const)  original if
1769 1770  //         /       |
1770 1771  //        /        v
1771 1772  //       /  if(i <  limit)    new signed test
1772 1773  //      /  /       |
1773 1774  //     /  /        v
1774 1775  //    /  /  if(i <u limit)    new cloned unsigned test
1775 1776  //   /  /   /      |
1776 1777  //   v  v  v       |
1777 1778  //    region       |
1778 1779  //        |        |
1779 1780  //      dum-if     |
1780 1781  //     /  |        |
1781 1782  // ether  |        |
1782 1783  //        v        v
1783 1784  //   exit-proj   stay-in-loop-proj
1784 1785  //
1785 1786  IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
1786 1787    const bool Signed   = true;
1787 1788    const bool Unsigned = false;
1788 1789  
1789 1790    BoolNode* bol = if_cmpu->in(1)->as_Bool();
1790 1791    if (bol->_test._test != BoolTest::lt) return NULL;
1791 1792    CmpNode* cmpu = bol->in(1)->as_Cmp();
1792 1793    if (cmpu->Opcode() != Op_CmpU) return NULL;
1793 1794    int stride = stride_of_possible_iv(if_cmpu);
1794 1795    if (stride == 0) return NULL;
1795 1796  
1796 1797    ProjNode* lp_continue = stay_in_loop(if_cmpu, loop)->as_Proj();
1797 1798    ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
1798 1799  
1799 1800    Node* limit = NULL;
1800 1801    if (stride > 0) {
1801 1802      limit = cmpu->in(2);
1802 1803    } else {
1803 1804      limit = _igvn.makecon(TypeInt::ZERO);
1804 1805      set_ctrl(limit, C->root());
1805 1806    }
1806 1807    // Create a new region on the exit path
1807 1808    RegionNode* reg = insert_region_before_proj(lp_exit);
1808 1809  
1809 1810    // Clone the if-cmpu-true-false using a signed compare
1810 1811    BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
1811 1812    ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
1812 1813    reg->add_req(cmpi_exit);
1813 1814  
1814 1815    // Clone the if-cmpu-true-false
1815 1816    BoolTest::mask rel_u = bol->_test._test;
1816 1817    ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
1817 1818    reg->add_req(cmpu_exit);
1818 1819  
1819 1820    // Force original if to stay in loop.
1820 1821    short_circuit_if(if_cmpu, lp_continue);
1821 1822  
1822 1823    return cmpi_exit->in(0)->as_If();
1823 1824  }
1824 1825  
1825 1826  //------------------------------ remove_cmpi_loop_exit -------------------------------------
1826 1827  // Remove a previously inserted signed compare loop exit.
1827 1828  void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
1828 1829    Node* lp_proj = stay_in_loop(if_cmp, loop);
1829 1830    assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
1830 1831           stay_in_loop(lp_proj, loop)->is_If() &&
1831 1832           stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
1832 1833    Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
1833 1834    set_ctrl(con, C->root());
1834 1835    if_cmp->set_req(1, con);
1835 1836  }
1836 1837  
1837 1838  //------------------------------ scheduled_nodelist -------------------------------------
1838 1839  // Create a post order schedule of nodes that are in the
1839 1840  // "member" set.  The list is returned in "sched".
1840 1841  // The first node in "sched" is the loop head, followed by
1841 1842  // nodes which have no inputs in the "member" set, and then
1842 1843  // followed by the nodes that have an immediate input dependence
1843 1844  // on a node in "sched".
1844 1845  void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
1845 1846  
1846 1847    assert(member.test(loop->_head->_idx), "loop head must be in member set");
1847 1848    Arena *a = Thread::current()->resource_area();
1848 1849    VectorSet visited(a);
1849 1850    Node_Stack nstack(a, loop->_body.size());
1850 1851  
1851 1852    Node* n  = loop->_head;  // top of stack is cached in "n"
1852 1853    uint idx = 0;
1853 1854    visited.set(n->_idx);
1854 1855  
1855 1856    // Initially push all with no inputs from within member set
1856 1857    for(uint i = 0; i < loop->_body.size(); i++ ) {
1857 1858      Node *elt = loop->_body.at(i);
1858 1859      if (member.test(elt->_idx)) {
1859 1860        bool found = false;
1860 1861        for (uint j = 0; j < elt->req(); j++) {
1861 1862          Node* def = elt->in(j);
1862 1863          if (def && member.test(def->_idx) && def != elt) {
1863 1864            found = true;
1864 1865            break;
1865 1866          }
1866 1867        }
1867 1868        if (!found && elt != loop->_head) {
1868 1869          nstack.push(n, idx);
1869 1870          n = elt;
1870 1871          assert(!visited.test(n->_idx), "not seen yet");
1871 1872          visited.set(n->_idx);
1872 1873        }
1873 1874      }
1874 1875    }
1875 1876  
1876 1877    // traverse out's that are in the member set
1877 1878    while (true) {
1878 1879      if (idx < n->outcnt()) {
1879 1880        Node* use = n->raw_out(idx);
1880 1881        idx++;
1881 1882        if (!visited.test_set(use->_idx)) {
1882 1883          if (member.test(use->_idx)) {
1883 1884            nstack.push(n, idx);
1884 1885            n = use;
1885 1886            idx = 0;
1886 1887          }
1887 1888        }
1888 1889      } else {
1889 1890        // All outputs processed
1890 1891        sched.push(n);
1891 1892        if (nstack.is_empty()) break;
1892 1893        n   = nstack.node();
1893 1894        idx = nstack.index();
1894 1895        nstack.pop();
1895 1896      }
1896 1897    }
1897 1898  }
1898 1899  
1899 1900  
1900 1901  //------------------------------ has_use_in_set -------------------------------------
1901 1902  // Has a use in the vector set
1902 1903  bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
1903 1904    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1904 1905      Node* use = n->fast_out(j);
1905 1906      if (vset.test(use->_idx)) {
1906 1907        return true;
1907 1908      }
1908 1909    }
1909 1910    return false;
1910 1911  }
1911 1912  
1912 1913  
1913 1914  //------------------------------ has_use_internal_to_set -------------------------------------
1914 1915  // Has use internal to the vector set (ie. not in a phi at the loop head)
1915 1916  bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
1916 1917    Node* head  = loop->_head;
1917 1918    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1918 1919      Node* use = n->fast_out(j);
1919 1920      if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
1920 1921        return true;
1921 1922      }
1922 1923    }
1923 1924    return false;
1924 1925  }
1925 1926  
1926 1927  
1927 1928  //------------------------------ clone_for_use_outside_loop -------------------------------------
1928 1929  // clone "n" for uses that are outside of loop
1929 1930  int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
1930 1931    int cloned = 0;
1931 1932    assert(worklist.size() == 0, "should be empty");
1932 1933    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1933 1934      Node* use = n->fast_out(j);
1934 1935      if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
1935 1936        worklist.push(use);
1936 1937      }
1937 1938    }
1938 1939    while( worklist.size() ) {
1939 1940      Node *use = worklist.pop();
1940 1941      if (!has_node(use) || use->in(0) == C->top()) continue;
1941 1942      uint j;
1942 1943      for (j = 0; j < use->req(); j++) {
1943 1944        if (use->in(j) == n) break;
1944 1945      }
1945 1946      assert(j < use->req(), "must be there");
1946 1947  
1947 1948      // clone "n" and insert it between the inputs of "n" and the use outside the loop
1948 1949      Node* n_clone = n->clone();
1949 1950      _igvn.replace_input_of(use, j, n_clone);
1950 1951      cloned++;
1951 1952      Node* use_c;
1952 1953      if (!use->is_Phi()) {
1953 1954        use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
1954 1955      } else {
1955 1956        // Use in a phi is considered a use in the associated predecessor block
1956 1957        use_c = use->in(0)->in(j);
1957 1958      }
1958 1959      set_ctrl(n_clone, use_c);
1959 1960      assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
1960 1961      get_loop(use_c)->_body.push(n_clone);
1961 1962      _igvn.register_new_node_with_optimizer(n_clone);
1962 1963  #if !defined(PRODUCT)
1963 1964      if (TracePartialPeeling) {
1964 1965        tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
1965 1966      }
1966 1967  #endif
1967 1968    }
1968 1969    return cloned;
1969 1970  }
1970 1971  
1971 1972  
1972 1973  //------------------------------ clone_for_special_use_inside_loop -------------------------------------
1973 1974  // clone "n" for special uses that are in the not_peeled region.
1974 1975  // If these def-uses occur in separate blocks, the code generator
1975 1976  // marks the method as not compilable.  For example, if a "BoolNode"
1976 1977  // is in a different basic block than the "IfNode" that uses it, then
1977 1978  // the compilation is aborted in the code generator.
1978 1979  void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
1979 1980                                                          VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
1980 1981    if (n->is_Phi() || n->is_Load()) {
1981 1982      return;
1982 1983    }
1983 1984    assert(worklist.size() == 0, "should be empty");
1984 1985    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1985 1986      Node* use = n->fast_out(j);
1986 1987      if ( not_peel.test(use->_idx) &&
1987 1988           (use->is_If() || use->is_CMove() || use->is_Bool()) &&
1988 1989           use->in(1) == n)  {
1989 1990        worklist.push(use);
1990 1991      }
1991 1992    }
1992 1993    if (worklist.size() > 0) {
1993 1994      // clone "n" and insert it between inputs of "n" and the use
1994 1995      Node* n_clone = n->clone();
1995 1996      loop->_body.push(n_clone);
1996 1997      _igvn.register_new_node_with_optimizer(n_clone);
1997 1998      set_ctrl(n_clone, get_ctrl(n));
1998 1999      sink_list.push(n_clone);
1999 2000      not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
2000 2001  #if !defined(PRODUCT)
2001 2002      if (TracePartialPeeling) {
2002 2003        tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
2003 2004      }
2004 2005  #endif
2005 2006      while( worklist.size() ) {
2006 2007        Node *use = worklist.pop();
2007 2008        _igvn.rehash_node_delayed(use);
2008 2009        for (uint j = 1; j < use->req(); j++) {
2009 2010          if (use->in(j) == n) {
2010 2011            use->set_req(j, n_clone);
2011 2012          }
2012 2013        }
2013 2014      }
2014 2015    }
2015 2016  }
2016 2017  
2017 2018  
2018 2019  //------------------------------ insert_phi_for_loop -------------------------------------
2019 2020  // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
2020 2021  void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2021 2022    Node *phi = PhiNode::make(lp, back_edge_val);
2022 2023    phi->set_req(LoopNode::EntryControl, lp_entry_val);
2023 2024    // Use existing phi if it already exists
2024 2025    Node *hit = _igvn.hash_find_insert(phi);
2025 2026    if( hit == NULL ) {
2026 2027      _igvn.register_new_node_with_optimizer(phi);
2027 2028      set_ctrl(phi, lp);
2028 2029    } else {
2029 2030      // Remove the new phi from the graph and use the hit
2030 2031      _igvn.remove_dead_node(phi);
2031 2032      phi = hit;
2032 2033    }
2033 2034    _igvn.replace_input_of(use, idx, phi);
2034 2035  }
2035 2036  
2036 2037  #ifdef ASSERT
2037 2038  //------------------------------ is_valid_loop_partition -------------------------------------
2038 2039  // Validate the loop partition sets: peel and not_peel
2039 2040  bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2040 2041                                                VectorSet& not_peel ) {
2041 2042    uint i;
2042 2043    // Check that peel_list entries are in the peel set
2043 2044    for (i = 0; i < peel_list.size(); i++) {
2044 2045      if (!peel.test(peel_list.at(i)->_idx)) {
2045 2046        return false;
2046 2047      }
2047 2048    }
2048 2049    // Check at loop members are in one of peel set or not_peel set
2049 2050    for (i = 0; i < loop->_body.size(); i++ ) {
2050 2051      Node *def  = loop->_body.at(i);
2051 2052      uint di = def->_idx;
2052 2053      // Check that peel set elements are in peel_list
2053 2054      if (peel.test(di)) {
2054 2055        if (not_peel.test(di)) {
2055 2056          return false;
2056 2057        }
2057 2058        // Must be in peel_list also
2058 2059        bool found = false;
2059 2060        for (uint j = 0; j < peel_list.size(); j++) {
2060 2061          if (peel_list.at(j)->_idx == di) {
2061 2062            found = true;
2062 2063            break;
2063 2064          }
2064 2065        }
2065 2066        if (!found) {
2066 2067          return false;
2067 2068        }
2068 2069      } else if (not_peel.test(di)) {
2069 2070        if (peel.test(di)) {
2070 2071          return false;
2071 2072        }
2072 2073      } else {
2073 2074        return false;
2074 2075      }
2075 2076    }
2076 2077    return true;
2077 2078  }
2078 2079  
2079 2080  //------------------------------ is_valid_clone_loop_exit_use -------------------------------------
2080 2081  // Ensure a use outside of loop is of the right form
2081 2082  bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
2082 2083    Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2083 2084    return (use->is_Phi() &&
2084 2085            use_c->is_Region() && use_c->req() == 3 &&
2085 2086            (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
2086 2087             use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
2087 2088             use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
2088 2089            loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
2089 2090  }
2090 2091  
2091 2092  //------------------------------ is_valid_clone_loop_form -------------------------------------
2092 2093  // Ensure that all uses outside of loop are of the right form
2093 2094  bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
2094 2095                                                 uint orig_exit_idx, uint clone_exit_idx) {
2095 2096    uint len = peel_list.size();
2096 2097    for (uint i = 0; i < len; i++) {
2097 2098      Node *def = peel_list.at(i);
2098 2099  
2099 2100      for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2100 2101        Node *use = def->fast_out(j);
2101 2102        Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2102 2103        if (!loop->is_member(get_loop(use_c))) {
2103 2104          // use is not in the loop, check for correct structure
2104 2105          if (use->in(0) == def) {
2105 2106            // Okay
2106 2107          } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
2107 2108            return false;
2108 2109          }
2109 2110        }
2110 2111      }
2111 2112    }
2112 2113    return true;
2113 2114  }
2114 2115  #endif
2115 2116  
2116 2117  //------------------------------ partial_peel -------------------------------------
2117 2118  // Partially peel (aka loop rotation) the top portion of a loop (called
2118 2119  // the peel section below) by cloning it and placing one copy just before
2119 2120  // the new loop head and the other copy at the bottom of the new loop.
2120 2121  //
2121 2122  //    before                       after                where it came from
2122 2123  //
2123 2124  //    stmt1                        stmt1
2124 2125  //  loop:                          stmt2                     clone
2125 2126  //    stmt2                        if condA goto exitA       clone
2126 2127  //    if condA goto exitA        new_loop:                   new
2127 2128  //    stmt3                        stmt3                     clone
2128 2129  //    if !condB goto loop          if condB goto exitB       clone
2129 2130  //  exitB:                         stmt2                     orig
2130 2131  //    stmt4                        if !condA goto new_loop   orig
2131 2132  //  exitA:                         goto exitA
2132 2133  //                               exitB:
2133 2134  //                                 stmt4
2134 2135  //                               exitA:
2135 2136  //
2136 2137  // Step 1: find the cut point: an exit test on probable
2137 2138  //         induction variable.
2138 2139  // Step 2: schedule (with cloning) operations in the peel
2139 2140  //         section that can be executed after the cut into
2140 2141  //         the section that is not peeled.  This may need
2141 2142  //         to clone operations into exit blocks.  For
2142 2143  //         instance, a reference to A[i] in the not-peel
2143 2144  //         section and a reference to B[i] in an exit block
2144 2145  //         may cause a left-shift of i by 2 to be placed
2145 2146  //         in the peel block.  This step will clone the left
2146 2147  //         shift into the exit block and sink the left shift
2147 2148  //         from the peel to the not-peel section.
2148 2149  // Step 3: clone the loop, retarget the control, and insert
2149 2150  //         phis for values that are live across the new loop
2150 2151  //         head.  This is very dependent on the graph structure
2151 2152  //         from clone_loop.  It creates region nodes for
2152 2153  //         exit control and associated phi nodes for values
2153 2154  //         flow out of the loop through that exit.  The region
2154 2155  //         node is dominated by the clone's control projection.
2155 2156  //         So the clone's peel section is placed before the
2156 2157  //         new loop head, and the clone's not-peel section is
2157 2158  //         forms the top part of the new loop.  The original
2158 2159  //         peel section forms the tail of the new loop.
2159 2160  // Step 4: update the dominator tree and recompute the
2160 2161  //         dominator depth.
2161 2162  //
2162 2163  //                   orig
2163 2164  //
2164 2165  //                   stmt1
2165 2166  //                     |
2166 2167  //                     v
2167 2168  //               loop predicate
2168 2169  //                     |
2169 2170  //                     v
2170 2171  //                   loop<----+
2171 2172  //                     |      |
2172 2173  //                   stmt2    |
2173 2174  //                     |      |
2174 2175  //                     v      |
2175 2176  //                    ifA     |
2176 2177  //                   / |      |
2177 2178  //                  v  v      |
2178 2179  //               false true   ^  <-- last_peel
2179 2180  //               /     |      |
2180 2181  //              /   ===|==cut |
2181 2182  //             /     stmt3    |  <-- first_not_peel
2182 2183  //            /        |      |
2183 2184  //            |        v      |
2184 2185  //            v       ifB     |
2185 2186  //          exitA:   / \      |
2186 2187  //                  /   \     |
2187 2188  //                 v     v    |
2188 2189  //               false true   |
2189 2190  //               /       \    |
2190 2191  //              /         ----+
2191 2192  //             |
2192 2193  //             v
2193 2194  //           exitB:
2194 2195  //           stmt4
2195 2196  //
2196 2197  //
2197 2198  //            after clone loop
2198 2199  //
2199 2200  //                   stmt1
2200 2201  //                     |
2201 2202  //                     v
2202 2203  //               loop predicate
2203 2204  //                 /       \
2204 2205  //        clone   /         \   orig
2205 2206  //               /           \
2206 2207  //              /             \
2207 2208  //             v               v
2208 2209  //   +---->loop                loop<----+
2209 2210  //   |      |                    |      |
2210 2211  //   |    stmt2                stmt2    |
2211 2212  //   |      |                    |      |
2212 2213  //   |      v                    v      |
2213 2214  //   |      ifA                 ifA     |
2214 2215  //   |      | \                / |      |
2215 2216  //   |      v  v              v  v      |
2216 2217  //   ^    true  false      false true   ^  <-- last_peel
2217 2218  //   |      |   ^   \       /    |      |
2218 2219  //   | cut==|==  \   \     /  ===|==cut |
2219 2220  //   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
2220 2221  //   |      |    dom   | |       |      |
2221 2222  //   |      v      \  1v v2      v      |
2222 2223  //   |      ifB     regionA     ifB     |
2223 2224  //   |      / \        |       / \      |
2224 2225  //   |     /   \       v      /   \     |
2225 2226  //   |    v     v    exitA:  v     v    |
2226 2227  //   |    true  false      false true   |
2227 2228  //   |    /     ^   \      /       \    |
2228 2229  //   +----       \   \    /         ----+
2229 2230  //               dom  \  /
2230 2231  //                 \  1v v2
2231 2232  //                  regionB
2232 2233  //                     |
2233 2234  //                     v
2234 2235  //                   exitB:
2235 2236  //                   stmt4
2236 2237  //
2237 2238  //
2238 2239  //           after partial peel
2239 2240  //
2240 2241  //                  stmt1
2241 2242  //                     |
2242 2243  //                     v
2243 2244  //               loop predicate
2244 2245  //                 /
2245 2246  //        clone   /             orig
2246 2247  //               /          TOP
2247 2248  //              /             \
2248 2249  //             v               v
2249 2250  //    TOP->loop                loop----+
2250 2251  //          |                    |      |
2251 2252  //        stmt2                stmt2    |
2252 2253  //          |                    |      |
2253 2254  //          v                    v      |
2254 2255  //          ifA                 ifA     |
2255 2256  //          | \                / |      |
2256 2257  //          v  v              v  v      |
2257 2258  //        true  false      false true   |     <-- last_peel
2258 2259  //          |   ^   \       /    +------|---+
2259 2260  //  +->newloop   \   \     /  === ==cut |   |
2260 2261  //  |     stmt3   \   \   /     TOP     |   |
2261 2262  //  |       |    dom   | |      stmt3   |   | <-- first_not_peel
2262 2263  //  |       v      \  1v v2      v      |   |
2263 2264  //  |       ifB     regionA     ifB     ^   v
2264 2265  //  |       / \        |       / \      |   |
2265 2266  //  |      /   \       v      /   \     |   |
2266 2267  //  |     v     v    exitA:  v     v    |   |
2267 2268  //  |     true  false      false true   |   |
2268 2269  //  |     /     ^   \      /       \    |   |
2269 2270  //  |    |       \   \    /         v   |   |
2270 2271  //  |    |       dom  \  /         TOP  |   |
2271 2272  //  |    |         \  1v v2             |   |
2272 2273  //  ^    v          regionB             |   |
2273 2274  //  |    |             |                |   |
2274 2275  //  |    |             v                ^   v
2275 2276  //  |    |           exitB:             |   |
2276 2277  //  |    |           stmt4              |   |
2277 2278  //  |    +------------>-----------------+   |
2278 2279  //  |                                       |
2279 2280  //  +-----------------<---------------------+
2280 2281  //
2281 2282  //
2282 2283  //              final graph
2283 2284  //
2284 2285  //                  stmt1
2285 2286  //                    |
2286 2287  //                    v
2287 2288  //               loop predicate
2288 2289  //                    |
2289 2290  //                    v
2290 2291  //                  stmt2 clone
2291 2292  //                    |
2292 2293  //                    v
2293 2294  //         ........> ifA clone
2294 2295  //         :        / |
2295 2296  //        dom      /  |
2296 2297  //         :      v   v
2297 2298  //         :  false   true
2298 2299  //         :  |       |
2299 2300  //         :  |       v
2300 2301  //         :  |    newloop<-----+
2301 2302  //         :  |        |        |
2302 2303  //         :  |     stmt3 clone |
2303 2304  //         :  |        |        |
2304 2305  //         :  |        v        |
2305 2306  //         :  |       ifB       |
2306 2307  //         :  |      / \        |
2307 2308  //         :  |     v   v       |
2308 2309  //         :  |  false true     |
2309 2310  //         :  |   |     |       |
2310 2311  //         :  |   v    stmt2    |
2311 2312  //         :  | exitB:  |       |
2312 2313  //         :  | stmt4   v       |
2313 2314  //         :  |       ifA orig  |
2314 2315  //         :  |      /  \       |
2315 2316  //         :  |     /    \      |
2316 2317  //         :  |    v     v      |
2317 2318  //         :  |  false  true    |
2318 2319  //         :  |  /        \     |
2319 2320  //         :  v  v         -----+
2320 2321  //          RegionA
2321 2322  //             |
2322 2323  //             v
2323 2324  //           exitA
2324 2325  //
2325 2326  bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2326 2327  
2327 2328    assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
2328 2329    if (!loop->_head->is_Loop()) {
2329 2330      return false;  }
2330 2331  
2331 2332    LoopNode *head  = loop->_head->as_Loop();
2332 2333  
2333 2334    if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2334 2335      return false;
2335 2336    }
2336 2337  
2337 2338    // Check for complex exit control
2338 2339    for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2339 2340      Node *n = loop->_body.at(ii);
2340 2341      int opc = n->Opcode();
2341 2342      if (n->is_Call()        ||
2342 2343          opc == Op_Catch     ||
2343 2344          opc == Op_CatchProj ||
2344 2345          opc == Op_Jump      ||
2345 2346          opc == Op_JumpProj) {
2346 2347  #if !defined(PRODUCT)
2347 2348        if (TracePartialPeeling) {
2348 2349          tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2349 2350        }
2350 2351  #endif
2351 2352        return false;
2352 2353      }
2353 2354    }
2354 2355  
2355 2356    int dd = dom_depth(head);
2356 2357  
2357 2358    // Step 1: find cut point
2358 2359  
2359 2360    // Walk up dominators to loop head looking for first loop exit
2360 2361    // which is executed on every path thru loop.
2361 2362    IfNode *peel_if = NULL;
2362 2363    IfNode *peel_if_cmpu = NULL;
2363 2364  
2364 2365    Node *iff = loop->tail();
2365 2366    while( iff != head ) {
2366 2367      if( iff->is_If() ) {
2367 2368        Node *ctrl = get_ctrl(iff->in(1));
2368 2369        if (ctrl->is_top()) return false; // Dead test on live IF.
2369 2370        // If loop-varying exit-test, check for induction variable
2370 2371        if( loop->is_member(get_loop(ctrl)) &&
2371 2372            loop->is_loop_exit(iff) &&
2372 2373            is_possible_iv_test(iff)) {
2373 2374          Node* cmp = iff->in(1)->in(1);
2374 2375          if (cmp->Opcode() == Op_CmpI) {
2375 2376            peel_if = iff->as_If();
2376 2377          } else {
2377 2378            assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
2378 2379            peel_if_cmpu = iff->as_If();
2379 2380          }
2380 2381        }
2381 2382      }
2382 2383      iff = idom(iff);
2383 2384    }
2384 2385    // Prefer signed compare over unsigned compare.
2385 2386    IfNode* new_peel_if = NULL;
2386 2387    if (peel_if == NULL) {
2387 2388      if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
2388 2389        return false;   // No peel point found
2389 2390      }
2390 2391      new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
2391 2392      if (new_peel_if == NULL) {
2392 2393        return false;   // No peel point found
2393 2394      }
2394 2395      peel_if = new_peel_if;
2395 2396    }
2396 2397    Node* last_peel        = stay_in_loop(peel_if, loop);
2397 2398    Node* first_not_peeled = stay_in_loop(last_peel, loop);
2398 2399    if (first_not_peeled == NULL || first_not_peeled == head) {
2399 2400      return false;
2400 2401    }
2401 2402  
2402 2403  #if !defined(PRODUCT)
2403 2404    if (TraceLoopOpts) {
2404 2405      tty->print("PartialPeel  ");
2405 2406      loop->dump_head();
2406 2407    }
2407 2408  
2408 2409    if (TracePartialPeeling) {
2409 2410      tty->print_cr("before partial peel one iteration");
2410 2411      Node_List wl;
2411 2412      Node* t = head->in(2);
2412 2413      while (true) {
2413 2414        wl.push(t);
2414 2415        if (t == head) break;
2415 2416        t = idom(t);
2416 2417      }
2417 2418      while (wl.size() > 0) {
2418 2419        Node* tt = wl.pop();
2419 2420        tt->dump();
2420 2421        if (tt == last_peel) tty->print_cr("-- cut --");
2421 2422      }
2422 2423    }
2423 2424  #endif
2424 2425    ResourceArea *area = Thread::current()->resource_area();
2425 2426    VectorSet peel(area);
2426 2427    VectorSet not_peel(area);
2427 2428    Node_List peel_list(area);
2428 2429    Node_List worklist(area);
2429 2430    Node_List sink_list(area);
2430 2431  
2431 2432    // Set of cfg nodes to peel are those that are executable from
2432 2433    // the head through last_peel.
2433 2434    assert(worklist.size() == 0, "should be empty");
2434 2435    worklist.push(head);
2435 2436    peel.set(head->_idx);
2436 2437    while (worklist.size() > 0) {
2437 2438      Node *n = worklist.pop();
2438 2439      if (n != last_peel) {
2439 2440        for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2440 2441          Node* use = n->fast_out(j);
2441 2442          if (use->is_CFG() &&
2442 2443              loop->is_member(get_loop(use)) &&
2443 2444              !peel.test_set(use->_idx)) {
2444 2445            worklist.push(use);
2445 2446          }
2446 2447        }
2447 2448      }
2448 2449    }
2449 2450  
2450 2451    // Set of non-cfg nodes to peel are those that are control
2451 2452    // dependent on the cfg nodes.
2452 2453    uint i;
2453 2454    for(i = 0; i < loop->_body.size(); i++ ) {
2454 2455      Node *n = loop->_body.at(i);
2455 2456      Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
2456 2457      if (peel.test(n_c->_idx)) {
2457 2458        peel.set(n->_idx);
2458 2459      } else {
2459 2460        not_peel.set(n->_idx);
2460 2461      }
2461 2462    }
2462 2463  
2463 2464    // Step 2: move operations from the peeled section down into the
2464 2465    //         not-peeled section
2465 2466  
2466 2467    // Get a post order schedule of nodes in the peel region
2467 2468    // Result in right-most operand.
2468 2469    scheduled_nodelist(loop, peel, peel_list );
2469 2470  
2470 2471    assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2471 2472  
2472 2473    // For future check for too many new phis
2473 2474    uint old_phi_cnt = 0;
2474 2475    for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
2475 2476      Node* use = head->fast_out(j);
2476 2477      if (use->is_Phi()) old_phi_cnt++;
2477 2478    }
2478 2479  
2479 2480  #if !defined(PRODUCT)
2480 2481    if (TracePartialPeeling) {
2481 2482      tty->print_cr("\npeeled list");
2482 2483    }
2483 2484  #endif
2484 2485  
2485 2486    // Evacuate nodes in peel region into the not_peeled region if possible
2486 2487    uint new_phi_cnt = 0;
2487 2488    uint cloned_for_outside_use = 0;
2488 2489    for (i = 0; i < peel_list.size();) {
2489 2490      Node* n = peel_list.at(i);
2490 2491  #if !defined(PRODUCT)
2491 2492    if (TracePartialPeeling) n->dump();
2492 2493  #endif
2493 2494      bool incr = true;
2494 2495      if ( !n->is_CFG() ) {
2495 2496  
2496 2497        if ( has_use_in_set(n, not_peel) ) {
2497 2498  
2498 2499          // If not used internal to the peeled region,
2499 2500          // move "n" from peeled to not_peeled region.
2500 2501  
2501 2502          if ( !has_use_internal_to_set(n, peel, loop) ) {
2502 2503  
2503 2504            // if not pinned and not a load (which maybe anti-dependent on a store)
2504 2505            // and not a CMove (Matcher expects only bool->cmove).
2505 2506            if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
2506 2507              cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
2507 2508              sink_list.push(n);
2508 2509              peel     >>= n->_idx; // delete n from peel set.
2509 2510              not_peel <<= n->_idx; // add n to not_peel set.
2510 2511              peel_list.remove(i);
2511 2512              incr = false;
2512 2513  #if !defined(PRODUCT)
2513 2514              if (TracePartialPeeling) {
2514 2515                tty->print_cr("sink to not_peeled region: %d newbb: %d",
2515 2516                              n->_idx, get_ctrl(n)->_idx);
2516 2517              }
2517 2518  #endif
2518 2519            }
2519 2520          } else {
2520 2521            // Otherwise check for special def-use cases that span
2521 2522            // the peel/not_peel boundary such as bool->if
2522 2523            clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
2523 2524            new_phi_cnt++;
2524 2525          }
2525 2526        }
2526 2527      }
2527 2528      if (incr) i++;
2528 2529    }
2529 2530  
2530 2531    if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
2531 2532  #if !defined(PRODUCT)
2532 2533      if (TracePartialPeeling) {
2533 2534        tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
2534 2535                      new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
2535 2536      }
2536 2537  #endif
2537 2538      if (new_peel_if != NULL) {
2538 2539        remove_cmpi_loop_exit(new_peel_if, loop);
2539 2540      }
2540 2541      // Inhibit more partial peeling on this loop
2541 2542      assert(!head->is_partial_peel_loop(), "not partial peeled");
2542 2543      head->mark_partial_peel_failed();
2543 2544      if (cloned_for_outside_use > 0) {
2544 2545        // Terminate this round of loop opts because
2545 2546        // the graph outside this loop was changed.
2546 2547        C->set_major_progress();
2547 2548        return true;
2548 2549      }
2549 2550      return false;
2550 2551    }
2551 2552  
2552 2553    // Step 3: clone loop, retarget control, and insert new phis
2553 2554  
2554 2555    // Create new loop head for new phis and to hang
2555 2556    // the nodes being moved (sinked) from the peel region.
2556 2557    LoopNode* new_head = new (C) LoopNode(last_peel, last_peel);
2557 2558    new_head->set_unswitch_count(head->unswitch_count()); // Preserve
2558 2559    _igvn.register_new_node_with_optimizer(new_head);
2559 2560    assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
2560 2561    first_not_peeled->set_req(0, new_head);
2561 2562    set_loop(new_head, loop);
2562 2563    loop->_body.push(new_head);
2563 2564    not_peel.set(new_head->_idx);
2564 2565    set_idom(new_head, last_peel, dom_depth(first_not_peeled));
2565 2566    set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
2566 2567  
2567 2568    while (sink_list.size() > 0) {
2568 2569      Node* n = sink_list.pop();
2569 2570      set_ctrl(n, new_head);
2570 2571    }
2571 2572  
2572 2573    assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2573 2574  
2574 2575    clone_loop( loop, old_new, dd );
2575 2576  
2576 2577    const uint clone_exit_idx = 1;
2577 2578    const uint orig_exit_idx  = 2;
2578 2579    assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
2579 2580  
2580 2581    Node* head_clone             = old_new[head->_idx];
2581 2582    LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
2582 2583    Node* orig_tail_clone        = head_clone->in(2);
2583 2584  
2584 2585    // Add phi if "def" node is in peel set and "use" is not
2585 2586  
2586 2587    for(i = 0; i < peel_list.size(); i++ ) {
2587 2588      Node *def  = peel_list.at(i);
2588 2589      if (!def->is_CFG()) {
2589 2590        for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2590 2591          Node *use = def->fast_out(j);
2591 2592          if (has_node(use) && use->in(0) != C->top() &&
2592 2593              (!peel.test(use->_idx) ||
2593 2594               (use->is_Phi() && use->in(0) == head)) ) {
2594 2595            worklist.push(use);
2595 2596          }
2596 2597        }
2597 2598        while( worklist.size() ) {
2598 2599          Node *use = worklist.pop();
2599 2600          for (uint j = 1; j < use->req(); j++) {
2600 2601            Node* n = use->in(j);
2601 2602            if (n == def) {
2602 2603  
2603 2604              // "def" is in peel set, "use" is not in peel set
2604 2605              // or "use" is in the entry boundary (a phi) of the peel set
2605 2606  
2606 2607              Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
2607 2608  
2608 2609              if ( loop->is_member(get_loop( use_c )) ) {
2609 2610                // use is in loop
2610 2611                if (old_new[use->_idx] != NULL) { // null for dead code
2611 2612                  Node* use_clone = old_new[use->_idx];
2612 2613                  _igvn.replace_input_of(use, j, C->top());
2613 2614                  insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
2614 2615                }
2615 2616              } else {
2616 2617                assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
2617 2618                // use is not in the loop, check if the live range includes the cut
2618 2619                Node* lp_if = use_c->in(orig_exit_idx)->in(0);
2619 2620                if (not_peel.test(lp_if->_idx)) {
2620 2621                  assert(j == orig_exit_idx, "use from original loop");
2621 2622                  insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
2622 2623                }
2623 2624              }
2624 2625            }
2625 2626          }
2626 2627        }
2627 2628      }
2628 2629    }
2629 2630  
2630 2631    // Step 3b: retarget control
2631 2632  
2632 2633    // Redirect control to the new loop head if a cloned node in
2633 2634    // the not_peeled region has control that points into the peeled region.
2634 2635    // This necessary because the cloned peeled region will be outside
2635 2636    // the loop.
2636 2637    //                            from    to
2637 2638    //          cloned-peeled    <---+
2638 2639    //    new_head_clone:            |    <--+
2639 2640    //          cloned-not_peeled  in(0)    in(0)
2640 2641    //          orig-peeled
2641 2642  
2642 2643    for(i = 0; i < loop->_body.size(); i++ ) {
2643 2644      Node *n = loop->_body.at(i);
2644 2645      if (!n->is_CFG()           && n->in(0) != NULL        &&
2645 2646          not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
2646 2647        Node* n_clone = old_new[n->_idx];
2647 2648        _igvn.replace_input_of(n_clone, 0, new_head_clone);
2648 2649      }
2649 2650    }
2650 2651  
2651 2652    // Backedge of the surviving new_head (the clone) is original last_peel
2652 2653    _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
2653 2654  
2654 2655    // Cut first node in original not_peel set
2655 2656    _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
2656 2657    new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
2657 2658    new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
2658 2659  
2659 2660    // Copy head_clone back-branch info to original head
2660 2661    // and remove original head's loop entry and
2661 2662    // clone head's back-branch
2662 2663    _igvn.rehash_node_delayed(head); // Multiple edge updates
2663 2664    head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
2664 2665    head->set_req(LoopNode::LoopBackControl, C->top());
2665 2666    _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
2666 2667  
2667 2668    // Similarly modify the phis
2668 2669    for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
2669 2670      Node* use = head->fast_out(k);
2670 2671      if (use->is_Phi() && use->outcnt() > 0) {
2671 2672        Node* use_clone = old_new[use->_idx];
2672 2673        _igvn.rehash_node_delayed(use); // Multiple edge updates
2673 2674        use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
2674 2675        use->set_req(LoopNode::LoopBackControl, C->top());
2675 2676        _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
2676 2677      }
2677 2678    }
2678 2679  
2679 2680    // Step 4: update dominator tree and dominator depth
2680 2681  
2681 2682    set_idom(head, orig_tail_clone, dd);
2682 2683    recompute_dom_depth();
2683 2684  
2684 2685    // Inhibit more partial peeling on this loop
2685 2686    new_head_clone->set_partial_peel_loop();
2686 2687    C->set_major_progress();
2687 2688    loop->record_for_igvn();
2688 2689  
2689 2690  #if !defined(PRODUCT)
2690 2691    if (TracePartialPeeling) {
2691 2692      tty->print_cr("\nafter partial peel one iteration");
2692 2693      Node_List wl(area);
2693 2694      Node* t = last_peel;
2694 2695      while (true) {
2695 2696        wl.push(t);
2696 2697        if (t == head_clone) break;
2697 2698        t = idom(t);
2698 2699      }
2699 2700      while (wl.size() > 0) {
2700 2701        Node* tt = wl.pop();
2701 2702        if (tt == head) tty->print_cr("orig head");
2702 2703        else if (tt == new_head_clone) tty->print_cr("new head");
2703 2704        else if (tt == head_clone) tty->print_cr("clone head");
2704 2705        tt->dump();
2705 2706      }
2706 2707    }
2707 2708  #endif
2708 2709    return true;
2709 2710  }
2710 2711  
2711 2712  //------------------------------reorg_offsets----------------------------------
2712 2713  // Reorganize offset computations to lower register pressure.  Mostly
2713 2714  // prevent loop-fallout uses of the pre-incremented trip counter (which are
2714 2715  // then alive with the post-incremented trip counter forcing an extra
2715 2716  // register move)
2716 2717  void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
2717 2718    // Perform it only for canonical counted loops.
2718 2719    // Loop's shape could be messed up by iteration_split_impl.
2719 2720    if (!loop->_head->is_CountedLoop())
2720 2721      return;
2721 2722    if (!loop->_head->as_Loop()->is_valid_counted_loop())
2722 2723      return;
2723 2724  
2724 2725    CountedLoopNode *cl = loop->_head->as_CountedLoop();
2725 2726    CountedLoopEndNode *cle = cl->loopexit();
2726 2727    Node *exit = cle->proj_out(false);
2727 2728    Node *phi = cl->phi();
2728 2729  
2729 2730    // Check for the special case of folks using the pre-incremented
2730 2731    // trip-counter on the fall-out path (forces the pre-incremented
2731 2732    // and post-incremented trip counter to be live at the same time).
2732 2733    // Fix this by adjusting to use the post-increment trip counter.
2733 2734  
2734 2735    bool progress = true;
2735 2736    while (progress) {
2736 2737      progress = false;
2737 2738      for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
2738 2739        Node* use = phi->fast_out(i);   // User of trip-counter
2739 2740        if (!has_ctrl(use))  continue;
2740 2741        Node *u_ctrl = get_ctrl(use);
2741 2742        if (use->is_Phi()) {
2742 2743          u_ctrl = NULL;
2743 2744          for (uint j = 1; j < use->req(); j++)
2744 2745            if (use->in(j) == phi)
2745 2746              u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
2746 2747        }
2747 2748        IdealLoopTree *u_loop = get_loop(u_ctrl);
2748 2749        // Look for loop-invariant use
2749 2750        if (u_loop == loop) continue;
2750 2751        if (loop->is_member(u_loop)) continue;
2751 2752        // Check that use is live out the bottom.  Assuming the trip-counter
2752 2753        // update is right at the bottom, uses of of the loop middle are ok.
2753 2754        if (dom_lca(exit, u_ctrl) != exit) continue;
2754 2755        // Hit!  Refactor use to use the post-incremented tripcounter.
2755 2756        // Compute a post-increment tripcounter.
2756 2757        Node *opaq = new (C) Opaque2Node( C, cle->incr() );
2757 2758        register_new_node(opaq, exit);
2758 2759        Node *neg_stride = _igvn.intcon(-cle->stride_con());
2759 2760        set_ctrl(neg_stride, C->root());
2760 2761        Node *post = new (C) AddINode( opaq, neg_stride);
2761 2762        register_new_node(post, exit);
2762 2763        _igvn.rehash_node_delayed(use);
2763 2764        for (uint j = 1; j < use->req(); j++) {
2764 2765          if (use->in(j) == phi)
2765 2766            use->set_req(j, post);
2766 2767        }
2767 2768        // Since DU info changed, rerun loop
2768 2769        progress = true;
2769 2770        break;
2770 2771      }
2771 2772    }
2772 2773  
2773 2774  }

↓ open down ↓

1367 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX