New src/share/vm/opto/loopopts.cpp

   1 /*
   2  * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "memory/allocation.inline.hpp"
  27 #include "opto/addnode.hpp"
  28 #include "opto/connode.hpp"
  29 #include "opto/divnode.hpp"
  30 #include "opto/loopnode.hpp"
  31 #include "opto/matcher.hpp"
  32 #include "opto/mulnode.hpp"
  33 #include "opto/movenode.hpp"
  34 #include "opto/opaquenode.hpp"
  35 #include "opto/rootnode.hpp"
  36 #include "opto/shenandoahSupport.hpp"
  37 #include "opto/subnode.hpp"
  38 
  39 //=============================================================================
  40 //------------------------------split_thru_phi---------------------------------
  41 // Split Node 'n' through merge point if there is enough win.
  42 Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
  43   if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
  44     // ConvI2L may have type information on it which is unsafe to push up
  45     // so disable this for now
  46     return NULL;
  47   }
  48 
  49   int wins = 0;
  50   assert(!n->is_CFG(), "");
  51   assert(region->is_Region(), "");
  52 
  53   const Type* type = n->bottom_type();
  54   const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
  55   Node *phi;
  56   if (t_oop != NULL && t_oop->is_known_instance_field()) {
  57     int iid    = t_oop->instance_id();
  58     int index  = C->get_alias_index(t_oop);
  59     int offset = t_oop->offset();
  60     phi = new PhiNode(region, type, NULL, iid, index, offset);
  61   } else {
  62     phi = PhiNode::make_blank(region, n);
  63   }
  64   uint old_unique = C->unique();
  65   for (uint i = 1; i < region->req(); i++) {
  66     Node *x;
  67     Node* the_clone = NULL;
  68     if (region->in(i) == C->top()) {
  69       x = C->top();             // Dead path?  Use a dead data op
  70     } else {
  71       x = n->clone();           // Else clone up the data op
  72       the_clone = x;            // Remember for possible deletion.
  73       // Alter data node to use pre-phi inputs
  74       if (n->in(0) == region)
  75         x->set_req( 0, region->in(i) );
  76       for (uint j = 1; j < n->req(); j++) {
  77         Node *in = n->in(j);
  78         if (in->is_Phi() && in->in(0) == region)
  79           x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
  80       }
  81     }
  82     // Check for a 'win' on some paths
  83     const Type *t = x->Value(&_igvn);
  84 
  85     bool singleton = t->singleton();
  86 
  87     // A TOP singleton indicates that there are no possible values incoming
  88     // along a particular edge. In most cases, this is OK, and the Phi will
  89     // be eliminated later in an Ideal call. However, we can't allow this to
  90     // happen if the singleton occurs on loop entry, as the elimination of
  91     // the PhiNode may cause the resulting node to migrate back to a previous
  92     // loop iteration.
  93     if (singleton && t == Type::TOP) {
  94       // Is_Loop() == false does not confirm the absence of a loop (e.g., an
  95       // irreducible loop may not be indicated by an affirmative is_Loop());
  96       // therefore, the only top we can split thru a phi is on a backedge of
  97       // a loop.
  98       singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
  99     }
 100 
 101     if (singleton) {
 102       wins++;
 103       x = ((PhaseGVN&)_igvn).makecon(t);
 104     } else {
 105       // We now call Identity to try to simplify the cloned node.
 106       // Note that some Identity methods call phase->type(this).
 107       // Make sure that the type array is big enough for
 108       // our new node, even though we may throw the node away.
 109       // (Note: This tweaking with igvn only works because x is a new node.)
 110       _igvn.set_type(x, t);
 111       // If x is a TypeNode, capture any more-precise type permanently into Node
 112       // otherwise it will be not updated during igvn->transform since
 113       // igvn->type(x) is set to x->Value() already.
 114       x->raise_bottom_type(t);
 115       if (x->Opcode() != Op_ShenandoahWriteBarrier) {
 116         Node *y = x->Identity(&_igvn);
 117         if (y != x) {
 118           wins++;
 119           x = y;
 120         } else {
 121           y = _igvn.hash_find(x);
 122           if (y) {
 123             wins++;
 124             x = y;
 125           } else {
 126             // Else x is a new node we are keeping
 127             // We do not need register_new_node_with_optimizer
 128             // because set_type has already been called.
 129             _igvn._worklist.push(x);
 130           }
 131         }
 132       } else {
 133         _igvn._worklist.push(x);
 134       }
 135     }
 136     if (x != the_clone && the_clone != NULL)
 137       _igvn.remove_dead_node(the_clone);
 138     phi->set_req( i, x );
 139   }
 140   // Too few wins?
 141   if (wins <= policy) {
 142     _igvn.remove_dead_node(phi);
 143     return NULL;
 144   }
 145 
 146   // Record Phi
 147   register_new_node( phi, region );
 148 
 149   for (uint i2 = 1; i2 < phi->req(); i2++) {
 150     Node *x = phi->in(i2);
 151     // If we commoned up the cloned 'x' with another existing Node,
 152     // the existing Node picks up a new use.  We need to make the
 153     // existing Node occur higher up so it dominates its uses.
 154     Node *old_ctrl;
 155     IdealLoopTree *old_loop;
 156 
 157     if (x->is_Con()) {
 158       // Constant's control is always root.
 159       set_ctrl(x, C->root());
 160       continue;
 161     }
 162     // The occasional new node
 163     if (x->_idx >= old_unique) {     // Found a new, unplaced node?
 164       old_ctrl = NULL;
 165       old_loop = NULL;               // Not in any prior loop
 166     } else {
 167       old_ctrl = get_ctrl(x);
 168       old_loop = get_loop(old_ctrl); // Get prior loop
 169     }
 170     // New late point must dominate new use
 171     Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
 172     if (new_ctrl == old_ctrl) // Nothing is changed
 173       continue;
 174 
 175     IdealLoopTree *new_loop = get_loop(new_ctrl);
 176 
 177     // Don't move x into a loop if its uses are
 178     // outside of loop. Otherwise x will be cloned
 179     // for each use outside of this loop.
 180     IdealLoopTree *use_loop = get_loop(region);
 181     if (!new_loop->is_member(use_loop) &&
 182         (old_loop == NULL || !new_loop->is_member(old_loop))) {
 183       // Take early control, later control will be recalculated
 184       // during next iteration of loop optimizations.
 185       new_ctrl = get_early_ctrl(x);
 186       new_loop = get_loop(new_ctrl);
 187     }
 188     // Set new location
 189     set_ctrl(x, new_ctrl);
 190     // If changing loop bodies, see if we need to collect into new body
 191     if (old_loop != new_loop) {
 192       if (old_loop && !old_loop->_child)
 193         old_loop->_body.yank(x);
 194       if (!new_loop->_child)
 195         new_loop->_body.push(x);  // Collect body info
 196     }
 197   }
 198 
 199   return phi;
 200 }
 201 
 202 /**
 203  * When splitting a Shenandoah write barrier through a phi, we
 204  * can not replace the write-barrier input of the ShenandoahWBMemProj
 205  * with the phi. We must also split the ShenandoahWBMemProj through the
 206  * phi and generate a new memory phi for it.
 207  */
 208 void PhaseIdealLoop::split_mem_thru_phi(Node* n, Node* r, Node* phi) {
 209   if (n->Opcode() == Op_ShenandoahWriteBarrier) {
 210     if (n->has_out_with(Op_ShenandoahWBMemProj)) {
 211       Node* old_mem_phi = n->in(ShenandoahBarrierNode::Memory);
 212       assert(r->is_Region(), "need region to control phi");
 213       assert(phi->is_Phi(), "expect phi");
 214       Node* memphi = PhiNode::make(r, old_mem_phi, Type::MEMORY, C->alias_type(n->adr_type())->adr_type());
 215       for (uint i = 1; i < r->req(); i++) {
 216         Node* wb = phi->in(i);
 217         if (wb->Opcode() == Op_ShenandoahWriteBarrier) {
 218           // assert(! wb->has_out_with(Op_ShenandoahWBMemProj), "new clone does not have mem proj");
 219           Node* new_proj = new ShenandoahWBMemProjNode(wb);
 220           register_new_node(new_proj, r->in(i));
 221           memphi->set_req(i, new_proj);
 222         } else {
 223           if (old_mem_phi->is_Phi() && old_mem_phi->in(0) == r) {
 224             memphi->set_req(i, old_mem_phi->in(i));
 225           }
 226         }
 227       }
 228       register_new_node(memphi, r);
 229       Node* old_mem_out = n->find_out_with(Op_ShenandoahWBMemProj);
 230       while (old_mem_out != NULL) {
 231         assert(old_mem_out != NULL, "expect memory projection");
 232         _igvn.replace_node(old_mem_out, memphi);
 233         old_mem_out = n->find_out_with(Op_ShenandoahWBMemProj);
 234       }
 235     }
 236     assert(! n->has_out_with(Op_ShenandoahWBMemProj), "no more memory outs");
 237   }
 238 }
 239 
 240 //------------------------------dominated_by------------------------------------
 241 // Replace the dominated test with an obvious true or false.  Place it on the
 242 // IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
 243 // live path up to the dominating control.
 244 void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
 245 #ifndef PRODUCT
 246   if (VerifyLoopOptimizations && PrintOpto) tty->print_cr("dominating test");
 247 #endif
 248 
 249 
 250   // prevdom is the dominating projection of the dominating test.
 251   assert( iff->is_If(), "" );
 252   assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
 253   int pop = prevdom->Opcode();
 254   assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
 255   if (flip) {
 256     if (pop == Op_IfTrue)
 257       pop = Op_IfFalse;
 258     else
 259       pop = Op_IfTrue;
 260   }
 261   // 'con' is set to true or false to kill the dominated test.
 262   Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
 263   set_ctrl(con, C->root()); // Constant gets a new use
 264   // Hack the dominated test
 265   _igvn.replace_input_of(iff, 1, con);
 266 
 267   // If I dont have a reachable TRUE and FALSE path following the IfNode then
 268   // I can assume this path reaches an infinite loop.  In this case it's not
 269   // important to optimize the data Nodes - either the whole compilation will
 270   // be tossed or this path (and all data Nodes) will go dead.
 271   if (iff->outcnt() != 2) return;
 272 
 273   // Make control-dependent data Nodes on the live path (path that will remain
 274   // once the dominated IF is removed) become control-dependent on the
 275   // dominating projection.
 276   Node* dp = iff->as_If()->proj_out(pop == Op_IfTrue);
 277 
 278   // Loop predicates may have depending checks which should not
 279   // be skipped. For example, range check predicate has two checks
 280   // for lower and upper bounds.
 281   if (dp == NULL)
 282     return;
 283 
 284   ProjNode* dp_proj  = dp->as_Proj();
 285   ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
 286   if (exclude_loop_predicate &&
 287       (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL ||
 288        unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check) != NULL)) {
 289     // If this is a range check (IfNode::is_range_check), do not
 290     // reorder because Compile::allow_range_check_smearing might have
 291     // changed the check.
 292     return; // Let IGVN transformation change control dependence.
 293   }
 294 
 295   IdealLoopTree *old_loop = get_loop(dp);
 296 
 297   for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
 298     Node* cd = dp->fast_out(i); // Control-dependent node
 299     if (cd->depends_only_on_test()) {
 300       assert(cd->in(0) == dp, "");
 301       _igvn.replace_input_of(cd, 0, prevdom);
 302       set_early_ctrl(cd);
 303       IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
 304       if (old_loop != new_loop) {
 305         if (!old_loop->_child) old_loop->_body.yank(cd);
 306         if (!new_loop->_child) new_loop->_body.push(cd);
 307       }
 308       --i;
 309       --imax;
 310     }
 311   }
 312 }
 313 
 314 //------------------------------has_local_phi_input----------------------------
 315 // Return TRUE if 'n' has Phi inputs from its local block and no other
 316 // block-local inputs (all non-local-phi inputs come from earlier blocks)
 317 Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
 318   Node *n_ctrl = get_ctrl(n);
 319   // See if some inputs come from a Phi in this block, or from before
 320   // this block.
 321   uint i;
 322   for( i = 1; i < n->req(); i++ ) {
 323     Node *phi = n->in(i);
 324     if( phi->is_Phi() && phi->in(0) == n_ctrl )
 325       break;
 326   }
 327   if( i >= n->req() )
 328     return NULL;                // No Phi inputs; nowhere to clone thru
 329 
 330   // Check for inputs created between 'n' and the Phi input.  These
 331   // must split as well; they have already been given the chance
 332   // (courtesy of a post-order visit) and since they did not we must
 333   // recover the 'cost' of splitting them by being very profitable
 334   // when splitting 'n'.  Since this is unlikely we simply give up.
 335   for( i = 1; i < n->req(); i++ ) {
 336     Node *m = n->in(i);
 337     if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
 338       // We allow the special case of AddP's with no local inputs.
 339       // This allows us to split-up address expressions.
 340       if (m->is_AddP() &&
 341           get_ctrl(m->in(2)) != n_ctrl &&
 342           get_ctrl(m->in(3)) != n_ctrl) {
 343         // Move the AddP up to dominating point
 344         set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
 345         continue;
 346       }
 347       return NULL;
 348     }
 349   }
 350 
 351   return n_ctrl;
 352 }
 353 
 354 //------------------------------remix_address_expressions----------------------
 355 // Rework addressing expressions to get the most loop-invariant stuff
 356 // moved out.  We'd like to do all associative operators, but it's especially
 357 // important (common) to do address expressions.
 358 Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
 359   if (!has_ctrl(n))  return NULL;
 360   Node *n_ctrl = get_ctrl(n);
 361   IdealLoopTree *n_loop = get_loop(n_ctrl);
 362 
 363   // See if 'n' mixes loop-varying and loop-invariant inputs and
 364   // itself is loop-varying.
 365 
 366   // Only interested in binary ops (and AddP)
 367   if( n->req() < 3 || n->req() > 4 ) return NULL;
 368 
 369   Node *n1_ctrl = get_ctrl(n->in(                    1));
 370   Node *n2_ctrl = get_ctrl(n->in(                    2));
 371   Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
 372   IdealLoopTree *n1_loop = get_loop( n1_ctrl );
 373   IdealLoopTree *n2_loop = get_loop( n2_ctrl );
 374   IdealLoopTree *n3_loop = get_loop( n3_ctrl );
 375 
 376   // Does one of my inputs spin in a tighter loop than self?
 377   if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
 378       (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
 379       (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
 380     return NULL;                // Leave well enough alone
 381 
 382   // Is at least one of my inputs loop-invariant?
 383   if( n1_loop == n_loop &&
 384       n2_loop == n_loop &&
 385       n3_loop == n_loop )
 386     return NULL;                // No loop-invariant inputs
 387 
 388 
 389   int n_op = n->Opcode();
 390 
 391   // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
 392   if( n_op == Op_LShiftI ) {
 393     // Scale is loop invariant
 394     Node *scale = n->in(2);
 395     Node *scale_ctrl = get_ctrl(scale);
 396     IdealLoopTree *scale_loop = get_loop(scale_ctrl );
 397     if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
 398       return NULL;
 399     const TypeInt *scale_t = scale->bottom_type()->isa_int();
 400     if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
 401       return NULL;              // Dont bother with byte/short masking
 402     // Add must vary with loop (else shift would be loop-invariant)
 403     Node *add = n->in(1);
 404     Node *add_ctrl = get_ctrl(add);
 405     IdealLoopTree *add_loop = get_loop(add_ctrl);
 406     //assert( n_loop == add_loop, "" );
 407     if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
 408 
 409     // Convert I-V into I+ (0-V); same for V-I
 410     if( add->Opcode() == Op_SubI &&
 411         _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
 412       Node *zero = _igvn.intcon(0);
 413       set_ctrl(zero, C->root());
 414       Node *neg = new SubINode( _igvn.intcon(0), add->in(2) );
 415       register_new_node( neg, get_ctrl(add->in(2) ) );
 416       add = new AddINode( add->in(1), neg );
 417       register_new_node( add, add_ctrl );
 418     }
 419     if( add->Opcode() != Op_AddI ) return NULL;
 420     // See if one add input is loop invariant
 421     Node *add_var = add->in(1);
 422     Node *add_var_ctrl = get_ctrl(add_var);
 423     IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
 424     Node *add_invar = add->in(2);
 425     Node *add_invar_ctrl = get_ctrl(add_invar);
 426     IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
 427     if( add_var_loop == n_loop ) {
 428     } else if( add_invar_loop == n_loop ) {
 429       // Swap to find the invariant part
 430       add_invar = add_var;
 431       add_invar_ctrl = add_var_ctrl;
 432       add_invar_loop = add_var_loop;
 433       add_var = add->in(2);
 434       Node *add_var_ctrl = get_ctrl(add_var);
 435       IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
 436     } else                      // Else neither input is loop invariant
 437       return NULL;
 438     if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
 439       return NULL;              // No invariant part of the add?
 440 
 441     // Yes!  Reshape address expression!
 442     Node *inv_scale = new LShiftINode( add_invar, scale );
 443     Node *inv_scale_ctrl =
 444       dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
 445       add_invar_ctrl : scale_ctrl;
 446     register_new_node( inv_scale, inv_scale_ctrl );
 447     Node *var_scale = new LShiftINode( add_var, scale );
 448     register_new_node( var_scale, n_ctrl );
 449     Node *var_add = new AddINode( var_scale, inv_scale );
 450     register_new_node( var_add, n_ctrl );
 451     _igvn.replace_node( n, var_add );
 452     return var_add;
 453   }
 454 
 455   // Replace (I+V) with (V+I)
 456   if( n_op == Op_AddI ||
 457       n_op == Op_AddL ||
 458       n_op == Op_AddF ||
 459       n_op == Op_AddD ||
 460       n_op == Op_MulI ||
 461       n_op == Op_MulL ||
 462       n_op == Op_MulF ||
 463       n_op == Op_MulD ) {
 464     if( n2_loop == n_loop ) {
 465       assert( n1_loop != n_loop, "" );
 466       n->swap_edges(1, 2);
 467     }
 468   }
 469 
 470   // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
 471   // but not if I2 is a constant.
 472   if( n_op == Op_AddP ) {
 473     if( n2_loop == n_loop && n3_loop != n_loop ) {
 474       if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
 475         Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
 476         Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
 477         IdealLoopTree *n22loop = get_loop( n22_ctrl );
 478         IdealLoopTree *n23_loop = get_loop( n23_ctrl );
 479         if( n22loop != n_loop && n22loop->is_member(n_loop) &&
 480             n23_loop == n_loop ) {
 481           Node *add1 = new AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
 482           // Stuff new AddP in the loop preheader
 483           register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
 484           Node *add2 = new AddPNode( n->in(1), add1, n->in(2)->in(3) );
 485           register_new_node( add2, n_ctrl );
 486           _igvn.replace_node( n, add2 );
 487           return add2;
 488         }
 489       }
 490     }
 491 
 492     // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
 493     if( n2_loop != n_loop && n3_loop == n_loop ) {
 494       if( n->in(3)->Opcode() == Op_AddI ) {
 495         Node *V = n->in(3)->in(1);
 496         Node *I = n->in(3)->in(2);
 497         if( is_member(n_loop,get_ctrl(V)) ) {
 498         } else {
 499           Node *tmp = V; V = I; I = tmp;
 500         }
 501         if( !is_member(n_loop,get_ctrl(I)) ) {
 502           Node *add1 = new AddPNode( n->in(1), n->in(2), I );
 503           // Stuff new AddP in the loop preheader
 504           register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
 505           Node *add2 = new AddPNode( n->in(1), add1, V );
 506           register_new_node( add2, n_ctrl );
 507           _igvn.replace_node( n, add2 );
 508           return add2;
 509         }
 510       }
 511     }
 512   }
 513 
 514   return NULL;
 515 }
 516 
 517 //------------------------------conditional_move-------------------------------
 518 // Attempt to replace a Phi with a conditional move.  We have some pretty
 519 // strict profitability requirements.  All Phis at the merge point must
 520 // be converted, so we can remove the control flow.  We need to limit the
 521 // number of c-moves to a small handful.  All code that was in the side-arms
 522 // of the CFG diamond is now speculatively executed.  This code has to be
 523 // "cheap enough".  We are pretty much limited to CFG diamonds that merge
 524 // 1 or 2 items with a total of 1 or 2 ops executed speculatively.
 525 Node *PhaseIdealLoop::conditional_move( Node *region ) {
 526 
 527   assert(region->is_Region(), "sanity check");
 528   if (region->req() != 3) return NULL;
 529 
 530   // Check for CFG diamond
 531   Node *lp = region->in(1);
 532   Node *rp = region->in(2);
 533   if (!lp || !rp) return NULL;
 534   Node *lp_c = lp->in(0);
 535   if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
 536   IfNode *iff = lp_c->as_If();
 537 
 538   // Check for ops pinned in an arm of the diamond.
 539   // Can't remove the control flow in this case
 540   if (lp->outcnt() > 1) return NULL;
 541   if (rp->outcnt() > 1) return NULL;
 542 
 543   IdealLoopTree* r_loop = get_loop(region);
 544   assert(r_loop == get_loop(iff), "sanity");
 545   // Always convert to CMOVE if all results are used only outside this loop.
 546   bool used_inside_loop = (r_loop == _ltree_root);
 547 
 548   // Check profitability
 549   int cost = 0;
 550   int phis = 0;
 551   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 552     Node *out = region->fast_out(i);
 553     if (!out->is_Phi()) continue; // Ignore other control edges, etc
 554     phis++;
 555     PhiNode* phi = out->as_Phi();
 556     BasicType bt = phi->type()->basic_type();
 557     switch (bt) {
 558     case T_FLOAT:
 559     case T_DOUBLE: {
 560       cost += Matcher::float_cmove_cost(); // Could be very expensive
 561       break;
 562     }
 563     case T_LONG: {
 564       cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
 565     }
 566     case T_INT:                 // These all CMOV fine
 567     case T_ADDRESS: {           // (RawPtr)
 568       cost++;
 569       break;
 570     }
 571     case T_NARROWOOP: // Fall through
 572     case T_OBJECT: {            // Base oops are OK, but not derived oops
 573       const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
 574       // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
 575       // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
 576       // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
 577       // have a Phi for the base here that we convert to a CMOVE all is well
 578       // and good.  But if the base is dead, we'll not make a CMOVE.  Later
 579       // the allocator will have to produce a base by creating a CMOVE of the
 580       // relevant bases.  This puts the allocator in the business of
 581       // manufacturing expensive instructions, generally a bad plan.
 582       // Just Say No to Conditionally-Moved Derived Pointers.
 583       if (tp && tp->offset() != 0)
 584         return NULL;
 585       cost++;
 586       break;
 587     }
 588     default:
 589       return NULL;              // In particular, can't do memory or I/O
 590     }
 591     // Add in cost any speculative ops
 592     for (uint j = 1; j < region->req(); j++) {
 593       Node *proj = region->in(j);
 594       Node *inp = phi->in(j);
 595       if (get_ctrl(inp) == proj) { // Found local op
 596         cost++;
 597         // Check for a chain of dependent ops; these will all become
 598         // speculative in a CMOV.
 599         for (uint k = 1; k < inp->req(); k++)
 600           if (get_ctrl(inp->in(k)) == proj)
 601             cost += ConditionalMoveLimit; // Too much speculative goo
 602       }
 603     }
 604     // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
 605     // This will likely Split-If, a higher-payoff operation.
 606     for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
 607       Node* use = phi->fast_out(k);
 608       if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
 609         cost += ConditionalMoveLimit;
 610       // Is there a use inside the loop?
 611       // Note: check only basic types since CMoveP is pinned.
 612       if (!used_inside_loop && is_java_primitive(bt)) {
 613         IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
 614         if (r_loop == u_loop || r_loop->is_member(u_loop)) {
 615           used_inside_loop = true;
 616         }
 617       }
 618     }
 619   }
 620   Node* bol = iff->in(1);
 621   assert(bol->Opcode() == Op_Bool, "");
 622   int cmp_op = bol->in(1)->Opcode();
 623   // It is expensive to generate flags from a float compare.
 624   // Avoid duplicated float compare.
 625   if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
 626 
 627   float infrequent_prob = PROB_UNLIKELY_MAG(3);
 628   // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
 629   if (used_inside_loop) {
 630     if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
 631 
 632     // BlockLayoutByFrequency optimization moves infrequent branch
 633     // from hot path. No point in CMOV'ing in such case (110 is used
 634     // instead of 100 to take into account not exactness of float value).
 635     if (BlockLayoutByFrequency) {
 636       infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
 637     }
 638   }
 639   // Check for highly predictable branch.  No point in CMOV'ing if
 640   // we are going to predict accurately all the time.
 641   if (iff->_prob < infrequent_prob ||
 642       iff->_prob > (1.0f - infrequent_prob))
 643     return NULL;
 644 
 645   // --------------
 646   // Now replace all Phis with CMOV's
 647   Node *cmov_ctrl = iff->in(0);
 648   uint flip = (lp->Opcode() == Op_IfTrue);
 649   while (1) {
 650     PhiNode* phi = NULL;
 651     for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 652       Node *out = region->fast_out(i);
 653       if (out->is_Phi()) {
 654         phi = out->as_Phi();
 655         break;
 656       }
 657     }
 658     if (phi == NULL)  break;
 659 #ifndef PRODUCT
 660     if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
 661 #endif
 662     // Move speculative ops
 663     for (uint j = 1; j < region->req(); j++) {
 664       Node *proj = region->in(j);
 665       Node *inp = phi->in(j);
 666       if (get_ctrl(inp) == proj) { // Found local op
 667 #ifndef PRODUCT
 668         if (PrintOpto && VerifyLoopOptimizations) {
 669           tty->print("  speculate: ");
 670           inp->dump();
 671         }
 672 #endif
 673         set_ctrl(inp, cmov_ctrl);
 674       }
 675     }
 676     Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
 677     register_new_node( cmov, cmov_ctrl );
 678     _igvn.replace_node( phi, cmov );
 679 #ifndef PRODUCT
 680     if (TraceLoopOpts) {
 681       tty->print("CMOV  ");
 682       r_loop->dump_head();
 683       if (Verbose) {
 684         bol->in(1)->dump(1);
 685         cmov->dump(1);
 686       }
 687     }
 688     if (VerifyLoopOptimizations) verify();
 689 #endif
 690   }
 691 
 692   // The useless CFG diamond will fold up later; see the optimization in
 693   // RegionNode::Ideal.
 694   _igvn._worklist.push(region);
 695 
 696   return iff->in(1);
 697 }
 698 
 699 #ifdef ASSERT
 700 static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
 701   for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
 702     Node* u = m->fast_out(i);
 703     if (u->is_CFG()) {
 704       if (u->Opcode() == Op_NeverBranch) {
 705         u = ((NeverBranchNode*)u)->proj_out(0);
 706         enqueue_cfg_uses(u, wq);
 707       } else {
 708         wq.push(u);
 709       }
 710     }
 711   }
 712 }
 713 #endif
 714 
 715 // Try moving a store out of a loop, right before the loop
 716 Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
 717   // Store has to be first in the loop body
 718   IdealLoopTree *n_loop = get_loop(n_ctrl);
 719   if (n->is_Store() && n_loop != _ltree_root && n_loop->is_loop() && n->in(0) != NULL) {
 720     Node* address = n->in(MemNode::Address);
 721     Node* value = n->in(MemNode::ValueIn);
 722     Node* mem = n->in(MemNode::Memory);
 723     IdealLoopTree* address_loop = get_loop(get_ctrl(address));
 724     IdealLoopTree* value_loop = get_loop(get_ctrl(value));
 725 
 726     // - address and value must be loop invariant
 727     // - memory must be a memory Phi for the loop
 728     // - Store must be the only store on this memory slice in the
 729     // loop: if there's another store following this one then value
 730     // written at iteration i by the second store could be overwritten
 731     // at iteration i+n by the first store: it's not safe to move the
 732     // first store out of the loop
 733     // - nothing must observe the Phi memory: it guarantees no read
 734     // before the store and no early exit out of the loop
 735     // With those conditions, we are also guaranteed the store post
 736     // dominates the loop head. Otherwise there would be extra Phi
 737     // involved between the loop's Phi and the store.
 738 
 739     if (!n_loop->is_member(address_loop) &&
 740         !n_loop->is_member(value_loop) &&
 741         mem->is_Phi() && mem->in(0) == n_loop->_head &&
 742         mem->outcnt() == 1 &&
 743         mem->in(LoopNode::LoopBackControl) == n) {
 744 
 745 #ifdef ASSERT
 746       // Verify that store's control does post dominate loop entry and
 747       // that there's no early exit of the loop before the store.
 748       bool ctrl_ok = false;
 749       {
 750         // Follow control from loop head until n, we exit the loop or
 751         // we reach the tail
 752         ResourceMark rm;
 753         Unique_Node_List wq;
 754         wq.push(n_loop->_head);
 755         assert(n_loop->_tail != NULL, "need a tail");
 756         for (uint next = 0; next < wq.size(); ++next) {
 757           Node *m = wq.at(next);
 758           if (m == n->in(0)) {
 759             ctrl_ok = true;
 760             continue;
 761           }
 762           assert(!has_ctrl(m), "should be CFG");
 763           if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
 764             ctrl_ok = false;
 765             break;
 766           }
 767           enqueue_cfg_uses(m, wq);
 768         }
 769       }
 770       assert(ctrl_ok, "bad control");
 771 #endif
 772 
 773       // move the Store
 774       _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
 775       _igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl));
 776       _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
 777       // Disconnect the phi now. An empty phi can confuse other
 778       // optimizations in this pass of loop opts.
 779       _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
 780       n_loop->_body.yank(mem);
 781 
 782       IdealLoopTree* new_loop = get_loop(n->in(0));
 783       set_ctrl_and_loop(n, n->in(0));
 784 
 785       return n;
 786     }
 787   }
 788   return NULL;
 789 }
 790 
 791 // Try moving a store out of a loop, right after the loop
 792 void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
 793   if (n->is_Store() && n->in(0) != NULL) {
 794     Node *n_ctrl = get_ctrl(n);
 795     IdealLoopTree *n_loop = get_loop(n_ctrl);
 796     // Store must be in a loop
 797     if (n_loop != _ltree_root && !n_loop->_irreducible) {
 798       Node* address = n->in(MemNode::Address);
 799       Node* value = n->in(MemNode::ValueIn);
 800       IdealLoopTree* address_loop = get_loop(get_ctrl(address));
 801       // address must be loop invariant
 802       if (!n_loop->is_member(address_loop)) {
 803         // Store must be last on this memory slice in the loop and
 804         // nothing in the loop must observe it
 805         Node* phi = NULL;
 806         for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 807           Node* u = n->fast_out(i);
 808           if (has_ctrl(u)) { // control use?
 809             IdealLoopTree *u_loop = get_loop(get_ctrl(u));
 810             if (!n_loop->is_member(u_loop)) {
 811               continue;
 812             }
 813             if (u->is_Phi() && u->in(0) == n_loop->_head) {
 814               assert(_igvn.type(u) == Type::MEMORY, "bad phi");
 815               assert(phi == NULL, "already found");
 816               phi = u;
 817               continue;
 818             }
 819           }
 820           phi = NULL;
 821           break;
 822         }
 823         if (phi != NULL) {
 824           // Nothing in the loop before the store (next iteration)
 825           // must observe the stored value
 826           bool mem_ok = true;
 827           {
 828             ResourceMark rm;
 829             Unique_Node_List wq;
 830             wq.push(phi);
 831             for (uint next = 0; next < wq.size() && mem_ok; ++next) {
 832               Node *m = wq.at(next);
 833               for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
 834                 Node* u = m->fast_out(i);
 835                 if (u->is_Store() || u->is_Phi()) {
 836                   if (u != n) {
 837                     wq.push(u);
 838                     mem_ok = (wq.size() <= 10);
 839                   }
 840                 } else {
 841                   mem_ok = false;
 842                   break;
 843                 }
 844               }
 845             }
 846           }
 847           if (mem_ok) {
 848             // Move the Store out of the loop creating clones along
 849             // all paths out of the loop that observe the stored value
 850             _igvn.rehash_node_delayed(phi);
 851             int count = phi->replace_edge(n, n->in(MemNode::Memory));
 852             assert(count > 0, "inconsistent phi");
 853             for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 854               Node* u = n->fast_out(i);
 855               Node* c = get_ctrl(u);
 856 
 857               if (u->is_Phi()) {
 858                 c = u->in(0)->in(u->find_edge(n));
 859               }
 860               IdealLoopTree *u_loop = get_loop(c);
 861               assert (!n_loop->is_member(u_loop), "only the phi should have been a use in the loop");
 862               while(true) {
 863                 Node* next_c = find_non_split_ctrl(idom(c));
 864                 if (n_loop->is_member(get_loop(next_c))) {
 865                   break;
 866                 }
 867                 c = next_c;
 868               }
 869 
 870               Node* st = n->clone();
 871               st->set_req(0, c);
 872               _igvn.register_new_node_with_optimizer(st);
 873 
 874               set_ctrl(st, c);
 875               IdealLoopTree* new_loop = get_loop(c);
 876               assert(new_loop != n_loop, "should be moved out of loop");
 877               if (new_loop->_child == NULL) new_loop->_body.push(st);
 878 
 879               _igvn.replace_input_of(u, u->find_edge(n), st);
 880               --imax;
 881               --i;
 882             }
 883 
 884 
 885             assert(n->outcnt() == 0, "all uses should be gone");
 886             _igvn.replace_input_of(n, MemNode::Memory, C->top());
 887             // Disconnect the phi now. An empty phi can confuse other
 888             // optimizations in this pass of loop opts..
 889             if (phi->in(LoopNode::LoopBackControl) == phi) {
 890               _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
 891               n_loop->_body.yank(phi);
 892             }
 893           }
 894         }
 895       }
 896     }
 897   }
 898 }
 899 
 900 //------------------------------split_if_with_blocks_pre-----------------------
 901 // Do the real work in a non-recursive function.  Data nodes want to be
 902 // cloned in the pre-order so they can feed each other nicely.
 903 Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
 904   // Cloning these guys is unlikely to win
 905   int n_op = n->Opcode();
 906   if( n_op == Op_MergeMem ) return n;
 907   if( n->is_Proj() ) return n;
 908   // Do not clone-up CmpFXXX variations, as these are always
 909   // followed by a CmpI
 910   if( n->is_Cmp() ) return n;
 911   // Attempt to use a conditional move instead of a phi/branch
 912   if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
 913     Node *cmov = conditional_move( n );
 914     if( cmov ) return cmov;
 915   }
 916   if( n->is_CFG() || n->is_LoadStore() )
 917     return n;
 918   if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
 919       n_op == Op_Opaque2 ) {
 920     if( !C->major_progress() )   // If chance of no more loop opts...
 921       _igvn._worklist.push(n);  // maybe we'll remove them
 922     return n;
 923   }
 924 
 925   if( n->is_Con() ) return n;   // No cloning for Con nodes
 926 
 927   Node *n_ctrl = get_ctrl(n);
 928   if( !n_ctrl ) return n;       // Dead node
 929 
 930   Node* res = try_move_store_before_loop(n, n_ctrl);
 931   if (res != NULL) {
 932     return n;
 933   }
 934 
 935   // Attempt to remix address expressions for loop invariants
 936   Node *m = remix_address_expressions( n );
 937   if( m ) return m;
 938 
 939   // Determine if the Node has inputs from some local Phi.
 940   // Returns the block to clone thru.
 941   Node *n_blk = has_local_phi_input( n );
 942   if( !n_blk ) return n;
 943 
 944   // Do not clone the trip counter through on a CountedLoop
 945   // (messes up the canonical shape).
 946   if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
 947 
 948   // Check for having no control input; not pinned.  Allow
 949   // dominating control.
 950   if (n->in(0)) {
 951     Node *dom = idom(n_blk);
 952     if (dom_lca(n->in(0), dom) != n->in(0)) {
 953       return n;
 954     }
 955   }
 956   // Policy: when is it profitable.  You must get more wins than
 957   // policy before it is considered profitable.  Policy is usually 0,
 958   // so 1 win is considered profitable.  Big merges will require big
 959   // cloning, so get a larger policy.
 960   int policy = n_blk->req() >> 2;
 961 
 962   // If the loop is a candidate for range check elimination,
 963   // delay splitting through it's phi until a later loop optimization
 964   if (n_blk->is_CountedLoop()) {
 965     IdealLoopTree *lp = get_loop(n_blk);
 966     if (lp && lp->_rce_candidate) {
 967       return n;
 968     }
 969   }
 970 
 971   // Use same limit as split_if_with_blocks_post
 972   if( C->live_nodes() > 35000 ) return n; // Method too big
 973 
 974   // Split 'n' through the merge point if it is profitable
 975   Node *phi = split_thru_phi( n, n_blk, policy );
 976   if (!phi) return n;
 977 
 978   // Found a Phi to split thru!
 979   // Replace 'n' with the new phi
 980   split_mem_thru_phi(n, n_blk, phi);
 981   _igvn.replace_node( n, phi );
 982   // Moved a load around the loop, 'en-registering' something.
 983   if (n_blk->is_Loop() && n->is_Load() &&
 984       !phi->in(LoopNode::LoopBackControl)->is_Load())
 985     C->set_major_progress();
 986 
 987   // Moved a barrier around the loop, 'en-registering' something.
 988   if (n_blk->is_Loop() && n->is_ShenandoahBarrier() &&
 989       !phi->in(LoopNode::LoopBackControl)->is_ShenandoahBarrier())
 990     C->set_major_progress();
 991 
 992   return phi;
 993 }
 994 
 995 static bool merge_point_too_heavy(Compile* C, Node* region) {
 996   // Bail out if the region and its phis have too many users.
 997   int weight = 0;
 998   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 999     weight += region->fast_out(i)->outcnt();
1000   }
1001   int nodes_left = C->max_node_limit() - C->live_nodes();
1002   if (weight * 8 > nodes_left) {
1003 #ifndef PRODUCT
1004     if (PrintOpto)
1005       tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
1006 #endif
1007     return true;
1008   } else {
1009     return false;
1010   }
1011 }
1012 
1013 static bool merge_point_safe(Node* region) {
1014   // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
1015   // having a PhiNode input. This sidesteps the dangerous case where the split
1016   // ConvI2LNode may become TOP if the input Value() does not
1017   // overlap the ConvI2L range, leaving a node which may not dominate its
1018   // uses.
1019   // A better fix for this problem can be found in the BugTraq entry, but
1020   // expediency for Mantis demands this hack.
1021   // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
1022   // split_if_with_blocks from splitting a block because we could not move around
1023   // the FastLockNode.
1024   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1025     Node* n = region->fast_out(i);
1026     if (n->is_Phi()) {
1027       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1028         Node* m = n->fast_out(j);
1029         if (m->is_FastLock())
1030           return false;
1031 #ifdef _LP64
1032         if (m->Opcode() == Op_ConvI2L)
1033           return false;
1034 #endif
1035       }
1036     }
1037   }
1038   return true;
1039 }
1040 
1041 
1042 //------------------------------place_near_use---------------------------------
1043 // Place some computation next to use but not inside inner loops.
1044 // For inner loop uses move it to the preheader area.
1045 Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
1046   IdealLoopTree *u_loop = get_loop( useblock );
1047   return (u_loop->_irreducible || u_loop->_child)
1048     ? useblock
1049     : u_loop->_head->in(LoopNode::EntryControl);
1050 }
1051 
1052 
1053 //------------------------------split_if_with_blocks_post----------------------
1054 // Do the real work in a non-recursive function.  CFG hackery wants to be
1055 // in the post-order, so it can dirty the I-DOM info and not use the dirtied
1056 // info.
1057 void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
1058 
1059   // Cloning Cmp through Phi's involves the split-if transform.
1060   // FastLock is not used by an If
1061   if( n->is_Cmp() && !n->is_FastLock() ) {
1062     if( C->live_nodes() > 35000 ) return; // Method too big
1063 
1064     // Do not do 'split-if' if irreducible loops are present.
1065     if( _has_irreducible_loops )
1066       return;
1067 
1068     Node *n_ctrl = get_ctrl(n);
1069     // Determine if the Node has inputs from some local Phi.
1070     // Returns the block to clone thru.
1071     Node *n_blk = has_local_phi_input( n );
1072     if( n_blk != n_ctrl ) return;
1073 
1074     if( merge_point_too_heavy(C, n_ctrl) )
1075       return;
1076 
1077     if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
1078     Node *bol = n->unique_out();
1079     assert( bol->is_Bool(), "expect a bool here" );
1080     if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
1081     Node *iff = bol->unique_out();
1082 
1083     // Check some safety conditions
1084     if( iff->is_If() ) {        // Classic split-if?
1085       if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
1086     } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
1087       // Can't split CMove with different control edge.
1088       if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
1089       if( get_ctrl(iff->in(2)) == n_ctrl ||
1090           get_ctrl(iff->in(3)) == n_ctrl )
1091         return;                 // Inputs not yet split-up
1092       if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
1093         return;                 // Loop-invar test gates loop-varying CMOVE
1094       }
1095     } else {
1096       return;  // some other kind of node, such as an Allocate
1097     }
1098 
1099     // Do not do 'split-if' if some paths are dead.  First do dead code
1100     // elimination and then see if its still profitable.
1101     for( uint i = 1; i < n_ctrl->req(); i++ )
1102       if( n_ctrl->in(i) == C->top() )
1103         return;
1104 
1105     // When is split-if profitable?  Every 'win' on means some control flow
1106     // goes dead, so it's almost always a win.
1107     int policy = 0;
1108     // If trying to do a 'Split-If' at the loop head, it is only
1109     // profitable if the cmp folds up on BOTH paths.  Otherwise we
1110     // risk peeling a loop forever.
1111 
1112     // CNC - Disabled for now.  Requires careful handling of loop
1113     // body selection for the cloned code.  Also, make sure we check
1114     // for any input path not being in the same loop as n_ctrl.  For
1115     // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
1116     // because the alternative loop entry points won't be converted
1117     // into LoopNodes.
1118     IdealLoopTree *n_loop = get_loop(n_ctrl);
1119     for( uint j = 1; j < n_ctrl->req(); j++ )
1120       if( get_loop(n_ctrl->in(j)) != n_loop )
1121         return;
1122 
1123     // Check for safety of the merge point.
1124     if( !merge_point_safe(n_ctrl) ) {
1125       return;
1126     }
1127 
1128     // Split compare 'n' through the merge point if it is profitable
1129     Node *phi = split_thru_phi( n, n_ctrl, policy );
1130     if( !phi ) return;
1131 
1132     // Found a Phi to split thru!
1133     // Replace 'n' with the new phi
1134     _igvn.replace_node( n, phi );
1135 
1136     // Now split the bool up thru the phi
1137     Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
1138     guarantee(bolphi != NULL, "null boolean phi node");
1139 
1140     _igvn.replace_node( bol, bolphi );
1141     assert( iff->in(1) == bolphi, "" );
1142 
1143     if( bolphi->Value(&_igvn)->singleton() )
1144       return;
1145 
1146     // Conditional-move?  Must split up now
1147     if( !iff->is_If() ) {
1148       Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
1149       _igvn.replace_node( iff, cmovphi );
1150       return;
1151     }
1152 
1153     // Now split the IF
1154     do_split_if( iff );
1155     return;
1156   }
1157 
1158   // Check for an IF ready to split; one that has its
1159   // condition codes input coming from a Phi at the block start.
1160   int n_op = n->Opcode();
1161 
1162   // Check for an IF being dominated by another IF same test
1163   if (n_op == Op_If) {
1164     Node *bol = n->in(1);
1165     uint max = bol->outcnt();
1166     // Check for same test used more than once?
1167     if (max > 1 && bol->is_Bool()) {
1168       // Search up IDOMs to see if this IF is dominated.
1169       Node *cutoff = get_ctrl(bol);
1170 
1171       // Now search up IDOMs till cutoff, looking for a dominating test
1172       Node *prevdom = n;
1173       Node *dom = idom(prevdom);
1174       while (dom != cutoff) {
1175         if (dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom) {
1176           // Replace the dominated test with an obvious true or false.
1177           // Place it on the IGVN worklist for later cleanup.
1178           C->set_major_progress();
1179           dominated_by(prevdom, n, false, true);
1180 #ifndef PRODUCT
1181           if( VerifyLoopOptimizations ) verify();
1182 #endif
1183           return;
1184         }
1185         prevdom = dom;
1186         dom = idom(prevdom);
1187       }
1188     }
1189   }
1190 
1191   // See if a shared loop-varying computation has no loop-varying uses.
1192   // Happens if something is only used for JVM state in uncommon trap exits,
1193   // like various versions of induction variable+offset.  Clone the
1194   // computation per usage to allow it to sink out of the loop.
1195   if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
1196     Node *n_ctrl = get_ctrl(n);
1197     IdealLoopTree *n_loop = get_loop(n_ctrl);
1198     if( n_loop != _ltree_root ) {
1199       DUIterator_Fast imax, i = n->fast_outs(imax);
1200       for (; i < imax; i++) {
1201         Node* u = n->fast_out(i);
1202         if( !has_ctrl(u) )     break; // Found control user
1203         IdealLoopTree *u_loop = get_loop(get_ctrl(u));
1204         if( u_loop == n_loop ) break; // Found loop-varying use
1205         if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
1206         if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
1207       }
1208       bool did_break = (i < imax);  // Did we break out of the previous loop?
1209       if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
1210         Node *late_load_ctrl = NULL;
1211         if (n->is_Load()) {
1212           // If n is a load, get and save the result from get_late_ctrl(),
1213           // to be later used in calculating the control for n's clones.
1214           clear_dom_lca_tags();
1215           late_load_ctrl = get_late_ctrl(n, n_ctrl);
1216         }
1217         // If n is a load, and the late control is the same as the current
1218         // control, then the cloning of n is a pointless exercise, because
1219         // GVN will ensure that we end up where we started.
1220         if (!n->is_Load() || late_load_ctrl != n_ctrl) {
1221           for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
1222             Node *u = n->last_out(j); // Clone private computation per use
1223             _igvn.rehash_node_delayed(u);
1224             Node *x = n->clone(); // Clone computation
1225             Node *x_ctrl = NULL;
1226             if( u->is_Phi() ) {
1227               // Replace all uses of normal nodes.  Replace Phi uses
1228               // individually, so the separate Nodes can sink down
1229               // different paths.
1230               uint k = 1;
1231               while( u->in(k) != n ) k++;
1232               u->set_req( k, x );
1233               // x goes next to Phi input path
1234               x_ctrl = u->in(0)->in(k);
1235               --j;
1236             } else {              // Normal use
1237               // Replace all uses
1238               for( uint k = 0; k < u->req(); k++ ) {
1239                 if( u->in(k) == n ) {
1240                   u->set_req( k, x );
1241                   --j;
1242                 }
1243               }
1244               x_ctrl = get_ctrl(u);
1245             }
1246 
1247             // Find control for 'x' next to use but not inside inner loops.
1248             // For inner loop uses get the preheader area.
1249             x_ctrl = place_near_use(x_ctrl);
1250 
1251             if (n->is_Load()) {
1252               // For loads, add a control edge to a CFG node outside of the loop
1253               // to force them to not combine and return back inside the loop
1254               // during GVN optimization (4641526).
1255               //
1256               // Because we are setting the actual control input, factor in
1257               // the result from get_late_ctrl() so we respect any
1258               // anti-dependences. (6233005).
1259               x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
1260 
1261               // Don't allow the control input to be a CFG splitting node.
1262               // Such nodes should only have ProjNodes as outs, e.g. IfNode
1263               // should only have IfTrueNode and IfFalseNode (4985384).
1264               x_ctrl = find_non_split_ctrl(x_ctrl);
1265               assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
1266 
1267               x->set_req(0, x_ctrl);
1268             }
1269             register_new_node(x, x_ctrl);
1270 
1271             // Some institutional knowledge is needed here: 'x' is
1272             // yanked because if the optimizer runs GVN on it all the
1273             // cloned x's will common up and undo this optimization and
1274             // be forced back in the loop.  This is annoying because it
1275             // makes +VerifyOpto report false-positives on progress.  I
1276             // tried setting control edges on the x's to force them to
1277             // not combine, but the matching gets worried when it tries
1278             // to fold a StoreP and an AddP together (as part of an
1279             // address expression) and the AddP and StoreP have
1280             // different controls.
1281             if (!x->is_Load() && !x->is_DecodeNarrowPtr() && !x->is_ShenandoahBarrier()) _igvn._worklist.yank(x);
1282           }
1283           _igvn.remove_dead_node(n);
1284         }
1285       }
1286     }
1287   }
1288 
1289   try_move_store_after_loop(n);
1290 
1291   // Check for Opaque2's who's loop has disappeared - who's input is in the
1292   // same loop nest as their output.  Remove 'em, they are no longer useful.
1293   if( n_op == Op_Opaque2 &&
1294       n->in(1) != NULL &&
1295       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1296     _igvn.replace_node( n, n->in(1) );
1297   }
1298 }
1299 
1300 //------------------------------split_if_with_blocks---------------------------
1301 // Check for aggressive application of 'split-if' optimization,
1302 // using basic block level info.
1303 void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
1304   Node *n = C->root();
1305   visited.set(n->_idx); // first, mark node as visited
1306   // Do pre-visit work for root
1307   n = split_if_with_blocks_pre( n );
1308   uint cnt = n->outcnt();
1309   uint i   = 0;
1310   while (true) {
1311     // Visit all children
1312     if (i < cnt) {
1313       Node* use = n->raw_out(i);
1314       ++i;
1315       if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1316         // Now do pre-visit work for this use
1317         use = split_if_with_blocks_pre( use );
1318         nstack.push(n, i); // Save parent and next use's index.
1319         n   = use;         // Process all children of current use.
1320         cnt = use->outcnt();
1321         i   = 0;
1322       }
1323     }
1324     else {
1325       // All of n's children have been processed, complete post-processing.
1326       if (cnt != 0 && !n->is_Con()) {
1327         assert(has_node(n), "no dead nodes");
1328         split_if_with_blocks_post( n );
1329       }
1330       if (nstack.is_empty()) {
1331         // Finished all nodes on stack.
1332         break;
1333       }
1334       // Get saved parent node and next use's index. Visit the rest of uses.
1335       n   = nstack.node();
1336       cnt = n->outcnt();
1337       i   = nstack.index();
1338       nstack.pop();
1339     }
1340   }
1341 }
1342 
1343 
1344 //=============================================================================
1345 //
1346 //                   C L O N E   A   L O O P   B O D Y
1347 //
1348 
1349 //------------------------------clone_iff--------------------------------------
1350 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1351 // "Nearly" because all Nodes have been cloned from the original in the loop,
1352 // but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1353 // through the Phi recursively, and return a Bool.
1354 BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
1355 
1356   // Convert this Phi into a Phi merging Bools
1357   uint i;
1358   for( i = 1; i < phi->req(); i++ ) {
1359     Node *b = phi->in(i);
1360     if( b->is_Phi() ) {
1361       _igvn.replace_input_of(phi, i, clone_iff( b->as_Phi(), loop ));
1362     } else {
1363       assert( b->is_Bool(), "" );
1364     }
1365   }
1366 
1367   Node *sample_bool = phi->in(1);
1368   Node *sample_cmp  = sample_bool->in(1);
1369 
1370   // Make Phis to merge the Cmp's inputs.
1371   PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1372   PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1373   for( i = 1; i < phi->req(); i++ ) {
1374     Node *n1 = phi->in(i)->in(1)->in(1);
1375     Node *n2 = phi->in(i)->in(1)->in(2);
1376     phi1->set_req( i, n1 );
1377     phi2->set_req( i, n2 );
1378     phi1->set_type( phi1->type()->meet_speculative(n1->bottom_type()));
1379     phi2->set_type( phi2->type()->meet_speculative(n2->bottom_type()));
1380   }
1381   // See if these Phis have been made before.
1382   // Register with optimizer
1383   Node *hit1 = _igvn.hash_find_insert(phi1);
1384   if( hit1 ) {                  // Hit, toss just made Phi
1385     _igvn.remove_dead_node(phi1); // Remove new phi
1386     assert( hit1->is_Phi(), "" );
1387     phi1 = (PhiNode*)hit1;      // Use existing phi
1388   } else {                      // Miss
1389     _igvn.register_new_node_with_optimizer(phi1);
1390   }
1391   Node *hit2 = _igvn.hash_find_insert(phi2);
1392   if( hit2 ) {                  // Hit, toss just made Phi
1393     _igvn.remove_dead_node(phi2); // Remove new phi
1394     assert( hit2->is_Phi(), "" );
1395     phi2 = (PhiNode*)hit2;      // Use existing phi
1396   } else {                      // Miss
1397     _igvn.register_new_node_with_optimizer(phi2);
1398   }
1399   // Register Phis with loop/block info
1400   set_ctrl(phi1, phi->in(0));
1401   set_ctrl(phi2, phi->in(0));
1402   // Make a new Cmp
1403   Node *cmp = sample_cmp->clone();
1404   cmp->set_req( 1, phi1 );
1405   cmp->set_req( 2, phi2 );
1406   _igvn.register_new_node_with_optimizer(cmp);
1407   set_ctrl(cmp, phi->in(0));
1408 
1409   // Make a new Bool
1410   Node *b = sample_bool->clone();
1411   b->set_req(1,cmp);
1412   _igvn.register_new_node_with_optimizer(b);
1413   set_ctrl(b, phi->in(0));
1414 
1415   assert( b->is_Bool(), "" );
1416   return (BoolNode*)b;
1417 }
1418 
1419 //------------------------------clone_bool-------------------------------------
1420 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1421 // "Nearly" because all Nodes have been cloned from the original in the loop,
1422 // but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1423 // through the Phi recursively, and return a Bool.
1424 CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
1425   uint i;
1426   // Convert this Phi into a Phi merging Bools
1427   for( i = 1; i < phi->req(); i++ ) {
1428     Node *b = phi->in(i);
1429     if( b->is_Phi() ) {
1430       _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
1431     } else {
1432       assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
1433     }
1434   }
1435 
1436   Node *sample_cmp = phi->in(1);
1437 
1438   // Make Phis to merge the Cmp's inputs.
1439   PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1440   PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1441   for( uint j = 1; j < phi->req(); j++ ) {
1442     Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
1443     Node *n1, *n2;
1444     if( cmp_top->is_Cmp() ) {
1445       n1 = cmp_top->in(1);
1446       n2 = cmp_top->in(2);
1447     } else {
1448       n1 = n2 = cmp_top;
1449     }
1450     phi1->set_req( j, n1 );
1451     phi2->set_req( j, n2 );
1452     phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
1453     phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
1454   }
1455 
1456   // See if these Phis have been made before.
1457   // Register with optimizer
1458   Node *hit1 = _igvn.hash_find_insert(phi1);
1459   if( hit1 ) {                  // Hit, toss just made Phi
1460     _igvn.remove_dead_node(phi1); // Remove new phi
1461     assert( hit1->is_Phi(), "" );
1462     phi1 = (PhiNode*)hit1;      // Use existing phi
1463   } else {                      // Miss
1464     _igvn.register_new_node_with_optimizer(phi1);
1465   }
1466   Node *hit2 = _igvn.hash_find_insert(phi2);
1467   if( hit2 ) {                  // Hit, toss just made Phi
1468     _igvn.remove_dead_node(phi2); // Remove new phi
1469     assert( hit2->is_Phi(), "" );
1470     phi2 = (PhiNode*)hit2;      // Use existing phi
1471   } else {                      // Miss
1472     _igvn.register_new_node_with_optimizer(phi2);
1473   }
1474   // Register Phis with loop/block info
1475   set_ctrl(phi1, phi->in(0));
1476   set_ctrl(phi2, phi->in(0));
1477   // Make a new Cmp
1478   Node *cmp = sample_cmp->clone();
1479   cmp->set_req( 1, phi1 );
1480   cmp->set_req( 2, phi2 );
1481   _igvn.register_new_node_with_optimizer(cmp);
1482   set_ctrl(cmp, phi->in(0));
1483 
1484   assert( cmp->is_Cmp(), "" );
1485   return (CmpNode*)cmp;
1486 }
1487 
1488 //------------------------------sink_use---------------------------------------
1489 // If 'use' was in the loop-exit block, it now needs to be sunk
1490 // below the post-loop merge point.
1491 void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1492   if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1493     set_ctrl(use, post_loop);
1494     for (DUIterator j = use->outs(); use->has_out(j); j++)
1495       sink_use(use->out(j), post_loop);
1496   }
1497 }
1498 
1499 //------------------------------clone_loop-------------------------------------
1500 //
1501 //                   C L O N E   A   L O O P   B O D Y
1502 //
1503 // This is the basic building block of the loop optimizations.  It clones an
1504 // entire loop body.  It makes an old_new loop body mapping; with this mapping
1505 // you can find the new-loop equivalent to an old-loop node.  All new-loop
1506 // nodes are exactly equal to their old-loop counterparts, all edges are the
1507 // same.  All exits from the old-loop now have a RegionNode that merges the
1508 // equivalent new-loop path.  This is true even for the normal "loop-exit"
1509 // condition.  All uses of loop-invariant old-loop values now come from (one
1510 // or more) Phis that merge their new-loop equivalents.
1511 //
1512 // This operation leaves the graph in an illegal state: there are two valid
1513 // control edges coming from the loop pre-header to both loop bodies.  I'll
1514 // definitely have to hack the graph after running this transform.
1515 //
1516 // From this building block I will further edit edges to perform loop peeling
1517 // or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
1518 //
1519 // Parameter side_by_size_idom:
1520 //   When side_by_size_idom is NULL, the dominator tree is constructed for
1521 //      the clone loop to dominate the original.  Used in construction of
1522 //      pre-main-post loop sequence.
1523 //   When nonnull, the clone and original are side-by-side, both are
1524 //      dominated by the side_by_side_idom node.  Used in construction of
1525 //      unswitched loops.
1526 void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
1527                                  Node* side_by_side_idom) {
1528 
1529 #ifndef PRODUCT
1530   if (C->do_vector_loop() && PrintOpto) {
1531     const char* mname = C->method()->name()->as_quoted_ascii();
1532     if (mname != NULL) {
1533       tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
1534     }
1535   }
1536 #endif
1537 
1538   CloneMap& cm = C->clone_map();
1539   Dict* dict = cm.dict();
1540   if (C->do_vector_loop()) {
1541     cm.set_clone_idx(cm.max_gen()+1);
1542 #ifndef PRODUCT
1543     if (PrintOpto) {
1544       tty->print_cr("PhaseIdealLoop::clone_loop: _clone_idx %d", cm.clone_idx());
1545       loop->dump_head();
1546     }
1547 #endif
1548   }
1549 
1550   // Step 1: Clone the loop body.  Make the old->new mapping.
1551   uint i;
1552   for( i = 0; i < loop->_body.size(); i++ ) {
1553     Node *old = loop->_body.at(i);
1554     Node *nnn = old->clone();
1555     old_new.map( old->_idx, nnn );
1556     if (C->do_vector_loop()) {
1557       cm.verify_insert_and_clone(old, nnn, cm.clone_idx());
1558     }
1559     _igvn.register_new_node_with_optimizer(nnn);
1560   }
1561 
1562 
1563   // Step 2: Fix the edges in the new body.  If the old input is outside the
1564   // loop use it.  If the old input is INside the loop, use the corresponding
1565   // new node instead.
1566   for( i = 0; i < loop->_body.size(); i++ ) {
1567     Node *old = loop->_body.at(i);
1568     Node *nnn = old_new[old->_idx];
1569     // Fix CFG/Loop controlling the new node
1570     if (has_ctrl(old)) {
1571       set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
1572     } else {
1573       set_loop(nnn, loop->_parent);
1574       if (old->outcnt() > 0) {
1575         set_idom( nnn, old_new[idom(old)->_idx], dd );
1576       }
1577     }
1578     // Correct edges to the new node
1579     for( uint j = 0; j < nnn->req(); j++ ) {
1580         Node *n = nnn->in(j);
1581         if( n ) {
1582           IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
1583           if( loop->is_member( old_in_loop ) )
1584             nnn->set_req(j, old_new[n->_idx]);
1585         }
1586     }
1587     _igvn.hash_find_insert(nnn);
1588   }
1589   Node *newhead = old_new[loop->_head->_idx];
1590   set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1591 
1592 
1593   // Step 3: Now fix control uses.  Loop varying control uses have already
1594   // been fixed up (as part of all input edges in Step 2).  Loop invariant
1595   // control uses must be either an IfFalse or an IfTrue.  Make a merge
1596   // point to merge the old and new IfFalse/IfTrue nodes; make the use
1597   // refer to this.
1598   ResourceArea *area = Thread::current()->resource_area();
1599   Node_List worklist(area);
1600   uint new_counter = C->unique();
1601   for( i = 0; i < loop->_body.size(); i++ ) {
1602     Node* old = loop->_body.at(i);
1603     if( !old->is_CFG() ) continue;
1604     Node* nnn = old_new[old->_idx];
1605 
1606     // Copy uses to a worklist, so I can munge the def-use info
1607     // with impunity.
1608     for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1609       worklist.push(old->fast_out(j));
1610 
1611     while( worklist.size() ) {  // Visit all uses
1612       Node *use = worklist.pop();
1613       if (!has_node(use))  continue; // Ignore dead nodes
1614       IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1615       if( !loop->is_member( use_loop ) && use->is_CFG() ) {
1616         // Both OLD and USE are CFG nodes here.
1617         assert( use->is_Proj(), "" );
1618 
1619         // Clone the loop exit control projection
1620         Node *newuse = use->clone();
1621         if (C->do_vector_loop()) {
1622           cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
1623         }
1624         newuse->set_req(0,nnn);
1625         _igvn.register_new_node_with_optimizer(newuse);
1626         set_loop(newuse, use_loop);
1627         set_idom(newuse, nnn, dom_depth(nnn) + 1 );
1628 
1629         // We need a Region to merge the exit from the peeled body and the
1630         // exit from the old loop body.
1631         RegionNode *r = new RegionNode(3);
1632         // Map the old use to the new merge point
1633         old_new.map( use->_idx, r );
1634         uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
1635         assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
1636 
1637         // The original user of 'use' uses 'r' instead.
1638         for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
1639           Node* useuse = use->last_out(l);
1640           _igvn.rehash_node_delayed(useuse);
1641           uint uses_found = 0;
1642           if( useuse->in(0) == use ) {
1643             useuse->set_req(0, r);
1644             uses_found++;
1645             if( useuse->is_CFG() ) {
1646               assert( dom_depth(useuse) > dd_r, "" );
1647               set_idom(useuse, r, dom_depth(useuse));
1648             }
1649           }
1650           for( uint k = 1; k < useuse->req(); k++ ) {
1651             if( useuse->in(k) == use ) {
1652               useuse->set_req(k, r);
1653               uses_found++;
1654             }
1655           }
1656           l -= uses_found;    // we deleted 1 or more copies of this edge
1657         }
1658 
1659         // Now finish up 'r'
1660         r->set_req( 1, newuse );
1661         r->set_req( 2,    use );
1662         _igvn.register_new_node_with_optimizer(r);
1663         set_loop(r, use_loop);
1664         set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
1665       } // End of if a loop-exit test
1666     }
1667   }
1668 
1669   // Step 4: If loop-invariant use is not control, it must be dominated by a
1670   // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
1671   // there if needed.  Make a Phi there merging old and new used values.
1672   Node_List *split_if_set = NULL;
1673   Node_List *split_bool_set = NULL;
1674   Node_List *split_cex_set = NULL;
1675   for( i = 0; i < loop->_body.size(); i++ ) {
1676     Node* old = loop->_body.at(i);
1677     Node* nnn = old_new[old->_idx];
1678     // Copy uses to a worklist, so I can munge the def-use info
1679     // with impunity.
1680     for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1681       worklist.push(old->fast_out(j));
1682 
1683     while( worklist.size() ) {
1684       Node *use = worklist.pop();
1685       if (!has_node(use))  continue; // Ignore dead nodes
1686       if (use->in(0) == C->top())  continue;
1687       IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1688       // Check for data-use outside of loop - at least one of OLD or USE
1689       // must not be a CFG node.
1690       if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
1691 
1692         // If the Data use is an IF, that means we have an IF outside of the
1693         // loop that is switching on a condition that is set inside of the
1694         // loop.  Happens if people set a loop-exit flag; then test the flag
1695         // in the loop to break the loop, then test is again outside of the
1696         // loop to determine which way the loop exited.
1697         // Loop predicate If node connects to Bool node through Opaque1 node.
1698         if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use)) {
1699           // Since this code is highly unlikely, we lazily build the worklist
1700           // of such Nodes to go split.
1701           if( !split_if_set )
1702             split_if_set = new Node_List(area);
1703           split_if_set->push(use);
1704         }
1705         if( use->is_Bool() ) {
1706           if( !split_bool_set )
1707             split_bool_set = new Node_List(area);
1708           split_bool_set->push(use);
1709         }
1710         if( use->Opcode() == Op_CreateEx ) {
1711           if( !split_cex_set )
1712             split_cex_set = new Node_List(area);
1713           split_cex_set->push(use);
1714         }
1715 
1716 
1717         // Get "block" use is in
1718         uint idx = 0;
1719         while( use->in(idx) != old ) idx++;
1720         Node *prev = use->is_CFG() ? use : get_ctrl(use);
1721         assert( !loop->is_member( get_loop( prev ) ), "" );
1722         Node *cfg = prev->_idx >= new_counter
1723           ? prev->in(2)
1724           : idom(prev);
1725         if( use->is_Phi() )     // Phi use is in prior block
1726           cfg = prev->in(idx);  // NOT in block of Phi itself
1727         if (cfg->is_top()) {    // Use is dead?
1728           _igvn.replace_input_of(use, idx, C->top());
1729           continue;
1730         }
1731 
1732         while( !loop->is_member( get_loop( cfg ) ) ) {
1733           prev = cfg;
1734           cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
1735         }
1736         // If the use occurs after merging several exits from the loop, then
1737         // old value must have dominated all those exits.  Since the same old
1738         // value was used on all those exits we did not need a Phi at this
1739         // merge point.  NOW we do need a Phi here.  Each loop exit value
1740         // is now merged with the peeled body exit; each exit gets its own
1741         // private Phi and those Phis need to be merged here.
1742         Node *phi;
1743         if( prev->is_Region() ) {
1744           if( idx == 0  && use->Opcode() != Op_ShenandoahWBMemProj) {      // Updating control edge?
1745             phi = prev;         // Just use existing control
1746           } else {              // Else need a new Phi
1747             phi = PhiNode::make( prev, old );
1748             // Now recursively fix up the new uses of old!
1749             uint first = use->Opcode() != Op_ShenandoahWBMemProj ? 1 : 0;
1750             for( uint i = first; i < prev->req(); i++ ) {
1751               worklist.push(phi); // Onto worklist once for each 'old' input
1752             }
1753           }
1754         } else {
1755           // Get new RegionNode merging old and new loop exits
1756           prev = old_new[prev->_idx];
1757           assert( prev, "just made this in step 7" );
1758           if( idx == 0 && use->Opcode() != Op_ShenandoahWBMemProj) {      // Updating control edge?
1759             phi = prev;         // Just use existing control
1760           } else {              // Else need a new Phi
1761             // Make a new Phi merging data values properly
1762             phi = PhiNode::make( prev, old );
1763             phi->set_req( 1, nnn );
1764           }
1765         }
1766         // If inserting a new Phi, check for prior hits
1767         if( idx != 0 ) {
1768           Node *hit = _igvn.hash_find_insert(phi);
1769           if( hit == NULL ) {
1770            _igvn.register_new_node_with_optimizer(phi); // Register new phi
1771           } else {                                      // or
1772             // Remove the new phi from the graph and use the hit
1773             _igvn.remove_dead_node(phi);
1774             phi = hit;                                  // Use existing phi
1775           }
1776           set_ctrl(phi, prev);
1777         }
1778         // Make 'use' use the Phi instead of the old loop body exit value
1779         _igvn.replace_input_of(use, idx, phi);
1780         if( use->_idx >= new_counter ) { // If updating new phis
1781           // Not needed for correctness, but prevents a weak assert
1782           // in AddPNode from tripping (when we end up with different
1783           // base & derived Phis that will become the same after
1784           // IGVN does CSE).
1785           Node *hit = _igvn.hash_find_insert(use);
1786           if( hit )             // Go ahead and re-hash for hits.
1787             _igvn.replace_node( use, hit );
1788         }
1789 
1790         // If 'use' was in the loop-exit block, it now needs to be sunk
1791         // below the post-loop merge point.
1792         sink_use( use, prev );
1793       }
1794     }
1795   }
1796 
1797   // Check for IFs that need splitting/cloning.  Happens if an IF outside of
1798   // the loop uses a condition set in the loop.  The original IF probably
1799   // takes control from one or more OLD Regions (which in turn get from NEW
1800   // Regions).  In any case, there will be a set of Phis for each merge point
1801   // from the IF up to where the original BOOL def exists the loop.
1802   if( split_if_set ) {
1803     while( split_if_set->size() ) {
1804       Node *iff = split_if_set->pop();
1805       if( iff->in(1)->is_Phi() ) {
1806         BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
1807         _igvn.replace_input_of(iff, 1, b);
1808       }
1809     }
1810   }
1811   if( split_bool_set ) {
1812     while( split_bool_set->size() ) {
1813       Node *b = split_bool_set->pop();
1814       Node *phi = b->in(1);
1815       assert( phi->is_Phi(), "" );
1816       CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
1817       _igvn.replace_input_of(b, 1, cmp);
1818     }
1819   }
1820   if( split_cex_set ) {
1821     while( split_cex_set->size() ) {
1822       Node *b = split_cex_set->pop();
1823       assert( b->in(0)->is_Region(), "" );
1824       assert( b->in(1)->is_Phi(), "" );
1825       assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
1826       split_up( b, b->in(0), NULL );
1827     }
1828   }
1829 
1830 }
1831 
1832 
1833 //---------------------- stride_of_possible_iv -------------------------------------
1834 // Looks for an iff/bool/comp with one operand of the compare
1835 // being a cycle involving an add and a phi,
1836 // with an optional truncation (left-shift followed by a right-shift)
1837 // of the add. Returns zero if not an iv.
1838 int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
1839   Node* trunc1 = NULL;
1840   Node* trunc2 = NULL;
1841   const TypeInt* ttype = NULL;
1842   if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
1843     return 0;
1844   }
1845   BoolNode* bl = iff->in(1)->as_Bool();
1846   Node* cmp = bl->in(1);
1847   if (!cmp || cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU) {
1848     return 0;
1849   }
1850   // Must have an invariant operand
1851   if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
1852     return 0;
1853   }
1854   Node* add2 = NULL;
1855   Node* cmp1 = cmp->in(1);
1856   if (cmp1->is_Phi()) {
1857     // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
1858     Node* phi = cmp1;
1859     for (uint i = 1; i < phi->req(); i++) {
1860       Node* in = phi->in(i);
1861       Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
1862                                 &trunc1, &trunc2, &ttype);
1863       if (add && add->in(1) == phi) {
1864         add2 = add->in(2);
1865         break;
1866       }
1867     }
1868   } else {
1869     // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
1870     Node* addtrunc = cmp1;
1871     Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
1872                                 &trunc1, &trunc2, &ttype);
1873     if (add && add->in(1)->is_Phi()) {
1874       Node* phi = add->in(1);
1875       for (uint i = 1; i < phi->req(); i++) {
1876         if (phi->in(i) == addtrunc) {
1877           add2 = add->in(2);
1878           break;
1879         }
1880       }
1881     }
1882   }
1883   if (add2 != NULL) {
1884     const TypeInt* add2t = _igvn.type(add2)->is_int();
1885     if (add2t->is_con()) {
1886       return add2t->get_con();
1887     }
1888   }
1889   return 0;
1890 }
1891 
1892 
1893 //---------------------- stay_in_loop -------------------------------------
1894 // Return the (unique) control output node that's in the loop (if it exists.)
1895 Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
1896   Node* unique = NULL;
1897   if (!n) return NULL;
1898   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1899     Node* use = n->fast_out(i);
1900     if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
1901       if (unique != NULL) {
1902         return NULL;
1903       }
1904       unique = use;
1905     }
1906   }
1907   return unique;
1908 }
1909 
1910 //------------------------------ register_node -------------------------------------
1911 // Utility to register node "n" with PhaseIdealLoop
1912 void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
1913   _igvn.register_new_node_with_optimizer(n);
1914   loop->_body.push(n);
1915   if (n->is_CFG()) {
1916     set_loop(n, loop);
1917     set_idom(n, pred, ddepth);
1918   } else {
1919     set_ctrl(n, pred);
1920   }
1921 }
1922 
1923 //------------------------------ proj_clone -------------------------------------
1924 // Utility to create an if-projection
1925 ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
1926   ProjNode* c = p->clone()->as_Proj();
1927   c->set_req(0, iff);
1928   return c;
1929 }
1930 
1931 //------------------------------ short_circuit_if -------------------------------------
1932 // Force the iff control output to be the live_proj
1933 Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
1934   guarantee(live_proj != NULL, "null projection");
1935   int proj_con = live_proj->_con;
1936   assert(proj_con == 0 || proj_con == 1, "false or true projection");
1937   Node *con = _igvn.intcon(proj_con);
1938   set_ctrl(con, C->root());
1939   if (iff) {
1940     iff->set_req(1, con);
1941   }
1942   return con;
1943 }
1944 
1945 //------------------------------ insert_if_before_proj -------------------------------------
1946 // Insert a new if before an if projection (* - new node)
1947 //
1948 // before
1949 //           if(test)
1950 //           /     \
1951 //          v       v
1952 //    other-proj   proj (arg)
1953 //
1954 // after
1955 //           if(test)
1956 //           /     \
1957 //          /       v
1958 //         |      * proj-clone
1959 //         v          |
1960 //    other-proj      v
1961 //                * new_if(relop(cmp[IU](left,right)))
1962 //                  /  \
1963 //                 v    v
1964 //         * new-proj  proj
1965 //         (returned)
1966 //
1967 ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
1968   IfNode* iff = proj->in(0)->as_If();
1969   IdealLoopTree *loop = get_loop(proj);
1970   ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1971   int ddepth = dom_depth(proj);
1972 
1973   _igvn.rehash_node_delayed(iff);
1974   _igvn.rehash_node_delayed(proj);
1975 
1976   proj->set_req(0, NULL);  // temporary disconnect
1977   ProjNode* proj2 = proj_clone(proj, iff);
1978   register_node(proj2, loop, iff, ddepth);
1979 
1980   Node* cmp = Signed ? (Node*) new CmpINode(left, right) : (Node*) new CmpUNode(left, right);
1981   register_node(cmp, loop, proj2, ddepth);
1982 
1983   BoolNode* bol = new BoolNode(cmp, relop);
1984   register_node(bol, loop, proj2, ddepth);
1985 
1986   IfNode* new_if = new IfNode(proj2, bol, iff->_prob, iff->_fcnt);
1987   register_node(new_if, loop, proj2, ddepth);
1988 
1989   proj->set_req(0, new_if); // reattach
1990   set_idom(proj, new_if, ddepth);
1991 
1992   ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
1993   guarantee(new_exit != NULL, "null exit node");
1994   register_node(new_exit, get_loop(other_proj), new_if, ddepth);
1995 
1996   return new_exit;
1997 }
1998 
1999 //------------------------------ insert_region_before_proj -------------------------------------
2000 // Insert a region before an if projection (* - new node)
2001 //
2002 // before
2003 //           if(test)
2004 //          /      |
2005 //         v       |
2006 //       proj      v
2007 //               other-proj
2008 //
2009 // after
2010 //           if(test)
2011 //          /      |
2012 //         v       |
2013 // * proj-clone    v
2014 //         |     other-proj
2015 //         v
2016 // * new-region
2017 //         |
2018 //         v
2019 // *      dum_if
2020 //       /     \
2021 //      v       \
2022 // * dum-proj    v
2023 //              proj
2024 //
2025 RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
2026   IfNode* iff = proj->in(0)->as_If();
2027   IdealLoopTree *loop = get_loop(proj);
2028   ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
2029   int ddepth = dom_depth(proj);
2030 
2031   _igvn.rehash_node_delayed(iff);
2032   _igvn.rehash_node_delayed(proj);
2033 
2034   proj->set_req(0, NULL);  // temporary disconnect
2035   ProjNode* proj2 = proj_clone(proj, iff);
2036   register_node(proj2, loop, iff, ddepth);
2037 
2038   RegionNode* reg = new RegionNode(2);
2039   reg->set_req(1, proj2);
2040   register_node(reg, loop, iff, ddepth);
2041 
2042   IfNode* dum_if = new IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
2043   register_node(dum_if, loop, reg, ddepth);
2044 
2045   proj->set_req(0, dum_if); // reattach
2046   set_idom(proj, dum_if, ddepth);
2047 
2048   ProjNode* dum_proj = proj_clone(other_proj, dum_if);
2049   register_node(dum_proj, loop, dum_if, ddepth);
2050 
2051   return reg;
2052 }
2053 
2054 //------------------------------ insert_cmpi_loop_exit -------------------------------------
2055 // Clone a signed compare loop exit from an unsigned compare and
2056 // insert it before the unsigned cmp on the stay-in-loop path.
2057 // All new nodes inserted in the dominator tree between the original
2058 // if and it's projections.  The original if test is replaced with
2059 // a constant to force the stay-in-loop path.
2060 //
2061 // This is done to make sure that the original if and it's projections
2062 // still dominate the same set of control nodes, that the ctrl() relation
2063 // from data nodes to them is preserved, and that their loop nesting is
2064 // preserved.
2065 //
2066 // before
2067 //          if(i <u limit)    unsigned compare loop exit
2068 //         /       |
2069 //        v        v
2070 //   exit-proj   stay-in-loop-proj
2071 //
2072 // after
2073 //          if(stay-in-loop-const)  original if
2074 //         /       |
2075 //        /        v
2076 //       /  if(i <  limit)    new signed test
2077 //      /  /       |
2078 //     /  /        v
2079 //    /  /  if(i <u limit)    new cloned unsigned test
2080 //   /  /   /      |
2081 //   v  v  v       |
2082 //    region       |
2083 //        |        |
2084 //      dum-if     |
2085 //     /  |        |
2086 // ether  |        |
2087 //        v        v
2088 //   exit-proj   stay-in-loop-proj
2089 //
2090 IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
2091   const bool Signed   = true;
2092   const bool Unsigned = false;
2093 
2094   BoolNode* bol = if_cmpu->in(1)->as_Bool();
2095   if (bol->_test._test != BoolTest::lt) return NULL;
2096   CmpNode* cmpu = bol->in(1)->as_Cmp();
2097   if (cmpu->Opcode() != Op_CmpU) return NULL;
2098   int stride = stride_of_possible_iv(if_cmpu);
2099   if (stride == 0) return NULL;
2100 
2101   Node* lp_proj = stay_in_loop(if_cmpu, loop);
2102   guarantee(lp_proj != NULL, "null loop node");
2103 
2104   ProjNode* lp_continue = lp_proj->as_Proj();
2105   ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
2106 
2107   Node* limit = NULL;
2108   if (stride > 0) {
2109     limit = cmpu->in(2);
2110   } else {
2111     limit = _igvn.makecon(TypeInt::ZERO);
2112     set_ctrl(limit, C->root());
2113   }
2114   // Create a new region on the exit path
2115   RegionNode* reg = insert_region_before_proj(lp_exit);
2116   guarantee(reg != NULL, "null region node");
2117 
2118   // Clone the if-cmpu-true-false using a signed compare
2119   BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
2120   ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
2121   reg->add_req(cmpi_exit);
2122 
2123   // Clone the if-cmpu-true-false
2124   BoolTest::mask rel_u = bol->_test._test;
2125   ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
2126   reg->add_req(cmpu_exit);
2127 
2128   // Force original if to stay in loop.
2129   short_circuit_if(if_cmpu, lp_continue);
2130 
2131   return cmpi_exit->in(0)->as_If();
2132 }
2133 
2134 //------------------------------ remove_cmpi_loop_exit -------------------------------------
2135 // Remove a previously inserted signed compare loop exit.
2136 void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
2137   Node* lp_proj = stay_in_loop(if_cmp, loop);
2138   assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
2139          stay_in_loop(lp_proj, loop)->is_If() &&
2140          stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
2141   Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
2142   set_ctrl(con, C->root());
2143   if_cmp->set_req(1, con);
2144 }
2145 
2146 //------------------------------ scheduled_nodelist -------------------------------------
2147 // Create a post order schedule of nodes that are in the
2148 // "member" set.  The list is returned in "sched".
2149 // The first node in "sched" is the loop head, followed by
2150 // nodes which have no inputs in the "member" set, and then
2151 // followed by the nodes that have an immediate input dependence
2152 // on a node in "sched".
2153 void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
2154 
2155   assert(member.test(loop->_head->_idx), "loop head must be in member set");
2156   Arena *a = Thread::current()->resource_area();
2157   VectorSet visited(a);
2158   Node_Stack nstack(a, loop->_body.size());
2159 
2160   Node* n  = loop->_head;  // top of stack is cached in "n"
2161   uint idx = 0;
2162   visited.set(n->_idx);
2163 
2164   // Initially push all with no inputs from within member set
2165   for(uint i = 0; i < loop->_body.size(); i++ ) {
2166     Node *elt = loop->_body.at(i);
2167     if (member.test(elt->_idx)) {
2168       bool found = false;
2169       for (uint j = 0; j < elt->req(); j++) {
2170         Node* def = elt->in(j);
2171         if (def && member.test(def->_idx) && def != elt) {
2172           found = true;
2173           break;
2174         }
2175       }
2176       if (!found && elt != loop->_head) {
2177         nstack.push(n, idx);
2178         n = elt;
2179         assert(!visited.test(n->_idx), "not seen yet");
2180         visited.set(n->_idx);
2181       }
2182     }
2183   }
2184 
2185   // traverse out's that are in the member set
2186   while (true) {
2187     if (idx < n->outcnt()) {
2188       Node* use = n->raw_out(idx);
2189       idx++;
2190       if (!visited.test_set(use->_idx)) {
2191         if (member.test(use->_idx)) {
2192           nstack.push(n, idx);
2193           n = use;
2194           idx = 0;
2195         }
2196       }
2197     } else {
2198       // All outputs processed
2199       sched.push(n);
2200       if (nstack.is_empty()) break;
2201       n   = nstack.node();
2202       idx = nstack.index();
2203       nstack.pop();
2204     }
2205   }
2206 }
2207 
2208 
2209 //------------------------------ has_use_in_set -------------------------------------
2210 // Has a use in the vector set
2211 bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
2212   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2213     Node* use = n->fast_out(j);
2214     if (vset.test(use->_idx)) {
2215       return true;
2216     }
2217   }
2218   return false;
2219 }
2220 
2221 
2222 //------------------------------ has_use_internal_to_set -------------------------------------
2223 // Has use internal to the vector set (ie. not in a phi at the loop head)
2224 bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
2225   Node* head  = loop->_head;
2226   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2227     Node* use = n->fast_out(j);
2228     if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
2229       return true;
2230     }
2231   }
2232   return false;
2233 }
2234 
2235 
2236 //------------------------------ clone_for_use_outside_loop -------------------------------------
2237 // clone "n" for uses that are outside of loop
2238 int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
2239   int cloned = 0;
2240   assert(worklist.size() == 0, "should be empty");
2241   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2242     Node* use = n->fast_out(j);
2243     if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
2244       worklist.push(use);
2245     }
2246   }
2247   while( worklist.size() ) {
2248     Node *use = worklist.pop();
2249     if (!has_node(use) || use->in(0) == C->top()) continue;
2250     uint j;
2251     for (j = 0; j < use->req(); j++) {
2252       if (use->in(j) == n) break;
2253     }
2254     assert(j < use->req(), "must be there");
2255 
2256     // clone "n" and insert it between the inputs of "n" and the use outside the loop
2257     Node* n_clone = n->clone();
2258     _igvn.replace_input_of(use, j, n_clone);
2259     cloned++;
2260     Node* use_c;
2261     if (!use->is_Phi()) {
2262       use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
2263     } else {
2264       // Use in a phi is considered a use in the associated predecessor block
2265       use_c = use->in(0)->in(j);
2266     }
2267     set_ctrl(n_clone, use_c);
2268     assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
2269     get_loop(use_c)->_body.push(n_clone);
2270     _igvn.register_new_node_with_optimizer(n_clone);
2271 #if !defined(PRODUCT)
2272     if (TracePartialPeeling) {
2273       tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
2274     }
2275 #endif
2276   }
2277   return cloned;
2278 }
2279 
2280 
2281 //------------------------------ clone_for_special_use_inside_loop -------------------------------------
2282 // clone "n" for special uses that are in the not_peeled region.
2283 // If these def-uses occur in separate blocks, the code generator
2284 // marks the method as not compilable.  For example, if a "BoolNode"
2285 // is in a different basic block than the "IfNode" that uses it, then
2286 // the compilation is aborted in the code generator.
2287 void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
2288                                                         VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
2289   if (n->is_Phi() || n->is_Load()) {
2290     return;
2291   }
2292   assert(worklist.size() == 0, "should be empty");
2293   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2294     Node* use = n->fast_out(j);
2295     if ( not_peel.test(use->_idx) &&
2296          (use->is_If() || use->is_CMove() || use->is_Bool()) &&
2297          use->in(1) == n)  {
2298       worklist.push(use);
2299     }
2300   }
2301   if (worklist.size() > 0) {
2302     // clone "n" and insert it between inputs of "n" and the use
2303     Node* n_clone = n->clone();
2304     loop->_body.push(n_clone);
2305     _igvn.register_new_node_with_optimizer(n_clone);
2306     set_ctrl(n_clone, get_ctrl(n));
2307     sink_list.push(n_clone);
2308     not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
2309 #if !defined(PRODUCT)
2310     if (TracePartialPeeling) {
2311       tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
2312     }
2313 #endif
2314     while( worklist.size() ) {
2315       Node *use = worklist.pop();
2316       _igvn.rehash_node_delayed(use);
2317       for (uint j = 1; j < use->req(); j++) {
2318         if (use->in(j) == n) {
2319           use->set_req(j, n_clone);
2320         }
2321       }
2322     }
2323   }
2324 }
2325 
2326 
2327 //------------------------------ insert_phi_for_loop -------------------------------------
2328 // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
2329 void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2330   Node *phi = PhiNode::make(lp, back_edge_val);
2331   phi->set_req(LoopNode::EntryControl, lp_entry_val);
2332   // Use existing phi if it already exists
2333   Node *hit = _igvn.hash_find_insert(phi);
2334   if( hit == NULL ) {
2335     _igvn.register_new_node_with_optimizer(phi);
2336     set_ctrl(phi, lp);
2337   } else {
2338     // Remove the new phi from the graph and use the hit
2339     _igvn.remove_dead_node(phi);
2340     phi = hit;
2341   }
2342   _igvn.replace_input_of(use, idx, phi);
2343 }
2344 
2345 #ifdef ASSERT
2346 //------------------------------ is_valid_loop_partition -------------------------------------
2347 // Validate the loop partition sets: peel and not_peel
2348 bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2349                                               VectorSet& not_peel ) {
2350   uint i;
2351   // Check that peel_list entries are in the peel set
2352   for (i = 0; i < peel_list.size(); i++) {
2353     if (!peel.test(peel_list.at(i)->_idx)) {
2354       return false;
2355     }
2356   }
2357   // Check at loop members are in one of peel set or not_peel set
2358   for (i = 0; i < loop->_body.size(); i++ ) {
2359     Node *def  = loop->_body.at(i);
2360     uint di = def->_idx;
2361     // Check that peel set elements are in peel_list
2362     if (peel.test(di)) {
2363       if (not_peel.test(di)) {
2364         return false;
2365       }
2366       // Must be in peel_list also
2367       bool found = false;
2368       for (uint j = 0; j < peel_list.size(); j++) {
2369         if (peel_list.at(j)->_idx == di) {
2370           found = true;
2371           break;
2372         }
2373       }
2374       if (!found) {
2375         return false;
2376       }
2377     } else if (not_peel.test(di)) {
2378       if (peel.test(di)) {
2379         return false;
2380       }
2381     } else {
2382       return false;
2383     }
2384   }
2385   return true;
2386 }
2387 
2388 //------------------------------ is_valid_clone_loop_exit_use -------------------------------------
2389 // Ensure a use outside of loop is of the right form
2390 bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
2391   Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2392   return (use->is_Phi() &&
2393           use_c->is_Region() && use_c->req() == 3 &&
2394           (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
2395            use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
2396            use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
2397           loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
2398 }
2399 
2400 //------------------------------ is_valid_clone_loop_form -------------------------------------
2401 // Ensure that all uses outside of loop are of the right form
2402 bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
2403                                                uint orig_exit_idx, uint clone_exit_idx) {
2404   uint len = peel_list.size();
2405   for (uint i = 0; i < len; i++) {
2406     Node *def = peel_list.at(i);
2407 
2408     for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2409       Node *use = def->fast_out(j);
2410       Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2411       if (!loop->is_member(get_loop(use_c))) {
2412         // use is not in the loop, check for correct structure
2413         if (use->in(0) == def) {
2414           // Okay
2415         } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
2416           return false;
2417         }
2418       }
2419     }
2420   }
2421   return true;
2422 }
2423 #endif
2424 
2425 //------------------------------ partial_peel -------------------------------------
2426 // Partially peel (aka loop rotation) the top portion of a loop (called
2427 // the peel section below) by cloning it and placing one copy just before
2428 // the new loop head and the other copy at the bottom of the new loop.
2429 //
2430 //    before                       after                where it came from
2431 //
2432 //    stmt1                        stmt1
2433 //  loop:                          stmt2                     clone
2434 //    stmt2                        if condA goto exitA       clone
2435 //    if condA goto exitA        new_loop:                   new
2436 //    stmt3                        stmt3                     clone
2437 //    if !condB goto loop          if condB goto exitB       clone
2438 //  exitB:                         stmt2                     orig
2439 //    stmt4                        if !condA goto new_loop   orig
2440 //  exitA:                         goto exitA
2441 //                               exitB:
2442 //                                 stmt4
2443 //                               exitA:
2444 //
2445 // Step 1: find the cut point: an exit test on probable
2446 //         induction variable.
2447 // Step 2: schedule (with cloning) operations in the peel
2448 //         section that can be executed after the cut into
2449 //         the section that is not peeled.  This may need
2450 //         to clone operations into exit blocks.  For
2451 //         instance, a reference to A[i] in the not-peel
2452 //         section and a reference to B[i] in an exit block
2453 //         may cause a left-shift of i by 2 to be placed
2454 //         in the peel block.  This step will clone the left
2455 //         shift into the exit block and sink the left shift
2456 //         from the peel to the not-peel section.
2457 // Step 3: clone the loop, retarget the control, and insert
2458 //         phis for values that are live across the new loop
2459 //         head.  This is very dependent on the graph structure
2460 //         from clone_loop.  It creates region nodes for
2461 //         exit control and associated phi nodes for values
2462 //         flow out of the loop through that exit.  The region
2463 //         node is dominated by the clone's control projection.
2464 //         So the clone's peel section is placed before the
2465 //         new loop head, and the clone's not-peel section is
2466 //         forms the top part of the new loop.  The original
2467 //         peel section forms the tail of the new loop.
2468 // Step 4: update the dominator tree and recompute the
2469 //         dominator depth.
2470 //
2471 //                   orig
2472 //
2473 //                   stmt1
2474 //                     |
2475 //                     v
2476 //               loop predicate
2477 //                     |
2478 //                     v
2479 //                   loop<----+
2480 //                     |      |
2481 //                   stmt2    |
2482 //                     |      |
2483 //                     v      |
2484 //                    ifA     |
2485 //                   / |      |
2486 //                  v  v      |
2487 //               false true   ^  <-- last_peel
2488 //               /     |      |
2489 //              /   ===|==cut |
2490 //             /     stmt3    |  <-- first_not_peel
2491 //            /        |      |
2492 //            |        v      |
2493 //            v       ifB     |
2494 //          exitA:   / \      |
2495 //                  /   \     |
2496 //                 v     v    |
2497 //               false true   |
2498 //               /       \    |
2499 //              /         ----+
2500 //             |
2501 //             v
2502 //           exitB:
2503 //           stmt4
2504 //
2505 //
2506 //            after clone loop
2507 //
2508 //                   stmt1
2509 //                     |
2510 //                     v
2511 //               loop predicate
2512 //                 /       \
2513 //        clone   /         \   orig
2514 //               /           \
2515 //              /             \
2516 //             v               v
2517 //   +---->loop                loop<----+
2518 //   |      |                    |      |
2519 //   |    stmt2                stmt2    |
2520 //   |      |                    |      |
2521 //   |      v                    v      |
2522 //   |      ifA                 ifA     |
2523 //   |      | \                / |      |
2524 //   |      v  v              v  v      |
2525 //   ^    true  false      false true   ^  <-- last_peel
2526 //   |      |   ^   \       /    |      |
2527 //   | cut==|==  \   \     /  ===|==cut |
2528 //   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
2529 //   |      |    dom   | |       |      |
2530 //   |      v      \  1v v2      v      |
2531 //   |      ifB     regionA     ifB     |
2532 //   |      / \        |       / \      |
2533 //   |     /   \       v      /   \     |
2534 //   |    v     v    exitA:  v     v    |
2535 //   |    true  false      false true   |
2536 //   |    /     ^   \      /       \    |
2537 //   +----       \   \    /         ----+
2538 //               dom  \  /
2539 //                 \  1v v2
2540 //                  regionB
2541 //                     |
2542 //                     v
2543 //                   exitB:
2544 //                   stmt4
2545 //
2546 //
2547 //           after partial peel
2548 //
2549 //                  stmt1
2550 //                     |
2551 //                     v
2552 //               loop predicate
2553 //                 /
2554 //        clone   /             orig
2555 //               /          TOP
2556 //              /             \
2557 //             v               v
2558 //    TOP->loop                loop----+
2559 //          |                    |      |
2560 //        stmt2                stmt2    |
2561 //          |                    |      |
2562 //          v                    v      |
2563 //          ifA                 ifA     |
2564 //          | \                / |      |
2565 //          v  v              v  v      |
2566 //        true  false      false true   |     <-- last_peel
2567 //          |   ^   \       /    +------|---+
2568 //  +->newloop   \   \     /  === ==cut |   |
2569 //  |     stmt3   \   \   /     TOP     |   |
2570 //  |       |    dom   | |      stmt3   |   | <-- first_not_peel
2571 //  |       v      \  1v v2      v      |   |
2572 //  |       ifB     regionA     ifB     ^   v
2573 //  |       / \        |       / \      |   |
2574 //  |      /   \       v      /   \     |   |
2575 //  |     v     v    exitA:  v     v    |   |
2576 //  |     true  false      false true   |   |
2577 //  |     /     ^   \      /       \    |   |
2578 //  |    |       \   \    /         v   |   |
2579 //  |    |       dom  \  /         TOP  |   |
2580 //  |    |         \  1v v2             |   |
2581 //  ^    v          regionB             |   |
2582 //  |    |             |                |   |
2583 //  |    |             v                ^   v
2584 //  |    |           exitB:             |   |
2585 //  |    |           stmt4              |   |
2586 //  |    +------------>-----------------+   |
2587 //  |                                       |
2588 //  +-----------------<---------------------+
2589 //
2590 //
2591 //              final graph
2592 //
2593 //                  stmt1
2594 //                    |
2595 //                    v
2596 //               loop predicate
2597 //                    |
2598 //                    v
2599 //                  stmt2 clone
2600 //                    |
2601 //                    v
2602 //         ........> ifA clone
2603 //         :        / |
2604 //        dom      /  |
2605 //         :      v   v
2606 //         :  false   true
2607 //         :  |       |
2608 //         :  |       v
2609 //         :  |    newloop<-----+
2610 //         :  |        |        |
2611 //         :  |     stmt3 clone |
2612 //         :  |        |        |
2613 //         :  |        v        |
2614 //         :  |       ifB       |
2615 //         :  |      / \        |
2616 //         :  |     v   v       |
2617 //         :  |  false true     |
2618 //         :  |   |     |       |
2619 //         :  |   v    stmt2    |
2620 //         :  | exitB:  |       |
2621 //         :  | stmt4   v       |
2622 //         :  |       ifA orig  |
2623 //         :  |      /  \       |
2624 //         :  |     /    \      |
2625 //         :  |    v     v      |
2626 //         :  |  false  true    |
2627 //         :  |  /        \     |
2628 //         :  v  v         -----+
2629 //          RegionA
2630 //             |
2631 //             v
2632 //           exitA
2633 //
2634 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2635 
2636   assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
2637   if (!loop->_head->is_Loop()) {
2638     return false;  }
2639 
2640   LoopNode *head  = loop->_head->as_Loop();
2641 
2642   if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2643     return false;
2644   }
2645 
2646   // Check for complex exit control
2647   for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2648     Node *n = loop->_body.at(ii);
2649     int opc = n->Opcode();
2650     if (n->is_Call()        ||
2651         opc == Op_Catch     ||
2652         opc == Op_CatchProj ||
2653         opc == Op_Jump      ||
2654         opc == Op_JumpProj) {
2655 #if !defined(PRODUCT)
2656       if (TracePartialPeeling) {
2657         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2658       }
2659 #endif
2660       return false;
2661     }
2662   }
2663 
2664   int dd = dom_depth(head);
2665 
2666   // Step 1: find cut point
2667 
2668   // Walk up dominators to loop head looking for first loop exit
2669   // which is executed on every path thru loop.
2670   IfNode *peel_if = NULL;
2671   IfNode *peel_if_cmpu = NULL;
2672 
2673   Node *iff = loop->tail();
2674   while( iff != head ) {
2675     if( iff->is_If() ) {
2676       Node *ctrl = get_ctrl(iff->in(1));
2677       if (ctrl->is_top()) return false; // Dead test on live IF.
2678       // If loop-varying exit-test, check for induction variable
2679       if( loop->is_member(get_loop(ctrl)) &&
2680           loop->is_loop_exit(iff) &&
2681           is_possible_iv_test(iff)) {
2682         Node* cmp = iff->in(1)->in(1);
2683         if (cmp->Opcode() == Op_CmpI) {
2684           peel_if = iff->as_If();
2685         } else {
2686           assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
2687           peel_if_cmpu = iff->as_If();
2688         }
2689       }
2690     }
2691     iff = idom(iff);
2692   }
2693   // Prefer signed compare over unsigned compare.
2694   IfNode* new_peel_if = NULL;
2695   if (peel_if == NULL) {
2696     if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
2697       return false;   // No peel point found
2698     }
2699     new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
2700     if (new_peel_if == NULL) {
2701       return false;   // No peel point found
2702     }
2703     peel_if = new_peel_if;
2704   }
2705   Node* last_peel        = stay_in_loop(peel_if, loop);
2706   Node* first_not_peeled = stay_in_loop(last_peel, loop);
2707   if (first_not_peeled == NULL || first_not_peeled == head) {
2708     return false;
2709   }
2710 
2711 #if !defined(PRODUCT)
2712   if (TraceLoopOpts) {
2713     tty->print("PartialPeel  ");
2714     loop->dump_head();
2715   }
2716 
2717   if (TracePartialPeeling) {
2718     tty->print_cr("before partial peel one iteration");
2719     Node_List wl;
2720     Node* t = head->in(2);
2721     while (true) {
2722       wl.push(t);
2723       if (t == head) break;
2724       t = idom(t);
2725     }
2726     while (wl.size() > 0) {
2727       Node* tt = wl.pop();
2728       tt->dump();
2729       if (tt == last_peel) tty->print_cr("-- cut --");
2730     }
2731   }
2732 #endif
2733   ResourceArea *area = Thread::current()->resource_area();
2734   VectorSet peel(area);
2735   VectorSet not_peel(area);
2736   Node_List peel_list(area);
2737   Node_List worklist(area);
2738   Node_List sink_list(area);
2739 
2740   // Set of cfg nodes to peel are those that are executable from
2741   // the head through last_peel.
2742   assert(worklist.size() == 0, "should be empty");
2743   worklist.push(head);
2744   peel.set(head->_idx);
2745   while (worklist.size() > 0) {
2746     Node *n = worklist.pop();
2747     if (n != last_peel) {
2748       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2749         Node* use = n->fast_out(j);
2750         if (use->is_CFG() &&
2751             loop->is_member(get_loop(use)) &&
2752             !peel.test_set(use->_idx)) {
2753           worklist.push(use);
2754         }
2755       }
2756     }
2757   }
2758 
2759   // Set of non-cfg nodes to peel are those that are control
2760   // dependent on the cfg nodes.
2761   uint i;
2762   for(i = 0; i < loop->_body.size(); i++ ) {
2763     Node *n = loop->_body.at(i);
2764     Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
2765     if (peel.test(n_c->_idx)) {
2766       peel.set(n->_idx);
2767     } else {
2768       not_peel.set(n->_idx);
2769     }
2770   }
2771 
2772   // Step 2: move operations from the peeled section down into the
2773   //         not-peeled section
2774 
2775   // Get a post order schedule of nodes in the peel region
2776   // Result in right-most operand.
2777   scheduled_nodelist(loop, peel, peel_list );
2778 
2779   assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2780 
2781   // For future check for too many new phis
2782   uint old_phi_cnt = 0;
2783   for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
2784     Node* use = head->fast_out(j);
2785     if (use->is_Phi()) old_phi_cnt++;
2786   }
2787 
2788 #if !defined(PRODUCT)
2789   if (TracePartialPeeling) {
2790     tty->print_cr("\npeeled list");
2791   }
2792 #endif
2793 
2794   // Evacuate nodes in peel region into the not_peeled region if possible
2795   uint new_phi_cnt = 0;
2796   uint cloned_for_outside_use = 0;
2797   for (i = 0; i < peel_list.size();) {
2798     Node* n = peel_list.at(i);
2799 #if !defined(PRODUCT)
2800   if (TracePartialPeeling) n->dump();
2801 #endif
2802     bool incr = true;
2803     if ( !n->is_CFG() ) {
2804 
2805       if ( has_use_in_set(n, not_peel) ) {
2806 
2807         // If not used internal to the peeled region,
2808         // move "n" from peeled to not_peeled region.
2809 
2810         if ( !has_use_internal_to_set(n, peel, loop) ) {
2811 
2812           // if not pinned and not a load (which maybe anti-dependent on a store)
2813           // and not a CMove (Matcher expects only bool->cmove).
2814           if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
2815             cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
2816             sink_list.push(n);
2817             peel     >>= n->_idx; // delete n from peel set.
2818             not_peel <<= n->_idx; // add n to not_peel set.
2819             peel_list.remove(i);
2820             incr = false;
2821 #if !defined(PRODUCT)
2822             if (TracePartialPeeling) {
2823               tty->print_cr("sink to not_peeled region: %d newbb: %d",
2824                             n->_idx, get_ctrl(n)->_idx);
2825             }
2826 #endif
2827           }
2828         } else {
2829           // Otherwise check for special def-use cases that span
2830           // the peel/not_peel boundary such as bool->if
2831           clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
2832           new_phi_cnt++;
2833         }
2834       }
2835     }
2836     if (incr) i++;
2837   }
2838 
2839   if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
2840 #if !defined(PRODUCT)
2841     if (TracePartialPeeling) {
2842       tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
2843                     new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
2844     }
2845 #endif
2846     if (new_peel_if != NULL) {
2847       remove_cmpi_loop_exit(new_peel_if, loop);
2848     }
2849     // Inhibit more partial peeling on this loop
2850     assert(!head->is_partial_peel_loop(), "not partial peeled");
2851     head->mark_partial_peel_failed();
2852     if (cloned_for_outside_use > 0) {
2853       // Terminate this round of loop opts because
2854       // the graph outside this loop was changed.
2855       C->set_major_progress();
2856       return true;
2857     }
2858     return false;
2859   }
2860 
2861   // Step 3: clone loop, retarget control, and insert new phis
2862 
2863   // Create new loop head for new phis and to hang
2864   // the nodes being moved (sinked) from the peel region.
2865   LoopNode* new_head = new LoopNode(last_peel, last_peel);
2866   new_head->set_unswitch_count(head->unswitch_count()); // Preserve
2867   _igvn.register_new_node_with_optimizer(new_head);
2868   assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
2869   _igvn.replace_input_of(first_not_peeled, 0, new_head);
2870   set_loop(new_head, loop);
2871   loop->_body.push(new_head);
2872   not_peel.set(new_head->_idx);
2873   set_idom(new_head, last_peel, dom_depth(first_not_peeled));
2874   set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
2875 
2876   while (sink_list.size() > 0) {
2877     Node* n = sink_list.pop();
2878     set_ctrl(n, new_head);
2879   }
2880 
2881   assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2882 
2883   clone_loop( loop, old_new, dd );
2884 
2885   const uint clone_exit_idx = 1;
2886   const uint orig_exit_idx  = 2;
2887   assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
2888 
2889   Node* head_clone             = old_new[head->_idx];
2890   LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
2891   Node* orig_tail_clone        = head_clone->in(2);
2892 
2893   // Add phi if "def" node is in peel set and "use" is not
2894 
2895   for(i = 0; i < peel_list.size(); i++ ) {
2896     Node *def  = peel_list.at(i);
2897     if (!def->is_CFG()) {
2898       for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2899         Node *use = def->fast_out(j);
2900         if (has_node(use) && use->in(0) != C->top() &&
2901             (!peel.test(use->_idx) ||
2902              (use->is_Phi() && use->in(0) == head)) ) {
2903           worklist.push(use);
2904         }
2905       }
2906       while( worklist.size() ) {
2907         Node *use = worklist.pop();
2908         for (uint j = 1; j < use->req(); j++) {
2909           Node* n = use->in(j);
2910           if (n == def) {
2911 
2912             // "def" is in peel set, "use" is not in peel set
2913             // or "use" is in the entry boundary (a phi) of the peel set
2914 
2915             Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
2916 
2917             if ( loop->is_member(get_loop( use_c )) ) {
2918               // use is in loop
2919               if (old_new[use->_idx] != NULL) { // null for dead code
2920                 Node* use_clone = old_new[use->_idx];
2921                 _igvn.replace_input_of(use, j, C->top());
2922                 insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
2923               }
2924             } else {
2925               assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
2926               // use is not in the loop, check if the live range includes the cut
2927               Node* lp_if = use_c->in(orig_exit_idx)->in(0);
2928               if (not_peel.test(lp_if->_idx)) {
2929                 assert(j == orig_exit_idx, "use from original loop");
2930                 insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
2931               }
2932             }
2933           }
2934         }
2935       }
2936     }
2937   }
2938 
2939   // Step 3b: retarget control
2940 
2941   // Redirect control to the new loop head if a cloned node in
2942   // the not_peeled region has control that points into the peeled region.
2943   // This necessary because the cloned peeled region will be outside
2944   // the loop.
2945   //                            from    to
2946   //          cloned-peeled    <---+
2947   //    new_head_clone:            |    <--+
2948   //          cloned-not_peeled  in(0)    in(0)
2949   //          orig-peeled
2950 
2951   for(i = 0; i < loop->_body.size(); i++ ) {
2952     Node *n = loop->_body.at(i);
2953     if (!n->is_CFG()           && n->in(0) != NULL        &&
2954         not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
2955       Node* n_clone = old_new[n->_idx];
2956       _igvn.replace_input_of(n_clone, 0, new_head_clone);
2957     }
2958   }
2959 
2960   // Backedge of the surviving new_head (the clone) is original last_peel
2961   _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
2962 
2963   // Cut first node in original not_peel set
2964   _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
2965   new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
2966   new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
2967 
2968   // Copy head_clone back-branch info to original head
2969   // and remove original head's loop entry and
2970   // clone head's back-branch
2971   _igvn.rehash_node_delayed(head); // Multiple edge updates
2972   head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
2973   head->set_req(LoopNode::LoopBackControl, C->top());
2974   _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
2975 
2976   // Similarly modify the phis
2977   for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
2978     Node* use = head->fast_out(k);
2979     if (use->is_Phi() && use->outcnt() > 0) {
2980       Node* use_clone = old_new[use->_idx];
2981       _igvn.rehash_node_delayed(use); // Multiple edge updates
2982       use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
2983       use->set_req(LoopNode::LoopBackControl, C->top());
2984       _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
2985     }
2986   }
2987 
2988   // Step 4: update dominator tree and dominator depth
2989 
2990   set_idom(head, orig_tail_clone, dd);
2991   recompute_dom_depth();
2992 
2993   // Inhibit more partial peeling on this loop
2994   new_head_clone->set_partial_peel_loop();
2995   C->set_major_progress();
2996   loop->record_for_igvn();
2997 
2998 #if !defined(PRODUCT)
2999   if (TracePartialPeeling) {
3000     tty->print_cr("\nafter partial peel one iteration");
3001     Node_List wl(area);
3002     Node* t = last_peel;
3003     while (true) {
3004       wl.push(t);
3005       if (t == head_clone) break;
3006       t = idom(t);
3007     }
3008     while (wl.size() > 0) {
3009       Node* tt = wl.pop();
3010       if (tt == head) tty->print_cr("orig head");
3011       else if (tt == new_head_clone) tty->print_cr("new head");
3012       else if (tt == head_clone) tty->print_cr("clone head");
3013       tt->dump();
3014     }
3015   }
3016 #endif
3017   return true;
3018 }
3019 
3020 //------------------------------reorg_offsets----------------------------------
3021 // Reorganize offset computations to lower register pressure.  Mostly
3022 // prevent loop-fallout uses of the pre-incremented trip counter (which are
3023 // then alive with the post-incremented trip counter forcing an extra
3024 // register move)
3025 void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
3026   // Perform it only for canonical counted loops.
3027   // Loop's shape could be messed up by iteration_split_impl.
3028   if (!loop->_head->is_CountedLoop())
3029     return;
3030   if (!loop->_head->as_Loop()->is_valid_counted_loop())
3031     return;
3032 
3033   CountedLoopNode *cl = loop->_head->as_CountedLoop();
3034   CountedLoopEndNode *cle = cl->loopexit();
3035   Node *exit = cle->proj_out(false);
3036   Node *phi = cl->phi();
3037 
3038   // Check for the special case of folks using the pre-incremented
3039   // trip-counter on the fall-out path (forces the pre-incremented
3040   // and post-incremented trip counter to be live at the same time).
3041   // Fix this by adjusting to use the post-increment trip counter.
3042 
3043   bool progress = true;
3044   while (progress) {
3045     progress = false;
3046     for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
3047       Node* use = phi->fast_out(i);   // User of trip-counter
3048       if (!has_ctrl(use))  continue;
3049       Node *u_ctrl = get_ctrl(use);
3050       if (use->is_Phi()) {
3051         u_ctrl = NULL;
3052         for (uint j = 1; j < use->req(); j++)
3053           if (use->in(j) == phi)
3054             u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
3055       }
3056       IdealLoopTree *u_loop = get_loop(u_ctrl);
3057       // Look for loop-invariant use
3058       if (u_loop == loop) continue;
3059       if (loop->is_member(u_loop)) continue;
3060       // Check that use is live out the bottom.  Assuming the trip-counter
3061       // update is right at the bottom, uses of of the loop middle are ok.
3062       if (dom_lca(exit, u_ctrl) != exit) continue;
3063       // Hit!  Refactor use to use the post-incremented tripcounter.
3064       // Compute a post-increment tripcounter.
3065       Node *opaq = new Opaque2Node( C, cle->incr() );
3066       register_new_node(opaq, exit);
3067       Node *neg_stride = _igvn.intcon(-cle->stride_con());
3068       set_ctrl(neg_stride, C->root());
3069       Node *post = new AddINode( opaq, neg_stride);
3070       register_new_node(post, exit);
3071       _igvn.rehash_node_delayed(use);
3072       for (uint j = 1; j < use->req(); j++) {
3073         if (use->in(j) == phi)
3074           use->set_req(j, post);
3075       }
3076       // Since DU info changed, rerun loop
3077       progress = true;
3078       break;
3079     }
3080   }
3081 
3082 }