1 /*
   2  * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "ci/ciMethodData.hpp"
  27 #include "compiler/compileLog.hpp"
  28 #include "libadt/vectset.hpp"
  29 #include "memory/allocation.inline.hpp"
  30 #include "opto/addnode.hpp"
  31 #include "opto/callnode.hpp"
  32 #include "opto/connode.hpp"
  33 #include "opto/divnode.hpp"
  34 #include "opto/idealGraphPrinter.hpp"
  35 #include "opto/loopnode.hpp"
  36 #include "opto/mulnode.hpp"
  37 #include "opto/rootnode.hpp"
  38 #include "opto/superword.hpp"
  39 
  40 //=============================================================================
  41 //------------------------------is_loop_iv-------------------------------------
  42 // Determine if a node is Counted loop induction variable.
  43 // The method is declared in node.hpp.
  44 const Node* Node::is_loop_iv() const {
  45   if (this->is_Phi() && !this->as_Phi()->is_copy() &&
  46       this->as_Phi()->region()->is_CountedLoop() &&
  47       this->as_Phi()->region()->as_CountedLoop()->phi() == this) {
  48     return this;
  49   } else {
  50     return NULL;
  51   }
  52 }
  53 
  54 //=============================================================================
  55 //------------------------------dump_spec--------------------------------------
  56 // Dump special per-node info
  57 #ifndef PRODUCT
  58 void LoopNode::dump_spec(outputStream *st) const {
  59   if (is_inner_loop()) st->print( "inner " );
  60   if (is_partial_peel_loop()) st->print( "partial_peel " );
  61   if (partial_peel_has_failed()) st->print( "partial_peel_failed " );
  62 }
  63 #endif
  64 
  65 //------------------------------is_valid_counted_loop-------------------------
  66 bool LoopNode::is_valid_counted_loop() const {
  67   if (is_CountedLoop()) {
  68     CountedLoopNode*    l  = as_CountedLoop();
  69     CountedLoopEndNode* le = l->loopexit();
  70     if (le != NULL &&
  71         le->proj_out(1 /* true */) == l->in(LoopNode::LoopBackControl)) {
  72       Node* phi  = l->phi();
  73       Node* exit = le->proj_out(0 /* false */);
  74       if (exit != NULL && exit->Opcode() == Op_IfFalse &&
  75           phi != NULL && phi->is_Phi() &&
  76           phi->in(LoopNode::LoopBackControl) == l->incr() &&
  77           le->loopnode() == l && le->stride_is_con()) {
  78         return true;
  79       }
  80     }
  81   }
  82   return false;
  83 }
  84 
  85 //------------------------------get_early_ctrl---------------------------------
  86 // Compute earliest legal control
  87 Node *PhaseIdealLoop::get_early_ctrl( Node *n ) {
  88   assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" );
  89   uint i;
  90   Node *early;
  91   if (n->in(0) && !n->is_expensive()) {
  92     early = n->in(0);
  93     if (!early->is_CFG()) // Might be a non-CFG multi-def
  94       early = get_ctrl(early);        // So treat input as a straight data input
  95     i = 1;
  96   } else {
  97     early = get_ctrl(n->in(1));
  98     i = 2;
  99   }
 100   uint e_d = dom_depth(early);
 101   assert( early, "" );
 102   for (; i < n->req(); i++) {
 103     Node *cin = get_ctrl(n->in(i));
 104     assert( cin, "" );
 105     // Keep deepest dominator depth
 106     uint c_d = dom_depth(cin);
 107     if (c_d > e_d) {           // Deeper guy?
 108       early = cin;              // Keep deepest found so far
 109       e_d = c_d;
 110     } else if (c_d == e_d &&    // Same depth?
 111                early != cin) { // If not equal, must use slower algorithm
 112       // If same depth but not equal, one _must_ dominate the other
 113       // and we want the deeper (i.e., dominated) guy.
 114       Node *n1 = early;
 115       Node *n2 = cin;
 116       while (1) {
 117         n1 = idom(n1);          // Walk up until break cycle
 118         n2 = idom(n2);
 119         if (n1 == cin ||        // Walked early up to cin
 120             dom_depth(n2) < c_d)
 121           break;                // early is deeper; keep him
 122         if (n2 == early ||      // Walked cin up to early
 123             dom_depth(n1) < c_d) {
 124           early = cin;          // cin is deeper; keep him
 125           break;
 126         }
 127       }
 128       e_d = dom_depth(early);   // Reset depth register cache
 129     }
 130   }
 131 
 132   // Return earliest legal location
 133   assert(early == find_non_split_ctrl(early), "unexpected early control");
 134 
 135   if (n->is_expensive()) {
 136     assert(n->in(0), "should have control input");
 137     early = get_early_ctrl_for_expensive(n, early);
 138   }
 139 
 140   return early;
 141 }
 142 
 143 //------------------------------get_early_ctrl_for_expensive---------------------------------
 144 // Move node up the dominator tree as high as legal while still beneficial
 145 Node *PhaseIdealLoop::get_early_ctrl_for_expensive(Node *n, Node* earliest) {
 146   assert(n->in(0) && n->is_expensive(), "expensive node with control input here");
 147   assert(OptimizeExpensiveOps, "optimization off?");
 148 
 149   Node* ctl = n->in(0);
 150   assert(ctl->is_CFG(), "expensive input 0 must be cfg");
 151   uint min_dom_depth = dom_depth(earliest);
 152 #ifdef ASSERT
 153   if (!is_dominator(ctl, earliest) && !is_dominator(earliest, ctl)) {
 154     dump_bad_graph("Bad graph detected in get_early_ctrl_for_expensive", n, earliest, ctl);
 155     assert(false, "Bad graph detected in get_early_ctrl_for_expensive");
 156   }
 157 #endif
 158   if (dom_depth(ctl) < min_dom_depth) {
 159     return earliest;
 160   }
 161 
 162   while (1) {
 163     Node *next = ctl;
 164     // Moving the node out of a loop on the projection of a If
 165     // confuses loop predication. So once we hit a Loop in a If branch
 166     // that doesn't branch to an UNC, we stop. The code that process
 167     // expensive nodes will notice the loop and skip over it to try to
 168     // move the node further up.
 169     if (ctl->is_CountedLoop() && ctl->in(1) != NULL && ctl->in(1)->in(0) != NULL && ctl->in(1)->in(0)->is_If()) {
 170       if (!ctl->in(1)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none)) {
 171         break;
 172       }
 173       next = idom(ctl->in(1)->in(0));
 174     } else if (ctl->is_Proj()) {
 175       // We only move it up along a projection if the projection is
 176       // the single control projection for its parent: same code path,
 177       // if it's a If with UNC or fallthrough of a call.
 178       Node* parent_ctl = ctl->in(0);
 179       if (parent_ctl == NULL) {
 180         break;
 181       } else if (parent_ctl->is_CountedLoopEnd() && parent_ctl->as_CountedLoopEnd()->loopnode() != NULL) {
 182         next = parent_ctl->as_CountedLoopEnd()->loopnode()->init_control();
 183       } else if (parent_ctl->is_If()) {
 184         if (!ctl->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none)) {
 185           break;
 186         }
 187         assert(idom(ctl) == parent_ctl, "strange");
 188         next = idom(parent_ctl);
 189       } else if (ctl->is_CatchProj()) {
 190         if (ctl->as_Proj()->_con != CatchProjNode::fall_through_index) {
 191           break;
 192         }
 193         assert(parent_ctl->in(0)->in(0)->is_Call(), "strange graph");
 194         next = parent_ctl->in(0)->in(0)->in(0);
 195       } else {
 196         // Check if parent control has a single projection (this
 197         // control is the only possible successor of the parent
 198         // control). If so, we can try to move the node above the
 199         // parent control.
 200         int nb_ctl_proj = 0;
 201         for (DUIterator_Fast imax, i = parent_ctl->fast_outs(imax); i < imax; i++) {
 202           Node *p = parent_ctl->fast_out(i);
 203           if (p->is_Proj() && p->is_CFG()) {
 204             nb_ctl_proj++;
 205             if (nb_ctl_proj > 1) {
 206               break;
 207             }
 208           }
 209         }
 210 
 211         if (nb_ctl_proj > 1) {
 212           break;
 213         }
 214         assert(parent_ctl->is_Start() || parent_ctl->is_MemBar() || parent_ctl->is_Call(), "unexpected node");
 215         assert(idom(ctl) == parent_ctl, "strange");
 216         next = idom(parent_ctl);
 217       }
 218     } else {
 219       next = idom(ctl);
 220     }
 221     if (next->is_Root() || next->is_Start() || dom_depth(next) < min_dom_depth) {
 222       break;
 223     }
 224     ctl = next;
 225   }
 226 
 227   if (ctl != n->in(0)) {
 228     _igvn.hash_delete(n);
 229     n->set_req(0, ctl);
 230     _igvn.hash_insert(n);
 231   }
 232 
 233   return ctl;
 234 }
 235 
 236 
 237 //------------------------------set_early_ctrl---------------------------------
 238 // Set earliest legal control
 239 void PhaseIdealLoop::set_early_ctrl( Node *n ) {
 240   Node *early = get_early_ctrl(n);
 241 
 242   // Record earliest legal location
 243   set_ctrl(n, early);
 244 }
 245 
 246 //------------------------------set_subtree_ctrl-------------------------------
 247 // set missing _ctrl entries on new nodes
 248 void PhaseIdealLoop::set_subtree_ctrl( Node *n ) {
 249   // Already set?  Get out.
 250   if( _nodes[n->_idx] ) return;
 251   // Recursively set _nodes array to indicate where the Node goes
 252   uint i;
 253   for( i = 0; i < n->req(); ++i ) {
 254     Node *m = n->in(i);
 255     if( m && m != C->root() )
 256       set_subtree_ctrl( m );
 257   }
 258 
 259   // Fixup self
 260   set_early_ctrl( n );
 261 }
 262 
 263 //------------------------------is_counted_loop--------------------------------
 264 bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
 265   PhaseGVN *gvn = &_igvn;
 266 
 267   // Counted loop head must be a good RegionNode with only 3 not NULL
 268   // control input edges: Self, Entry, LoopBack.
 269   if (x->in(LoopNode::Self) == NULL || x->req() != 3 || loop->_irreducible) {
 270     return false;
 271   }
 272   Node *init_control = x->in(LoopNode::EntryControl);
 273   Node *back_control = x->in(LoopNode::LoopBackControl);
 274   if (init_control == NULL || back_control == NULL)    // Partially dead
 275     return false;
 276   // Must also check for TOP when looking for a dead loop
 277   if (init_control->is_top() || back_control->is_top())
 278     return false;
 279 
 280   // Allow funny placement of Safepoint
 281   if (back_control->Opcode() == Op_SafePoint)
 282     back_control = back_control->in(TypeFunc::Control);
 283 
 284   // Controlling test for loop
 285   Node *iftrue = back_control;
 286   uint iftrue_op = iftrue->Opcode();
 287   if (iftrue_op != Op_IfTrue &&
 288       iftrue_op != Op_IfFalse)
 289     // I have a weird back-control.  Probably the loop-exit test is in
 290     // the middle of the loop and I am looking at some trailing control-flow
 291     // merge point.  To fix this I would have to partially peel the loop.
 292     return false; // Obscure back-control
 293 
 294   // Get boolean guarding loop-back test
 295   Node *iff = iftrue->in(0);
 296   if (get_loop(iff) != loop || !iff->in(1)->is_Bool())
 297     return false;
 298   BoolNode *test = iff->in(1)->as_Bool();
 299   BoolTest::mask bt = test->_test._test;
 300   float cl_prob = iff->as_If()->_prob;
 301   if (iftrue_op == Op_IfFalse) {
 302     bt = BoolTest(bt).negate();
 303     cl_prob = 1.0 - cl_prob;
 304   }
 305   // Get backedge compare
 306   Node *cmp = test->in(1);
 307   int cmp_op = cmp->Opcode();
 308   if (cmp_op != Op_CmpI)
 309     return false;                // Avoid pointer & float compares
 310 
 311   // Find the trip-counter increment & limit.  Limit must be loop invariant.
 312   Node *incr  = cmp->in(1);
 313   Node *limit = cmp->in(2);
 314 
 315   // ---------
 316   // need 'loop()' test to tell if limit is loop invariant
 317   // ---------
 318 
 319   if (!is_member(loop, get_ctrl(incr))) { // Swapped trip counter and limit?
 320     Node *tmp = incr;            // Then reverse order into the CmpI
 321     incr = limit;
 322     limit = tmp;
 323     bt = BoolTest(bt).commute(); // And commute the exit test
 324   }
 325   if (is_member(loop, get_ctrl(limit))) // Limit must be loop-invariant
 326     return false;
 327   if (!is_member(loop, get_ctrl(incr))) // Trip counter must be loop-variant
 328     return false;
 329 
 330   Node* phi_incr = NULL;
 331   // Trip-counter increment must be commutative & associative.
 332   if (incr->is_Phi()) {
 333     if (incr->as_Phi()->region() != x || incr->req() != 3)
 334       return false; // Not simple trip counter expression
 335     phi_incr = incr;
 336     incr = phi_incr->in(LoopNode::LoopBackControl); // Assume incr is on backedge of Phi
 337     if (!is_member(loop, get_ctrl(incr))) // Trip counter must be loop-variant
 338       return false;
 339   }
 340 
 341   Node* trunc1 = NULL;
 342   Node* trunc2 = NULL;
 343   const TypeInt* iv_trunc_t = NULL;
 344   if (!(incr = CountedLoopNode::match_incr_with_optional_truncation(incr, &trunc1, &trunc2, &iv_trunc_t))) {
 345     return false; // Funny increment opcode
 346   }
 347   assert(incr->Opcode() == Op_AddI, "wrong increment code");
 348 
 349   // Get merge point
 350   Node *xphi = incr->in(1);
 351   Node *stride = incr->in(2);
 352   if (!stride->is_Con()) {     // Oops, swap these
 353     if (!xphi->is_Con())       // Is the other guy a constant?
 354       return false;             // Nope, unknown stride, bail out
 355     Node *tmp = xphi;           // 'incr' is commutative, so ok to swap
 356     xphi = stride;
 357     stride = tmp;
 358   }
 359   // Stride must be constant
 360   int stride_con = stride->get_int();
 361   if (stride_con == 0)
 362     return false; // missed some peephole opt
 363 
 364   if (!xphi->is_Phi())
 365     return false; // Too much math on the trip counter
 366   if (phi_incr != NULL && phi_incr != xphi)
 367     return false;
 368   PhiNode *phi = xphi->as_Phi();
 369 
 370   // Phi must be of loop header; backedge must wrap to increment
 371   if (phi->region() != x)
 372     return false;
 373   if (trunc1 == NULL && phi->in(LoopNode::LoopBackControl) != incr ||
 374       trunc1 != NULL && phi->in(LoopNode::LoopBackControl) != trunc1) {
 375     return false;
 376   }
 377   Node *init_trip = phi->in(LoopNode::EntryControl);
 378 
 379   // If iv trunc type is smaller than int, check for possible wrap.
 380   if (!TypeInt::INT->higher_equal(iv_trunc_t)) {
 381     assert(trunc1 != NULL, "must have found some truncation");
 382 
 383     // Get a better type for the phi (filtered thru if's)
 384     const TypeInt* phi_ft = filtered_type(phi);
 385 
 386     // Can iv take on a value that will wrap?
 387     //
 388     // Ensure iv's limit is not within "stride" of the wrap value.
 389     //
 390     // Example for "short" type
 391     //    Truncation ensures value is in the range -32768..32767 (iv_trunc_t)
 392     //    If the stride is +10, then the last value of the induction
 393     //    variable before the increment (phi_ft->_hi) must be
 394     //    <= 32767 - 10 and (phi_ft->_lo) must be >= -32768 to
 395     //    ensure no truncation occurs after the increment.
 396 
 397     if (stride_con > 0) {
 398       if (iv_trunc_t->_hi - phi_ft->_hi < stride_con ||
 399           iv_trunc_t->_lo > phi_ft->_lo) {
 400         return false;  // truncation may occur
 401       }
 402     } else if (stride_con < 0) {
 403       if (iv_trunc_t->_lo - phi_ft->_lo > stride_con ||
 404           iv_trunc_t->_hi < phi_ft->_hi) {
 405         return false;  // truncation may occur
 406       }
 407     }
 408     // No possibility of wrap so truncation can be discarded
 409     // Promote iv type to Int
 410   } else {
 411     assert(trunc1 == NULL && trunc2 == NULL, "no truncation for int");
 412   }
 413 
 414   // If the condition is inverted and we will be rolling
 415   // through MININT to MAXINT, then bail out.
 416   if (bt == BoolTest::eq || // Bail out, but this loop trips at most twice!
 417       // Odd stride
 418       bt == BoolTest::ne && stride_con != 1 && stride_con != -1 ||
 419       // Count down loop rolls through MAXINT
 420       (bt == BoolTest::le || bt == BoolTest::lt) && stride_con < 0 ||
 421       // Count up loop rolls through MININT
 422       (bt == BoolTest::ge || bt == BoolTest::gt) && stride_con > 0) {
 423     return false; // Bail out
 424   }
 425 
 426   const TypeInt* init_t = gvn->type(init_trip)->is_int();
 427   const TypeInt* limit_t = gvn->type(limit)->is_int();
 428 
 429   if (stride_con > 0) {
 430     jlong init_p = (jlong)init_t->_lo + stride_con;
 431     if (init_p > (jlong)max_jint || init_p > (jlong)limit_t->_hi)
 432       return false; // cyclic loop or this loop trips only once
 433   } else {
 434     jlong init_p = (jlong)init_t->_hi + stride_con;
 435     if (init_p < (jlong)min_jint || init_p < (jlong)limit_t->_lo)
 436       return false; // cyclic loop or this loop trips only once
 437   }
 438 
 439   if (phi_incr != NULL) {
 440     // check if there is a possiblity of IV overflowing after the first increment
 441     if (stride_con > 0) {
 442       if (init_t->_hi > max_jint - stride_con) {
 443         return false;
 444       }
 445     } else {
 446       if (init_t->_lo < min_jint - stride_con) {
 447         return false;
 448       }
 449     }
 450   }
 451 
 452   // =================================================
 453   // ---- SUCCESS!   Found A Trip-Counted Loop!  -----
 454   //
 455   assert(x->Opcode() == Op_Loop, "regular loops only");
 456   C->print_method(PHASE_BEFORE_CLOOPS, 3);
 457 
 458   Node *hook = new (C) Node(6);
 459 
 460   if (LoopLimitCheck) {
 461 
 462   // ===================================================
 463   // Generate loop limit check to avoid integer overflow
 464   // in cases like next (cyclic loops):
 465   //
 466   // for (i=0; i <= max_jint; i++) {}
 467   // for (i=0; i <  max_jint; i+=2) {}
 468   //
 469   //
 470   // Limit check predicate depends on the loop test:
 471   //
 472   // for(;i != limit; i++)       --> limit <= (max_jint)
 473   // for(;i <  limit; i+=stride) --> limit <= (max_jint - stride + 1)
 474   // for(;i <= limit; i+=stride) --> limit <= (max_jint - stride    )
 475   //
 476 
 477   // Check if limit is excluded to do more precise int overflow check.
 478   bool incl_limit = (bt == BoolTest::le || bt == BoolTest::ge);
 479   int stride_m  = stride_con - (incl_limit ? 0 : (stride_con > 0 ? 1 : -1));
 480 
 481   // If compare points directly to the phi we need to adjust
 482   // the compare so that it points to the incr. Limit have
 483   // to be adjusted to keep trip count the same and the
 484   // adjusted limit should be checked for int overflow.
 485   if (phi_incr != NULL) {
 486     stride_m  += stride_con;
 487   }
 488 
 489   if (limit->is_Con()) {
 490     int limit_con = limit->get_int();
 491     if ((stride_con > 0 && limit_con > (max_jint - stride_m)) ||
 492         (stride_con < 0 && limit_con < (min_jint - stride_m))) {
 493       // Bailout: it could be integer overflow.
 494       return false;
 495     }
 496   } else if ((stride_con > 0 && limit_t->_hi <= (max_jint - stride_m)) ||
 497              (stride_con < 0 && limit_t->_lo >= (min_jint - stride_m))) {
 498       // Limit's type may satisfy the condition, for example,
 499       // when it is an array length.
 500   } else {
 501     // Generate loop's limit check.
 502     // Loop limit check predicate should be near the loop.
 503     ProjNode *limit_check_proj = find_predicate_insertion_point(init_control, Deoptimization::Reason_loop_limit_check);
 504     if (!limit_check_proj) {
 505       // The limit check predicate is not generated if this method trapped here before.
 506 #ifdef ASSERT
 507       if (TraceLoopLimitCheck) {
 508         tty->print("missing loop limit check:");
 509         loop->dump_head();
 510         x->dump(1);
 511       }
 512 #endif
 513       return false;
 514     }
 515 
 516     IfNode* check_iff = limit_check_proj->in(0)->as_If();
 517     Node* cmp_limit;
 518     Node* bol;
 519 
 520     if (stride_con > 0) {
 521       cmp_limit = new (C) CmpINode(limit, _igvn.intcon(max_jint - stride_m));
 522       bol = new (C) BoolNode(cmp_limit, BoolTest::le);
 523     } else {
 524       cmp_limit = new (C) CmpINode(limit, _igvn.intcon(min_jint - stride_m));
 525       bol = new (C) BoolNode(cmp_limit, BoolTest::ge);
 526     }
 527     cmp_limit = _igvn.register_new_node_with_optimizer(cmp_limit);
 528     bol = _igvn.register_new_node_with_optimizer(bol);
 529     set_subtree_ctrl(bol);
 530 
 531     // Replace condition in original predicate but preserve Opaque node
 532     // so that previous predicates could be found.
 533     assert(check_iff->in(1)->Opcode() == Op_Conv2B &&
 534            check_iff->in(1)->in(1)->Opcode() == Op_Opaque1, "");
 535     Node* opq = check_iff->in(1)->in(1);
 536     _igvn.hash_delete(opq);
 537     opq->set_req(1, bol);
 538     // Update ctrl.
 539     set_ctrl(opq, check_iff->in(0));
 540     set_ctrl(check_iff->in(1), check_iff->in(0));
 541 
 542 #ifndef PRODUCT
 543     // report that the loop predication has been actually performed
 544     // for this loop
 545     if (TraceLoopLimitCheck) {
 546       tty->print_cr("Counted Loop Limit Check generated:");
 547       debug_only( bol->dump(2); )
 548     }
 549 #endif
 550   }
 551 
 552   if (phi_incr != NULL) {
 553     // If compare points directly to the phi we need to adjust
 554     // the compare so that it points to the incr. Limit have
 555     // to be adjusted to keep trip count the same and we
 556     // should avoid int overflow.
 557     //
 558     //   i = init; do {} while(i++ < limit);
 559     // is converted to
 560     //   i = init; do {} while(++i < limit+1);
 561     //
 562     limit = gvn->transform(new (C) AddINode(limit, stride));
 563   }
 564 
 565   // Now we need to canonicalize loop condition.
 566   if (bt == BoolTest::ne) {
 567     assert(stride_con == 1 || stride_con == -1, "simple increment only");
 568     // 'ne' can be replaced with 'lt' only when init < limit.
 569     if (stride_con > 0 && init_t->_hi < limit_t->_lo)
 570       bt = BoolTest::lt;
 571     // 'ne' can be replaced with 'gt' only when init > limit.
 572     if (stride_con < 0 && init_t->_lo > limit_t->_hi)
 573       bt = BoolTest::gt;
 574   }
 575 
 576   if (incl_limit) {
 577     // The limit check guaranties that 'limit <= (max_jint - stride)' so
 578     // we can convert 'i <= limit' to 'i < limit+1' since stride != 0.
 579     //
 580     Node* one = (stride_con > 0) ? gvn->intcon( 1) : gvn->intcon(-1);
 581     limit = gvn->transform(new (C) AddINode(limit, one));
 582     if (bt == BoolTest::le)
 583       bt = BoolTest::lt;
 584     else if (bt == BoolTest::ge)
 585       bt = BoolTest::gt;
 586     else
 587       ShouldNotReachHere();
 588   }
 589   set_subtree_ctrl( limit );
 590 
 591   } else { // LoopLimitCheck
 592 
 593   // If compare points to incr, we are ok.  Otherwise the compare
 594   // can directly point to the phi; in this case adjust the compare so that
 595   // it points to the incr by adjusting the limit.
 596   if (cmp->in(1) == phi || cmp->in(2) == phi)
 597     limit = gvn->transform(new (C) AddINode(limit,stride));
 598 
 599   // trip-count for +-tive stride should be: (limit - init_trip + stride - 1)/stride.
 600   // Final value for iterator should be: trip_count * stride + init_trip.
 601   Node *one_p = gvn->intcon( 1);
 602   Node *one_m = gvn->intcon(-1);
 603 
 604   Node *trip_count = NULL;
 605   switch( bt ) {
 606   case BoolTest::eq:
 607     ShouldNotReachHere();
 608   case BoolTest::ne:            // Ahh, the case we desire
 609     if (stride_con == 1)
 610       trip_count = gvn->transform(new (C) SubINode(limit,init_trip));
 611     else if (stride_con == -1)
 612       trip_count = gvn->transform(new (C) SubINode(init_trip,limit));
 613     else
 614       ShouldNotReachHere();
 615     set_subtree_ctrl(trip_count);
 616     //_loop.map(trip_count->_idx,loop(limit));
 617     break;
 618   case BoolTest::le:            // Maybe convert to '<' case
 619     limit = gvn->transform(new (C) AddINode(limit,one_p));
 620     set_subtree_ctrl( limit );
 621     hook->init_req(4, limit);
 622 
 623     bt = BoolTest::lt;
 624     // Make the new limit be in the same loop nest as the old limit
 625     //_loop.map(limit->_idx,limit_loop);
 626     // Fall into next case
 627   case BoolTest::lt: {          // Maybe convert to '!=' case
 628     if (stride_con < 0) // Count down loop rolls through MAXINT
 629       ShouldNotReachHere();
 630     Node *range = gvn->transform(new (C) SubINode(limit,init_trip));
 631     set_subtree_ctrl( range );
 632     hook->init_req(0, range);
 633 
 634     Node *bias  = gvn->transform(new (C) AddINode(range,stride));
 635     set_subtree_ctrl( bias );
 636     hook->init_req(1, bias);
 637 
 638     Node *bias1 = gvn->transform(new (C) AddINode(bias,one_m));
 639     set_subtree_ctrl( bias1 );
 640     hook->init_req(2, bias1);
 641 
 642     trip_count  = gvn->transform(new (C) DivINode(0,bias1,stride));
 643     set_subtree_ctrl( trip_count );
 644     hook->init_req(3, trip_count);
 645     break;
 646   }
 647 
 648   case BoolTest::ge:            // Maybe convert to '>' case
 649     limit = gvn->transform(new (C) AddINode(limit,one_m));
 650     set_subtree_ctrl( limit );
 651     hook->init_req(4 ,limit);
 652 
 653     bt = BoolTest::gt;
 654     // Make the new limit be in the same loop nest as the old limit
 655     //_loop.map(limit->_idx,limit_loop);
 656     // Fall into next case
 657   case BoolTest::gt: {          // Maybe convert to '!=' case
 658     if (stride_con > 0) // count up loop rolls through MININT
 659       ShouldNotReachHere();
 660     Node *range = gvn->transform(new (C) SubINode(limit,init_trip));
 661     set_subtree_ctrl( range );
 662     hook->init_req(0, range);
 663 
 664     Node *bias  = gvn->transform(new (C) AddINode(range,stride));
 665     set_subtree_ctrl( bias );
 666     hook->init_req(1, bias);
 667 
 668     Node *bias1 = gvn->transform(new (C) AddINode(bias,one_p));
 669     set_subtree_ctrl( bias1 );
 670     hook->init_req(2, bias1);
 671 
 672     trip_count  = gvn->transform(new (C) DivINode(0,bias1,stride));
 673     set_subtree_ctrl( trip_count );
 674     hook->init_req(3, trip_count);
 675     break;
 676   }
 677   } // switch( bt )
 678 
 679   Node *span = gvn->transform(new (C) MulINode(trip_count,stride));
 680   set_subtree_ctrl( span );
 681   hook->init_req(5, span);
 682 
 683   limit = gvn->transform(new (C) AddINode(span,init_trip));
 684   set_subtree_ctrl( limit );
 685 
 686   } // LoopLimitCheck
 687 
 688   // Check for SafePoint on backedge and remove
 689   Node *sfpt = x->in(LoopNode::LoopBackControl);
 690   if (sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) {
 691     lazy_replace( sfpt, iftrue );
 692     if (loop->_safepts != NULL) {
 693       loop->_safepts->yank(sfpt);
 694     }
 695     loop->_tail = iftrue;
 696   }
 697 
 698   // Build a canonical trip test.
 699   // Clone code, as old values may be in use.
 700   incr = incr->clone();
 701   incr->set_req(1,phi);
 702   incr->set_req(2,stride);
 703   incr = _igvn.register_new_node_with_optimizer(incr);
 704   set_early_ctrl( incr );
 705   _igvn.hash_delete(phi);
 706   phi->set_req_X( LoopNode::LoopBackControl, incr, &_igvn );
 707 
 708   // If phi type is more restrictive than Int, raise to
 709   // Int to prevent (almost) infinite recursion in igvn
 710   // which can only handle integer types for constants or minint..maxint.
 711   if (!TypeInt::INT->higher_equal(phi->bottom_type())) {
 712     Node* nphi = PhiNode::make(phi->in(0), phi->in(LoopNode::EntryControl), TypeInt::INT);
 713     nphi->set_req(LoopNode::LoopBackControl, phi->in(LoopNode::LoopBackControl));
 714     nphi = _igvn.register_new_node_with_optimizer(nphi);
 715     set_ctrl(nphi, get_ctrl(phi));
 716     _igvn.replace_node(phi, nphi);
 717     phi = nphi->as_Phi();
 718   }
 719   cmp = cmp->clone();
 720   cmp->set_req(1,incr);
 721   cmp->set_req(2,limit);
 722   cmp = _igvn.register_new_node_with_optimizer(cmp);
 723   set_ctrl(cmp, iff->in(0));
 724 
 725   test = test->clone()->as_Bool();
 726   (*(BoolTest*)&test->_test)._test = bt;
 727   test->set_req(1,cmp);
 728   _igvn.register_new_node_with_optimizer(test);
 729   set_ctrl(test, iff->in(0));
 730 
 731   // Replace the old IfNode with a new LoopEndNode
 732   Node *lex = _igvn.register_new_node_with_optimizer(new (C) CountedLoopEndNode( iff->in(0), test, cl_prob, iff->as_If()->_fcnt ));
 733   IfNode *le = lex->as_If();
 734   uint dd = dom_depth(iff);
 735   set_idom(le, le->in(0), dd); // Update dominance for loop exit
 736   set_loop(le, loop);
 737 
 738   // Get the loop-exit control
 739   Node *iffalse = iff->as_If()->proj_out(!(iftrue_op == Op_IfTrue));
 740 
 741   // Need to swap loop-exit and loop-back control?
 742   if (iftrue_op == Op_IfFalse) {
 743     Node *ift2=_igvn.register_new_node_with_optimizer(new (C) IfTrueNode (le));
 744     Node *iff2=_igvn.register_new_node_with_optimizer(new (C) IfFalseNode(le));
 745 
 746     loop->_tail = back_control = ift2;
 747     set_loop(ift2, loop);
 748     set_loop(iff2, get_loop(iffalse));
 749 
 750     // Lazy update of 'get_ctrl' mechanism.
 751     lazy_replace_proj( iffalse, iff2 );
 752     lazy_replace_proj( iftrue,  ift2 );
 753 
 754     // Swap names
 755     iffalse = iff2;
 756     iftrue  = ift2;
 757   } else {
 758     _igvn.hash_delete(iffalse);
 759     _igvn.hash_delete(iftrue);
 760     iffalse->set_req_X( 0, le, &_igvn );
 761     iftrue ->set_req_X( 0, le, &_igvn );
 762   }
 763 
 764   set_idom(iftrue,  le, dd+1);
 765   set_idom(iffalse, le, dd+1);
 766   assert(iff->outcnt() == 0, "should be dead now");
 767   lazy_replace( iff, le ); // fix 'get_ctrl'
 768 
 769   // Now setup a new CountedLoopNode to replace the existing LoopNode
 770   CountedLoopNode *l = new (C) CountedLoopNode(init_control, back_control);
 771   l->set_unswitch_count(x->as_Loop()->unswitch_count()); // Preserve
 772   // The following assert is approximately true, and defines the intention
 773   // of can_be_counted_loop.  It fails, however, because phase->type
 774   // is not yet initialized for this loop and its parts.
 775   //assert(l->can_be_counted_loop(this), "sanity");
 776   _igvn.register_new_node_with_optimizer(l);
 777   set_loop(l, loop);
 778   loop->_head = l;
 779   // Fix all data nodes placed at the old loop head.
 780   // Uses the lazy-update mechanism of 'get_ctrl'.
 781   lazy_replace( x, l );
 782   set_idom(l, init_control, dom_depth(x));
 783 
 784   // Check for immediately preceding SafePoint and remove
 785   Node *sfpt2 = le->in(0);
 786   if (sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2)) {
 787     lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control));
 788     if (loop->_safepts != NULL) {
 789       loop->_safepts->yank(sfpt2);
 790     }
 791   }
 792 
 793   // Free up intermediate goo
 794   _igvn.remove_dead_node(hook);
 795 
 796 #ifdef ASSERT
 797   assert(l->is_valid_counted_loop(), "counted loop shape is messed up");
 798   assert(l == loop->_head && l->phi() == phi && l->loopexit() == lex, "" );
 799 #endif
 800 #ifndef PRODUCT
 801   if (TraceLoopOpts) {
 802     tty->print("Counted      ");
 803     loop->dump_head();
 804   }
 805 #endif
 806 
 807   C->print_method(PHASE_AFTER_CLOOPS, 3);
 808 
 809   return true;
 810 }
 811 
 812 //----------------------exact_limit-------------------------------------------
 813 Node* PhaseIdealLoop::exact_limit( IdealLoopTree *loop ) {
 814   assert(loop->_head->is_CountedLoop(), "");
 815   CountedLoopNode *cl = loop->_head->as_CountedLoop();
 816   assert(cl->is_valid_counted_loop(), "");
 817 
 818   if (!LoopLimitCheck || ABS(cl->stride_con()) == 1 ||
 819       cl->limit()->Opcode() == Op_LoopLimit) {
 820     // Old code has exact limit (it could be incorrect in case of int overflow).
 821     // Loop limit is exact with stride == 1. And loop may already have exact limit.
 822     return cl->limit();
 823   }
 824   Node *limit = NULL;
 825 #ifdef ASSERT
 826   BoolTest::mask bt = cl->loopexit()->test_trip();
 827   assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
 828 #endif
 829   if (cl->has_exact_trip_count()) {
 830     // Simple case: loop has constant boundaries.
 831     // Use jlongs to avoid integer overflow.
 832     int stride_con = cl->stride_con();
 833     jlong  init_con = cl->init_trip()->get_int();
 834     jlong limit_con = cl->limit()->get_int();
 835     julong trip_cnt = cl->trip_count();
 836     jlong final_con = init_con + trip_cnt*stride_con;
 837     int final_int = (int)final_con;
 838     // The final value should be in integer range since the loop
 839     // is counted and the limit was checked for overflow.
 840     assert(final_con == (jlong)final_int, "final value should be integer");
 841     limit = _igvn.intcon(final_int);
 842   } else {
 843     // Create new LoopLimit node to get exact limit (final iv value).
 844     limit = new (C) LoopLimitNode(C, cl->init_trip(), cl->limit(), cl->stride());
 845     register_new_node(limit, cl->in(LoopNode::EntryControl));
 846   }
 847   assert(limit != NULL, "sanity");
 848   return limit;
 849 }
 850 
 851 //------------------------------Ideal------------------------------------------
 852 // Return a node which is more "ideal" than the current node.
 853 // Attempt to convert into a counted-loop.
 854 Node *LoopNode::Ideal(PhaseGVN *phase, bool can_reshape) {
 855   if (!can_be_counted_loop(phase)) {
 856     phase->C->set_major_progress();
 857   }
 858   return RegionNode::Ideal(phase, can_reshape);
 859 }
 860 
 861 
 862 //=============================================================================
 863 //------------------------------Ideal------------------------------------------
 864 // Return a node which is more "ideal" than the current node.
 865 // Attempt to convert into a counted-loop.
 866 Node *CountedLoopNode::Ideal(PhaseGVN *phase, bool can_reshape) {
 867   return RegionNode::Ideal(phase, can_reshape);
 868 }
 869 
 870 //------------------------------dump_spec--------------------------------------
 871 // Dump special per-node info
 872 #ifndef PRODUCT
 873 void CountedLoopNode::dump_spec(outputStream *st) const {
 874   LoopNode::dump_spec(st);
 875   if (stride_is_con()) {
 876     st->print("stride: %d ",stride_con());
 877   }
 878   if (is_pre_loop ()) st->print("pre of N%d" , _main_idx);
 879   if (is_main_loop()) st->print("main of N%d", _idx);
 880   if (is_post_loop()) st->print("post of N%d", _main_idx);
 881 }
 882 #endif
 883 
 884 //=============================================================================
 885 int CountedLoopEndNode::stride_con() const {
 886   return stride()->bottom_type()->is_int()->get_con();
 887 }
 888 
 889 //=============================================================================
 890 //------------------------------Value-----------------------------------------
 891 const Type *LoopLimitNode::Value( PhaseTransform *phase ) const {
 892   const Type* init_t   = phase->type(in(Init));
 893   const Type* limit_t  = phase->type(in(Limit));
 894   const Type* stride_t = phase->type(in(Stride));
 895   // Either input is TOP ==> the result is TOP
 896   if (init_t   == Type::TOP) return Type::TOP;
 897   if (limit_t  == Type::TOP) return Type::TOP;
 898   if (stride_t == Type::TOP) return Type::TOP;
 899 
 900   int stride_con = stride_t->is_int()->get_con();
 901   if (stride_con == 1)
 902     return NULL;  // Identity
 903 
 904   if (init_t->is_int()->is_con() && limit_t->is_int()->is_con()) {
 905     // Use jlongs to avoid integer overflow.
 906     jlong init_con   =  init_t->is_int()->get_con();
 907     jlong limit_con  = limit_t->is_int()->get_con();
 908     int  stride_m   = stride_con - (stride_con > 0 ? 1 : -1);
 909     jlong trip_count = (limit_con - init_con + stride_m)/stride_con;
 910     jlong final_con  = init_con + stride_con*trip_count;
 911     int final_int = (int)final_con;
 912     // The final value should be in integer range since the loop
 913     // is counted and the limit was checked for overflow.
 914     assert(final_con == (jlong)final_int, "final value should be integer");
 915     return TypeInt::make(final_int);
 916   }
 917 
 918   return bottom_type(); // TypeInt::INT
 919 }
 920 
 921 //------------------------------Ideal------------------------------------------
 922 // Return a node which is more "ideal" than the current node.
 923 Node *LoopLimitNode::Ideal(PhaseGVN *phase, bool can_reshape) {
 924   if (phase->type(in(Init))   == Type::TOP ||
 925       phase->type(in(Limit))  == Type::TOP ||
 926       phase->type(in(Stride)) == Type::TOP)
 927     return NULL;  // Dead
 928 
 929   int stride_con = phase->type(in(Stride))->is_int()->get_con();
 930   if (stride_con == 1)
 931     return NULL;  // Identity
 932 
 933   if (in(Init)->is_Con() && in(Limit)->is_Con())
 934     return NULL;  // Value
 935 
 936   // Delay following optimizations until all loop optimizations
 937   // done to keep Ideal graph simple.
 938   if (!can_reshape || phase->C->major_progress())
 939     return NULL;
 940 
 941   const TypeInt* init_t  = phase->type(in(Init) )->is_int();
 942   const TypeInt* limit_t = phase->type(in(Limit))->is_int();
 943   int stride_p;
 944   jlong lim, ini;
 945   julong max;
 946   if (stride_con > 0) {
 947     stride_p = stride_con;
 948     lim = limit_t->_hi;
 949     ini = init_t->_lo;
 950     max = (julong)max_jint;
 951   } else {
 952     stride_p = -stride_con;
 953     lim = init_t->_hi;
 954     ini = limit_t->_lo;
 955     max = (julong)min_jint;
 956   }
 957   julong range = lim - ini + stride_p;
 958   if (range <= max) {
 959     // Convert to integer expression if it is not overflow.
 960     Node* stride_m = phase->intcon(stride_con - (stride_con > 0 ? 1 : -1));
 961     Node *range = phase->transform(new (phase->C) SubINode(in(Limit), in(Init)));
 962     Node *bias  = phase->transform(new (phase->C) AddINode(range, stride_m));
 963     Node *trip  = phase->transform(new (phase->C) DivINode(0, bias, in(Stride)));
 964     Node *span  = phase->transform(new (phase->C) MulINode(trip, in(Stride)));
 965     return new (phase->C) AddINode(span, in(Init)); // exact limit
 966   }
 967 
 968   if (is_power_of_2(stride_p) ||                // divisor is 2^n
 969       !Matcher::has_match_rule(Op_LoopLimit)) { // or no specialized Mach node?
 970     // Convert to long expression to avoid integer overflow
 971     // and let igvn optimizer convert this division.
 972     //
 973     Node*   init   = phase->transform( new (phase->C) ConvI2LNode(in(Init)));
 974     Node*  limit   = phase->transform( new (phase->C) ConvI2LNode(in(Limit)));
 975     Node* stride   = phase->longcon(stride_con);
 976     Node* stride_m = phase->longcon(stride_con - (stride_con > 0 ? 1 : -1));
 977 
 978     Node *range = phase->transform(new (phase->C) SubLNode(limit, init));
 979     Node *bias  = phase->transform(new (phase->C) AddLNode(range, stride_m));
 980     Node *span;
 981     if (stride_con > 0 && is_power_of_2(stride_p)) {
 982       // bias >= 0 if stride >0, so if stride is 2^n we can use &(-stride)
 983       // and avoid generating rounding for division. Zero trip guard should
 984       // guarantee that init < limit but sometimes the guard is missing and
 985       // we can get situation when init > limit. Note, for the empty loop
 986       // optimization zero trip guard is generated explicitly which leaves
 987       // only RCE predicate where exact limit is used and the predicate
 988       // will simply fail forcing recompilation.
 989       Node* neg_stride   = phase->longcon(-stride_con);
 990       span = phase->transform(new (phase->C) AndLNode(bias, neg_stride));
 991     } else {
 992       Node *trip  = phase->transform(new (phase->C) DivLNode(0, bias, stride));
 993       span = phase->transform(new (phase->C) MulLNode(trip, stride));
 994     }
 995     // Convert back to int
 996     Node *span_int = phase->transform(new (phase->C) ConvL2INode(span));
 997     return new (phase->C) AddINode(span_int, in(Init)); // exact limit
 998   }
 999 
1000   return NULL;    // No progress
1001 }
1002 
1003 //------------------------------Identity---------------------------------------
1004 // If stride == 1 return limit node.
1005 Node *LoopLimitNode::Identity( PhaseTransform *phase ) {
1006   int stride_con = phase->type(in(Stride))->is_int()->get_con();
1007   if (stride_con == 1 || stride_con == -1)
1008     return in(Limit);
1009   return this;
1010 }
1011 
1012 //=============================================================================
1013 //----------------------match_incr_with_optional_truncation--------------------
1014 // Match increment with optional truncation:
1015 // CHAR: (i+1)&0x7fff, BYTE: ((i+1)<<8)>>8, or SHORT: ((i+1)<<16)>>16
1016 // Return NULL for failure. Success returns the increment node.
1017 Node* CountedLoopNode::match_incr_with_optional_truncation(
1018                       Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type) {
1019   // Quick cutouts:
1020   if (expr == NULL || expr->req() != 3)  return NULL;
1021 
1022   Node *t1 = NULL;
1023   Node *t2 = NULL;
1024   const TypeInt* trunc_t = TypeInt::INT;
1025   Node* n1 = expr;
1026   int   n1op = n1->Opcode();
1027 
1028   // Try to strip (n1 & M) or (n1 << N >> N) from n1.
1029   if (n1op == Op_AndI &&
1030       n1->in(2)->is_Con() &&
1031       n1->in(2)->bottom_type()->is_int()->get_con() == 0x7fff) {
1032     // %%% This check should match any mask of 2**K-1.
1033     t1 = n1;
1034     n1 = t1->in(1);
1035     n1op = n1->Opcode();
1036     trunc_t = TypeInt::CHAR;
1037   } else if (n1op == Op_RShiftI &&
1038              n1->in(1) != NULL &&
1039              n1->in(1)->Opcode() == Op_LShiftI &&
1040              n1->in(2) == n1->in(1)->in(2) &&
1041              n1->in(2)->is_Con()) {
1042     jint shift = n1->in(2)->bottom_type()->is_int()->get_con();
1043     // %%% This check should match any shift in [1..31].
1044     if (shift == 16 || shift == 8) {
1045       t1 = n1;
1046       t2 = t1->in(1);
1047       n1 = t2->in(1);
1048       n1op = n1->Opcode();
1049       if (shift == 16) {
1050         trunc_t = TypeInt::SHORT;
1051       } else if (shift == 8) {
1052         trunc_t = TypeInt::BYTE;
1053       }
1054     }
1055   }
1056 
1057   // If (maybe after stripping) it is an AddI, we won:
1058   if (n1op == Op_AddI) {
1059     *trunc1 = t1;
1060     *trunc2 = t2;
1061     *trunc_type = trunc_t;
1062     return n1;
1063   }
1064 
1065   // failed
1066   return NULL;
1067 }
1068 
1069 
1070 //------------------------------filtered_type--------------------------------
1071 // Return a type based on condition control flow
1072 // A successful return will be a type that is restricted due
1073 // to a series of dominating if-tests, such as:
1074 //    if (i < 10) {
1075 //       if (i > 0) {
1076 //          here: "i" type is [1..10)
1077 //       }
1078 //    }
1079 // or a control flow merge
1080 //    if (i < 10) {
1081 //       do {
1082 //          phi( , ) -- at top of loop type is [min_int..10)
1083 //         i = ?
1084 //       } while ( i < 10)
1085 //
1086 const TypeInt* PhaseIdealLoop::filtered_type( Node *n, Node* n_ctrl) {
1087   assert(n && n->bottom_type()->is_int(), "must be int");
1088   const TypeInt* filtered_t = NULL;
1089   if (!n->is_Phi()) {
1090     assert(n_ctrl != NULL || n_ctrl == C->top(), "valid control");
1091     filtered_t = filtered_type_from_dominators(n, n_ctrl);
1092 
1093   } else {
1094     Node* phi    = n->as_Phi();
1095     Node* region = phi->in(0);
1096     assert(n_ctrl == NULL || n_ctrl == region, "ctrl parameter must be region");
1097     if (region && region != C->top()) {
1098       for (uint i = 1; i < phi->req(); i++) {
1099         Node* val   = phi->in(i);
1100         Node* use_c = region->in(i);
1101         const TypeInt* val_t = filtered_type_from_dominators(val, use_c);
1102         if (val_t != NULL) {
1103           if (filtered_t == NULL) {
1104             filtered_t = val_t;
1105           } else {
1106             filtered_t = filtered_t->meet(val_t)->is_int();
1107           }
1108         }
1109       }
1110     }
1111   }
1112   const TypeInt* n_t = _igvn.type(n)->is_int();
1113   if (filtered_t != NULL) {
1114     n_t = n_t->join(filtered_t)->is_int();
1115   }
1116   return n_t;
1117 }
1118 
1119 
1120 //------------------------------filtered_type_from_dominators--------------------------------
1121 // Return a possibly more restrictive type for val based on condition control flow of dominators
1122 const TypeInt* PhaseIdealLoop::filtered_type_from_dominators( Node* val, Node *use_ctrl) {
1123   if (val->is_Con()) {
1124      return val->bottom_type()->is_int();
1125   }
1126   uint if_limit = 10; // Max number of dominating if's visited
1127   const TypeInt* rtn_t = NULL;
1128 
1129   if (use_ctrl && use_ctrl != C->top()) {
1130     Node* val_ctrl = get_ctrl(val);
1131     uint val_dom_depth = dom_depth(val_ctrl);
1132     Node* pred = use_ctrl;
1133     uint if_cnt = 0;
1134     while (if_cnt < if_limit) {
1135       if ((pred->Opcode() == Op_IfTrue || pred->Opcode() == Op_IfFalse)) {
1136         if_cnt++;
1137         const TypeInt* if_t = IfNode::filtered_int_type(&_igvn, val, pred);
1138         if (if_t != NULL) {
1139           if (rtn_t == NULL) {
1140             rtn_t = if_t;
1141           } else {
1142             rtn_t = rtn_t->join(if_t)->is_int();
1143           }
1144         }
1145       }
1146       pred = idom(pred);
1147       if (pred == NULL || pred == C->top()) {
1148         break;
1149       }
1150       // Stop if going beyond definition block of val
1151       if (dom_depth(pred) < val_dom_depth) {
1152         break;
1153       }
1154     }
1155   }
1156   return rtn_t;
1157 }
1158 
1159 
1160 //------------------------------dump_spec--------------------------------------
1161 // Dump special per-node info
1162 #ifndef PRODUCT
1163 void CountedLoopEndNode::dump_spec(outputStream *st) const {
1164   if( in(TestValue)->is_Bool() ) {
1165     BoolTest bt( test_trip()); // Added this for g++.
1166 
1167     st->print("[");
1168     bt.dump_on(st);
1169     st->print("]");
1170   }
1171   st->print(" ");
1172   IfNode::dump_spec(st);
1173 }
1174 #endif
1175 
1176 //=============================================================================
1177 //------------------------------is_member--------------------------------------
1178 // Is 'l' a member of 'this'?
1179 int IdealLoopTree::is_member( const IdealLoopTree *l ) const {
1180   while( l->_nest > _nest ) l = l->_parent;
1181   return l == this;
1182 }
1183 
1184 //------------------------------set_nest---------------------------------------
1185 // Set loop tree nesting depth.  Accumulate _has_call bits.
1186 int IdealLoopTree::set_nest( uint depth ) {
1187   _nest = depth;
1188   int bits = _has_call;
1189   if( _child ) bits |= _child->set_nest(depth+1);
1190   if( bits ) _has_call = 1;
1191   if( _next  ) bits |= _next ->set_nest(depth  );
1192   return bits;
1193 }
1194 
1195 //------------------------------split_fall_in----------------------------------
1196 // Split out multiple fall-in edges from the loop header.  Move them to a
1197 // private RegionNode before the loop.  This becomes the loop landing pad.
1198 void IdealLoopTree::split_fall_in( PhaseIdealLoop *phase, int fall_in_cnt ) {
1199   PhaseIterGVN &igvn = phase->_igvn;
1200   uint i;
1201 
1202   // Make a new RegionNode to be the landing pad.
1203   Node *landing_pad = new (phase->C) RegionNode( fall_in_cnt+1 );
1204   phase->set_loop(landing_pad,_parent);
1205   // Gather all the fall-in control paths into the landing pad
1206   uint icnt = fall_in_cnt;
1207   uint oreq = _head->req();
1208   for( i = oreq-1; i>0; i-- )
1209     if( !phase->is_member( this, _head->in(i) ) )
1210       landing_pad->set_req(icnt--,_head->in(i));
1211 
1212   // Peel off PhiNode edges as well
1213   for (DUIterator_Fast jmax, j = _head->fast_outs(jmax); j < jmax; j++) {
1214     Node *oj = _head->fast_out(j);
1215     if( oj->is_Phi() ) {
1216       PhiNode* old_phi = oj->as_Phi();
1217       assert( old_phi->region() == _head, "" );
1218       igvn.hash_delete(old_phi);   // Yank from hash before hacking edges
1219       Node *p = PhiNode::make_blank(landing_pad, old_phi);
1220       uint icnt = fall_in_cnt;
1221       for( i = oreq-1; i>0; i-- ) {
1222         if( !phase->is_member( this, _head->in(i) ) ) {
1223           p->init_req(icnt--, old_phi->in(i));
1224           // Go ahead and clean out old edges from old phi
1225           old_phi->del_req(i);
1226         }
1227       }
1228       // Search for CSE's here, because ZKM.jar does a lot of
1229       // loop hackery and we need to be a little incremental
1230       // with the CSE to avoid O(N^2) node blow-up.
1231       Node *p2 = igvn.hash_find_insert(p); // Look for a CSE
1232       if( p2 ) {                // Found CSE
1233         p->destruct();          // Recover useless new node
1234         p = p2;                 // Use old node
1235       } else {
1236         igvn.register_new_node_with_optimizer(p, old_phi);
1237       }
1238       // Make old Phi refer to new Phi.
1239       old_phi->add_req(p);
1240       // Check for the special case of making the old phi useless and
1241       // disappear it.  In JavaGrande I have a case where this useless
1242       // Phi is the loop limit and prevents recognizing a CountedLoop
1243       // which in turn prevents removing an empty loop.
1244       Node *id_old_phi = old_phi->Identity( &igvn );
1245       if( id_old_phi != old_phi ) { // Found a simple identity?
1246         // Note that I cannot call 'replace_node' here, because
1247         // that will yank the edge from old_phi to the Region and
1248         // I'm mid-iteration over the Region's uses.
1249         for (DUIterator_Last imin, i = old_phi->last_outs(imin); i >= imin; ) {
1250           Node* use = old_phi->last_out(i);
1251           igvn.rehash_node_delayed(use);
1252           uint uses_found = 0;
1253           for (uint j = 0; j < use->len(); j++) {
1254             if (use->in(j) == old_phi) {
1255               if (j < use->req()) use->set_req (j, id_old_phi);
1256               else                use->set_prec(j, id_old_phi);
1257               uses_found++;
1258             }
1259           }
1260           i -= uses_found;    // we deleted 1 or more copies of this edge
1261         }
1262       }
1263       igvn._worklist.push(old_phi);
1264     }
1265   }
1266   // Finally clean out the fall-in edges from the RegionNode
1267   for( i = oreq-1; i>0; i-- ) {
1268     if( !phase->is_member( this, _head->in(i) ) ) {
1269       _head->del_req(i);
1270     }
1271   }
1272   // Transform landing pad
1273   igvn.register_new_node_with_optimizer(landing_pad, _head);
1274   // Insert landing pad into the header
1275   _head->add_req(landing_pad);
1276 }
1277 
1278 //------------------------------split_outer_loop-------------------------------
1279 // Split out the outermost loop from this shared header.
1280 void IdealLoopTree::split_outer_loop( PhaseIdealLoop *phase ) {
1281   PhaseIterGVN &igvn = phase->_igvn;
1282 
1283   // Find index of outermost loop; it should also be my tail.
1284   uint outer_idx = 1;
1285   while( _head->in(outer_idx) != _tail ) outer_idx++;
1286 
1287   // Make a LoopNode for the outermost loop.
1288   Node *ctl = _head->in(LoopNode::EntryControl);
1289   Node *outer = new (phase->C) LoopNode( ctl, _head->in(outer_idx) );
1290   outer = igvn.register_new_node_with_optimizer(outer, _head);
1291   phase->set_created_loop_node();
1292 
1293   // Outermost loop falls into '_head' loop
1294   _head->set_req(LoopNode::EntryControl, outer);
1295   _head->del_req(outer_idx);
1296   // Split all the Phis up between '_head' loop and 'outer' loop.
1297   for (DUIterator_Fast jmax, j = _head->fast_outs(jmax); j < jmax; j++) {
1298     Node *out = _head->fast_out(j);
1299     if( out->is_Phi() ) {
1300       PhiNode *old_phi = out->as_Phi();
1301       assert( old_phi->region() == _head, "" );
1302       Node *phi = PhiNode::make_blank(outer, old_phi);
1303       phi->init_req(LoopNode::EntryControl,    old_phi->in(LoopNode::EntryControl));
1304       phi->init_req(LoopNode::LoopBackControl, old_phi->in(outer_idx));
1305       phi = igvn.register_new_node_with_optimizer(phi, old_phi);
1306       // Make old Phi point to new Phi on the fall-in path
1307       igvn.replace_input_of(old_phi, LoopNode::EntryControl, phi);
1308       old_phi->del_req(outer_idx);
1309     }
1310   }
1311 
1312   // Use the new loop head instead of the old shared one
1313   _head = outer;
1314   phase->set_loop(_head, this);
1315 }
1316 
1317 //------------------------------fix_parent-------------------------------------
1318 static void fix_parent( IdealLoopTree *loop, IdealLoopTree *parent ) {
1319   loop->_parent = parent;
1320   if( loop->_child ) fix_parent( loop->_child, loop   );
1321   if( loop->_next  ) fix_parent( loop->_next , parent );
1322 }
1323 
1324 //------------------------------estimate_path_freq-----------------------------
1325 static float estimate_path_freq( Node *n ) {
1326   // Try to extract some path frequency info
1327   IfNode *iff;
1328   for( int i = 0; i < 50; i++ ) { // Skip through a bunch of uncommon tests
1329     uint nop = n->Opcode();
1330     if( nop == Op_SafePoint ) {   // Skip any safepoint
1331       n = n->in(0);
1332       continue;
1333     }
1334     if( nop == Op_CatchProj ) {   // Get count from a prior call
1335       // Assume call does not always throw exceptions: means the call-site
1336       // count is also the frequency of the fall-through path.
1337       assert( n->is_CatchProj(), "" );
1338       if( ((CatchProjNode*)n)->_con != CatchProjNode::fall_through_index )
1339         return 0.0f;            // Assume call exception path is rare
1340       Node *call = n->in(0)->in(0)->in(0);
1341       assert( call->is_Call(), "expect a call here" );
1342       const JVMState *jvms = ((CallNode*)call)->jvms();
1343       ciMethodData* methodData = jvms->method()->method_data();
1344       if (!methodData->is_mature())  return 0.0f; // No call-site data
1345       ciProfileData* data = methodData->bci_to_data(jvms->bci());
1346       if ((data == NULL) || !data->is_CounterData()) {
1347         // no call profile available, try call's control input
1348         n = n->in(0);
1349         continue;
1350       }
1351       return data->as_CounterData()->count()/FreqCountInvocations;
1352     }
1353     // See if there's a gating IF test
1354     Node *n_c = n->in(0);
1355     if( !n_c->is_If() ) break;       // No estimate available
1356     iff = n_c->as_If();
1357     if( iff->_fcnt != COUNT_UNKNOWN )   // Have a valid count?
1358       // Compute how much count comes on this path
1359       return ((nop == Op_IfTrue) ? iff->_prob : 1.0f - iff->_prob) * iff->_fcnt;
1360     // Have no count info.  Skip dull uncommon-trap like branches.
1361     if( (nop == Op_IfTrue  && iff->_prob < PROB_LIKELY_MAG(5)) ||
1362         (nop == Op_IfFalse && iff->_prob > PROB_UNLIKELY_MAG(5)) )
1363       break;
1364     // Skip through never-taken branch; look for a real loop exit.
1365     n = iff->in(0);
1366   }
1367   return 0.0f;                  // No estimate available
1368 }
1369 
1370 //------------------------------merge_many_backedges---------------------------
1371 // Merge all the backedges from the shared header into a private Region.
1372 // Feed that region as the one backedge to this loop.
1373 void IdealLoopTree::merge_many_backedges( PhaseIdealLoop *phase ) {
1374   uint i;
1375 
1376   // Scan for the top 2 hottest backedges
1377   float hotcnt = 0.0f;
1378   float warmcnt = 0.0f;
1379   uint hot_idx = 0;
1380   // Loop starts at 2 because slot 1 is the fall-in path
1381   for( i = 2; i < _head->req(); i++ ) {
1382     float cnt = estimate_path_freq(_head->in(i));
1383     if( cnt > hotcnt ) {       // Grab hottest path
1384       warmcnt = hotcnt;
1385       hotcnt = cnt;
1386       hot_idx = i;
1387     } else if( cnt > warmcnt ) { // And 2nd hottest path
1388       warmcnt = cnt;
1389     }
1390   }
1391 
1392   // See if the hottest backedge is worthy of being an inner loop
1393   // by being much hotter than the next hottest backedge.
1394   if( hotcnt <= 0.0001 ||
1395       hotcnt < 2.0*warmcnt ) hot_idx = 0;// No hot backedge
1396 
1397   // Peel out the backedges into a private merge point; peel
1398   // them all except optionally hot_idx.
1399   PhaseIterGVN &igvn = phase->_igvn;
1400 
1401   Node *hot_tail = NULL;
1402   // Make a Region for the merge point
1403   Node *r = new (phase->C) RegionNode(1);
1404   for( i = 2; i < _head->req(); i++ ) {
1405     if( i != hot_idx )
1406       r->add_req( _head->in(i) );
1407     else hot_tail = _head->in(i);
1408   }
1409   igvn.register_new_node_with_optimizer(r, _head);
1410   // Plug region into end of loop _head, followed by hot_tail
1411   while( _head->req() > 3 ) _head->del_req( _head->req()-1 );
1412   _head->set_req(2, r);
1413   if( hot_idx ) _head->add_req(hot_tail);
1414 
1415   // Split all the Phis up between '_head' loop and the Region 'r'
1416   for (DUIterator_Fast jmax, j = _head->fast_outs(jmax); j < jmax; j++) {
1417     Node *out = _head->fast_out(j);
1418     if( out->is_Phi() ) {
1419       PhiNode* n = out->as_Phi();
1420       igvn.hash_delete(n);      // Delete from hash before hacking edges
1421       Node *hot_phi = NULL;
1422       Node *phi = new (phase->C) PhiNode(r, n->type(), n->adr_type());
1423       // Check all inputs for the ones to peel out
1424       uint j = 1;
1425       for( uint i = 2; i < n->req(); i++ ) {
1426         if( i != hot_idx )
1427           phi->set_req( j++, n->in(i) );
1428         else hot_phi = n->in(i);
1429       }
1430       // Register the phi but do not transform until whole place transforms
1431       igvn.register_new_node_with_optimizer(phi, n);
1432       // Add the merge phi to the old Phi
1433       while( n->req() > 3 ) n->del_req( n->req()-1 );
1434       n->set_req(2, phi);
1435       if( hot_idx ) n->add_req(hot_phi);
1436     }
1437   }
1438 
1439 
1440   // Insert a new IdealLoopTree inserted below me.  Turn it into a clone
1441   // of self loop tree.  Turn self into a loop headed by _head and with
1442   // tail being the new merge point.
1443   IdealLoopTree *ilt = new IdealLoopTree( phase, _head, _tail );
1444   phase->set_loop(_tail,ilt);   // Adjust tail
1445   _tail = r;                    // Self's tail is new merge point
1446   phase->set_loop(r,this);
1447   ilt->_child = _child;         // New guy has my children
1448   _child = ilt;                 // Self has new guy as only child
1449   ilt->_parent = this;          // new guy has self for parent
1450   ilt->_nest = _nest;           // Same nesting depth (for now)
1451 
1452   // Starting with 'ilt', look for child loop trees using the same shared
1453   // header.  Flatten these out; they will no longer be loops in the end.
1454   IdealLoopTree **pilt = &_child;
1455   while( ilt ) {
1456     if( ilt->_head == _head ) {
1457       uint i;
1458       for( i = 2; i < _head->req(); i++ )
1459         if( _head->in(i) == ilt->_tail )
1460           break;                // Still a loop
1461       if( i == _head->req() ) { // No longer a loop
1462         // Flatten ilt.  Hang ilt's "_next" list from the end of
1463         // ilt's '_child' list.  Move the ilt's _child up to replace ilt.
1464         IdealLoopTree **cp = &ilt->_child;
1465         while( *cp ) cp = &(*cp)->_next;   // Find end of child list
1466         *cp = ilt->_next;       // Hang next list at end of child list
1467         *pilt = ilt->_child;    // Move child up to replace ilt
1468         ilt->_head = NULL;      // Flag as a loop UNIONED into parent
1469         ilt = ilt->_child;      // Repeat using new ilt
1470         continue;               // do not advance over ilt->_child
1471       }
1472       assert( ilt->_tail == hot_tail, "expected to only find the hot inner loop here" );
1473       phase->set_loop(_head,ilt);
1474     }
1475     pilt = &ilt->_child;        // Advance to next
1476     ilt = *pilt;
1477   }
1478 
1479   if( _child ) fix_parent( _child, this );
1480 }
1481 
1482 //------------------------------beautify_loops---------------------------------
1483 // Split shared headers and insert loop landing pads.
1484 // Insert a LoopNode to replace the RegionNode.
1485 // Return TRUE if loop tree is structurally changed.
1486 bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) {
1487   bool result = false;
1488   // Cache parts in locals for easy
1489   PhaseIterGVN &igvn = phase->_igvn;
1490 
1491   igvn.hash_delete(_head);      // Yank from hash before hacking edges
1492 
1493   // Check for multiple fall-in paths.  Peel off a landing pad if need be.
1494   int fall_in_cnt = 0;
1495   for( uint i = 1; i < _head->req(); i++ )
1496     if( !phase->is_member( this, _head->in(i) ) )
1497       fall_in_cnt++;
1498   assert( fall_in_cnt, "at least 1 fall-in path" );
1499   if( fall_in_cnt > 1 )         // Need a loop landing pad to merge fall-ins
1500     split_fall_in( phase, fall_in_cnt );
1501 
1502   // Swap inputs to the _head and all Phis to move the fall-in edge to
1503   // the left.
1504   fall_in_cnt = 1;
1505   while( phase->is_member( this, _head->in(fall_in_cnt) ) )
1506     fall_in_cnt++;
1507   if( fall_in_cnt > 1 ) {
1508     // Since I am just swapping inputs I do not need to update def-use info
1509     Node *tmp = _head->in(1);
1510     _head->set_req( 1, _head->in(fall_in_cnt) );
1511     _head->set_req( fall_in_cnt, tmp );
1512     // Swap also all Phis
1513     for (DUIterator_Fast imax, i = _head->fast_outs(imax); i < imax; i++) {
1514       Node* phi = _head->fast_out(i);
1515       if( phi->is_Phi() ) {
1516         igvn.hash_delete(phi); // Yank from hash before hacking edges
1517         tmp = phi->in(1);
1518         phi->set_req( 1, phi->in(fall_in_cnt) );
1519         phi->set_req( fall_in_cnt, tmp );
1520       }
1521     }
1522   }
1523   assert( !phase->is_member( this, _head->in(1) ), "left edge is fall-in" );
1524   assert(  phase->is_member( this, _head->in(2) ), "right edge is loop" );
1525 
1526   // If I am a shared header (multiple backedges), peel off the many
1527   // backedges into a private merge point and use the merge point as
1528   // the one true backedge.
1529   if( _head->req() > 3 ) {
1530     // Merge the many backedges into a single backedge but leave
1531     // the hottest backedge as separate edge for the following peel.
1532     merge_many_backedges( phase );
1533     result = true;
1534   }
1535 
1536   // If I have one hot backedge, peel off myself loop.
1537   // I better be the outermost loop.
1538   if (_head->req() > 3 && !_irreducible) {
1539     split_outer_loop( phase );
1540     result = true;
1541 
1542   } else if (!_head->is_Loop() && !_irreducible) {
1543     // Make a new LoopNode to replace the old loop head
1544     Node *l = new (phase->C) LoopNode( _head->in(1), _head->in(2) );
1545     l = igvn.register_new_node_with_optimizer(l, _head);
1546     phase->set_created_loop_node();
1547     // Go ahead and replace _head
1548     phase->_igvn.replace_node( _head, l );
1549     _head = l;
1550     phase->set_loop(_head, this);
1551   }
1552 
1553   // Now recursively beautify nested loops
1554   if( _child ) result |= _child->beautify_loops( phase );
1555   if( _next  ) result |= _next ->beautify_loops( phase );
1556   return result;
1557 }
1558 
1559 //------------------------------allpaths_check_safepts----------------------------
1560 // Allpaths backwards scan from loop tail, terminating each path at first safepoint
1561 // encountered.  Helper for check_safepts.
1562 void IdealLoopTree::allpaths_check_safepts(VectorSet &visited, Node_List &stack) {
1563   assert(stack.size() == 0, "empty stack");
1564   stack.push(_tail);
1565   visited.Clear();
1566   visited.set(_tail->_idx);
1567   while (stack.size() > 0) {
1568     Node* n = stack.pop();
1569     if (n->is_Call() && n->as_Call()->guaranteed_safepoint()) {
1570       // Terminate this path
1571     } else if (n->Opcode() == Op_SafePoint) {
1572       if (_phase->get_loop(n) != this) {
1573         if (_required_safept == NULL) _required_safept = new Node_List();
1574         _required_safept->push(n);  // save the one closest to the tail
1575       }
1576       // Terminate this path
1577     } else {
1578       uint start = n->is_Region() ? 1 : 0;
1579       uint end   = n->is_Region() && !n->is_Loop() ? n->req() : start + 1;
1580       for (uint i = start; i < end; i++) {
1581         Node* in = n->in(i);
1582         assert(in->is_CFG(), "must be");
1583         if (!visited.test_set(in->_idx) && is_member(_phase->get_loop(in))) {
1584           stack.push(in);
1585         }
1586       }
1587     }
1588   }
1589 }
1590 
1591 //------------------------------check_safepts----------------------------
1592 // Given dominators, try to find loops with calls that must always be
1593 // executed (call dominates loop tail).  These loops do not need non-call
1594 // safepoints (ncsfpt).
1595 //
1596 // A complication is that a safepoint in a inner loop may be needed
1597 // by an outer loop. In the following, the inner loop sees it has a
1598 // call (block 3) on every path from the head (block 2) to the
1599 // backedge (arc 3->2).  So it deletes the ncsfpt (non-call safepoint)
1600 // in block 2, _but_ this leaves the outer loop without a safepoint.
1601 //
1602 //          entry  0
1603 //                 |
1604 //                 v
1605 // outer 1,2    +->1
1606 //              |  |
1607 //              |  v
1608 //              |  2<---+  ncsfpt in 2
1609 //              |_/|\   |
1610 //                 | v  |
1611 // inner 2,3      /  3  |  call in 3
1612 //               /   |  |
1613 //              v    +--+
1614 //        exit  4
1615 //
1616 //
1617 // This method creates a list (_required_safept) of ncsfpt nodes that must
1618 // be protected is created for each loop. When a ncsfpt maybe deleted, it
1619 // is first looked for in the lists for the outer loops of the current loop.
1620 //
1621 // The insights into the problem:
1622 //  A) counted loops are okay
1623 //  B) innermost loops are okay (only an inner loop can delete
1624 //     a ncsfpt needed by an outer loop)
1625 //  C) a loop is immune from an inner loop deleting a safepoint
1626 //     if the loop has a call on the idom-path
1627 //  D) a loop is also immune if it has a ncsfpt (non-call safepoint) on the
1628 //     idom-path that is not in a nested loop
1629 //  E) otherwise, an ncsfpt on the idom-path that is nested in an inner
1630 //     loop needs to be prevented from deletion by an inner loop
1631 //
1632 // There are two analyses:
1633 //  1) The first, and cheaper one, scans the loop body from
1634 //     tail to head following the idom (immediate dominator)
1635 //     chain, looking for the cases (C,D,E) above.
1636 //     Since inner loops are scanned before outer loops, there is summary
1637 //     information about inner loops.  Inner loops can be skipped over
1638 //     when the tail of an inner loop is encountered.
1639 //
1640 //  2) The second, invoked if the first fails to find a call or ncsfpt on
1641 //     the idom path (which is rare), scans all predecessor control paths
1642 //     from the tail to the head, terminating a path when a call or sfpt
1643 //     is encountered, to find the ncsfpt's that are closest to the tail.
1644 //
1645 void IdealLoopTree::check_safepts(VectorSet &visited, Node_List &stack) {
1646   // Bottom up traversal
1647   IdealLoopTree* ch = _child;
1648   if (_child) _child->check_safepts(visited, stack);
1649   if (_next)  _next ->check_safepts(visited, stack);
1650 
1651   if (!_head->is_CountedLoop() && !_has_sfpt && _parent != NULL && !_irreducible) {
1652     bool  has_call         = false; // call on dom-path
1653     bool  has_local_ncsfpt = false; // ncsfpt on dom-path at this loop depth
1654     Node* nonlocal_ncsfpt  = NULL;  // ncsfpt on dom-path at a deeper depth
1655     // Scan the dom-path nodes from tail to head
1656     for (Node* n = tail(); n != _head; n = _phase->idom(n)) {
1657       if (n->is_Call() && n->as_Call()->guaranteed_safepoint()) {
1658         has_call = true;
1659         _has_sfpt = 1;          // Then no need for a safept!
1660         break;
1661       } else if (n->Opcode() == Op_SafePoint) {
1662         if (_phase->get_loop(n) == this) {
1663           has_local_ncsfpt = true;
1664           break;
1665         }
1666         if (nonlocal_ncsfpt == NULL) {
1667           nonlocal_ncsfpt = n; // save the one closest to the tail
1668         }
1669       } else {
1670         IdealLoopTree* nlpt = _phase->get_loop(n);
1671         if (this != nlpt) {
1672           // If at an inner loop tail, see if the inner loop has already
1673           // recorded seeing a call on the dom-path (and stop.)  If not,
1674           // jump to the head of the inner loop.
1675           assert(is_member(nlpt), "nested loop");
1676           Node* tail = nlpt->_tail;
1677           if (tail->in(0)->is_If()) tail = tail->in(0);
1678           if (n == tail) {
1679             // If inner loop has call on dom-path, so does outer loop
1680             if (nlpt->_has_sfpt) {
1681               has_call = true;
1682               _has_sfpt = 1;
1683               break;
1684             }
1685             // Skip to head of inner loop
1686             assert(_phase->is_dominator(_head, nlpt->_head), "inner head dominated by outer head");
1687             n = nlpt->_head;
1688           }
1689         }
1690       }
1691     }
1692     // Record safept's that this loop needs preserved when an
1693     // inner loop attempts to delete it's safepoints.
1694     if (_child != NULL && !has_call && !has_local_ncsfpt) {
1695       if (nonlocal_ncsfpt != NULL) {
1696         if (_required_safept == NULL) _required_safept = new Node_List();
1697         _required_safept->push(nonlocal_ncsfpt);
1698       } else {
1699         // Failed to find a suitable safept on the dom-path.  Now use
1700         // an all paths walk from tail to head, looking for safepoints to preserve.
1701         allpaths_check_safepts(visited, stack);
1702       }
1703     }
1704   }
1705 }
1706 
1707 //---------------------------is_deleteable_safept----------------------------
1708 // Is safept not required by an outer loop?
1709 bool PhaseIdealLoop::is_deleteable_safept(Node* sfpt) {
1710   assert(sfpt->Opcode() == Op_SafePoint, "");
1711   IdealLoopTree* lp = get_loop(sfpt)->_parent;
1712   while (lp != NULL) {
1713     Node_List* sfpts = lp->_required_safept;
1714     if (sfpts != NULL) {
1715       for (uint i = 0; i < sfpts->size(); i++) {
1716         if (sfpt == sfpts->at(i))
1717           return false;
1718       }
1719     }
1720     lp = lp->_parent;
1721   }
1722   return true;
1723 }
1724 
1725 //---------------------------replace_parallel_iv-------------------------------
1726 // Replace parallel induction variable (parallel to trip counter)
1727 void PhaseIdealLoop::replace_parallel_iv(IdealLoopTree *loop) {
1728   assert(loop->_head->is_CountedLoop(), "");
1729   CountedLoopNode *cl = loop->_head->as_CountedLoop();
1730   if (!cl->is_valid_counted_loop())
1731     return;         // skip malformed counted loop
1732   Node *incr = cl->incr();
1733   if (incr == NULL)
1734     return;         // Dead loop?
1735   Node *init = cl->init_trip();
1736   Node *phi  = cl->phi();
1737   int stride_con = cl->stride_con();
1738 
1739   // Visit all children, looking for Phis
1740   for (DUIterator i = cl->outs(); cl->has_out(i); i++) {
1741     Node *out = cl->out(i);
1742     // Look for other phis (secondary IVs). Skip dead ones
1743     if (!out->is_Phi() || out == phi || !has_node(out))
1744       continue;
1745     PhiNode* phi2 = out->as_Phi();
1746     Node *incr2 = phi2->in( LoopNode::LoopBackControl );
1747     // Look for induction variables of the form:  X += constant
1748     if (phi2->region() != loop->_head ||
1749         incr2->req() != 3 ||
1750         incr2->in(1) != phi2 ||
1751         incr2 == incr ||
1752         incr2->Opcode() != Op_AddI ||
1753         !incr2->in(2)->is_Con())
1754       continue;
1755 
1756     // Check for parallel induction variable (parallel to trip counter)
1757     // via an affine function.  In particular, count-down loops with
1758     // count-up array indices are common. We only RCE references off
1759     // the trip-counter, so we need to convert all these to trip-counter
1760     // expressions.
1761     Node *init2 = phi2->in( LoopNode::EntryControl );
1762     int stride_con2 = incr2->in(2)->get_int();
1763 
1764     // The general case here gets a little tricky.  We want to find the
1765     // GCD of all possible parallel IV's and make a new IV using this
1766     // GCD for the loop.  Then all possible IVs are simple multiples of
1767     // the GCD.  In practice, this will cover very few extra loops.
1768     // Instead we require 'stride_con2' to be a multiple of 'stride_con',
1769     // where +/-1 is the common case, but other integer multiples are
1770     // also easy to handle.
1771     int ratio_con = stride_con2/stride_con;
1772 
1773     if ((ratio_con * stride_con) == stride_con2) { // Check for exact
1774 #ifndef PRODUCT
1775       if (TraceLoopOpts) {
1776         tty->print("Parallel IV: %d ", phi2->_idx);
1777         loop->dump_head();
1778       }
1779 #endif
1780       // Convert to using the trip counter.  The parallel induction
1781       // variable differs from the trip counter by a loop-invariant
1782       // amount, the difference between their respective initial values.
1783       // It is scaled by the 'ratio_con'.
1784       Node* ratio = _igvn.intcon(ratio_con);
1785       set_ctrl(ratio, C->root());
1786       Node* ratio_init = new (C) MulINode(init, ratio);
1787       _igvn.register_new_node_with_optimizer(ratio_init, init);
1788       set_early_ctrl(ratio_init);
1789       Node* diff = new (C) SubINode(init2, ratio_init);
1790       _igvn.register_new_node_with_optimizer(diff, init2);
1791       set_early_ctrl(diff);
1792       Node* ratio_idx = new (C) MulINode(phi, ratio);
1793       _igvn.register_new_node_with_optimizer(ratio_idx, phi);
1794       set_ctrl(ratio_idx, cl);
1795       Node* add = new (C) AddINode(ratio_idx, diff);
1796       _igvn.register_new_node_with_optimizer(add);
1797       set_ctrl(add, cl);
1798       _igvn.replace_node( phi2, add );
1799       // Sometimes an induction variable is unused
1800       if (add->outcnt() == 0) {
1801         _igvn.remove_dead_node(add);
1802       }
1803       --i; // deleted this phi; rescan starting with next position
1804       continue;
1805     }
1806   }
1807 }
1808 
1809 //------------------------------counted_loop-----------------------------------
1810 // Convert to counted loops where possible
1811 void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
1812 
1813   // For grins, set the inner-loop flag here
1814   if (!_child) {
1815     if (_head->is_Loop()) _head->as_Loop()->set_inner_loop();
1816   }
1817 
1818   if (_head->is_CountedLoop() ||
1819       phase->is_counted_loop(_head, this)) {
1820     _has_sfpt = 1;              // Indicate we do not need a safepoint here
1821 
1822     // Look for safepoints to remove.
1823     Node_List* sfpts = _safepts;
1824     if (sfpts != NULL) {
1825       for (uint i = 0; i < sfpts->size(); i++) {
1826         Node* n = sfpts->at(i);
1827         assert(phase->get_loop(n) == this, "");
1828         if (phase->is_deleteable_safept(n)) {
1829           phase->lazy_replace(n, n->in(TypeFunc::Control));
1830         }
1831       }
1832     }
1833 
1834     // Look for induction variables
1835     phase->replace_parallel_iv(this);
1836 
1837   } else if (_parent != NULL && !_irreducible) {
1838     // Not a counted loop.
1839     // Look for a safepoint on the idom-path.
1840     Node* sfpt = tail();
1841     for (; sfpt != _head; sfpt = phase->idom(sfpt)) {
1842       if (sfpt->Opcode() == Op_SafePoint && phase->get_loop(sfpt) == this)
1843         break; // Found one
1844     }
1845     // Delete other safepoints in this loop.
1846     Node_List* sfpts = _safepts;
1847     if (sfpts != NULL && sfpt != _head && sfpt->Opcode() == Op_SafePoint) {
1848       for (uint i = 0; i < sfpts->size(); i++) {
1849         Node* n = sfpts->at(i);
1850         assert(phase->get_loop(n) == this, "");
1851         if (n != sfpt && phase->is_deleteable_safept(n)) {
1852           phase->lazy_replace(n, n->in(TypeFunc::Control));
1853         }
1854       }
1855     }
1856   }
1857 
1858   // Recursively
1859   if (_child) _child->counted_loop( phase );
1860   if (_next)  _next ->counted_loop( phase );
1861 }
1862 
1863 #ifndef PRODUCT
1864 //------------------------------dump_head--------------------------------------
1865 // Dump 1 liner for loop header info
1866 void IdealLoopTree::dump_head( ) const {
1867   for (uint i=0; i<_nest; i++)
1868     tty->print("  ");
1869   tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
1870   if (_irreducible) tty->print(" IRREDUCIBLE");
1871   Node* entry = _head->in(LoopNode::EntryControl);
1872   if (LoopLimitCheck) {
1873     Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
1874     if (predicate != NULL ) {
1875       tty->print(" limit_check");
1876       entry = entry->in(0)->in(0);
1877     }
1878   }
1879   if (UseLoopPredicate) {
1880     entry = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
1881     if (entry != NULL) {
1882       tty->print(" predicated");
1883     }
1884   }
1885   if (_head->is_CountedLoop()) {
1886     CountedLoopNode *cl = _head->as_CountedLoop();
1887     tty->print(" counted");
1888 
1889     Node* init_n = cl->init_trip();
1890     if (init_n  != NULL &&  init_n->is_Con())
1891       tty->print(" [%d,", cl->init_trip()->get_int());
1892     else
1893       tty->print(" [int,");
1894     Node* limit_n = cl->limit();
1895     if (limit_n  != NULL &&  limit_n->is_Con())
1896       tty->print("%d),", cl->limit()->get_int());
1897     else
1898       tty->print("int),");
1899     int stride_con  = cl->stride_con();
1900     if (stride_con > 0) tty->print("+");
1901     tty->print("%d", stride_con);
1902 
1903     tty->print(" (%d iters) ", (int)cl->profile_trip_cnt());
1904 
1905     if (cl->is_pre_loop ()) tty->print(" pre" );
1906     if (cl->is_main_loop()) tty->print(" main");
1907     if (cl->is_post_loop()) tty->print(" post");
1908   }
1909   tty->cr();
1910 }
1911 
1912 //------------------------------dump-------------------------------------------
1913 // Dump loops by loop tree
1914 void IdealLoopTree::dump( ) const {
1915   dump_head();
1916   if (_child) _child->dump();
1917   if (_next)  _next ->dump();
1918 }
1919 
1920 #endif
1921 
1922 static void log_loop_tree(IdealLoopTree* root, IdealLoopTree* loop, CompileLog* log) {
1923   if (loop == root) {
1924     if (loop->_child != NULL) {
1925       log->begin_head("loop_tree");
1926       log->end_head();
1927       if( loop->_child ) log_loop_tree(root, loop->_child, log);
1928       log->tail("loop_tree");
1929       assert(loop->_next == NULL, "what?");
1930     }
1931   } else {
1932     Node* head = loop->_head;
1933     log->begin_head("loop");
1934     log->print(" idx='%d' ", head->_idx);
1935     if (loop->_irreducible) log->print("irreducible='1' ");
1936     if (head->is_Loop()) {
1937       if (head->as_Loop()->is_inner_loop()) log->print("inner_loop='1' ");
1938       if (head->as_Loop()->is_partial_peel_loop()) log->print("partial_peel_loop='1' ");
1939     }
1940     if (head->is_CountedLoop()) {
1941       CountedLoopNode* cl = head->as_CountedLoop();
1942       if (cl->is_pre_loop())  log->print("pre_loop='%d' ",  cl->main_idx());
1943       if (cl->is_main_loop()) log->print("main_loop='%d' ", cl->_idx);
1944       if (cl->is_post_loop()) log->print("post_loop='%d' ",  cl->main_idx());
1945     }
1946     log->end_head();
1947     if( loop->_child ) log_loop_tree(root, loop->_child, log);
1948     log->tail("loop");
1949     if( loop->_next  ) log_loop_tree(root, loop->_next, log);
1950   }
1951 }
1952 
1953 //---------------------collect_potentially_useful_predicates-----------------------
1954 // Helper function to collect potentially useful predicates to prevent them from
1955 // being eliminated by PhaseIdealLoop::eliminate_useless_predicates
1956 void PhaseIdealLoop::collect_potentially_useful_predicates(
1957                          IdealLoopTree * loop, Unique_Node_List &useful_predicates) {
1958   if (loop->_child) { // child
1959     collect_potentially_useful_predicates(loop->_child, useful_predicates);
1960   }
1961 
1962   // self (only loops that we can apply loop predication may use their predicates)
1963   if (loop->_head->is_Loop() &&
1964       !loop->_irreducible    &&
1965       !loop->tail()->is_top()) {
1966     LoopNode* lpn = loop->_head->as_Loop();
1967     Node* entry = lpn->in(LoopNode::EntryControl);
1968     Node* predicate_proj = find_predicate(entry); // loop_limit_check first
1969     if (predicate_proj != NULL ) { // right pattern that can be used by loop predication
1970       assert(entry->in(0)->in(1)->in(1)->Opcode() == Op_Opaque1, "must be");
1971       useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
1972       entry = entry->in(0)->in(0);
1973     }
1974     predicate_proj = find_predicate(entry); // Predicate
1975     if (predicate_proj != NULL ) {
1976       useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
1977     }
1978   }
1979 
1980   if (loop->_next) { // sibling
1981     collect_potentially_useful_predicates(loop->_next, useful_predicates);
1982   }
1983 }
1984 
1985 //------------------------eliminate_useless_predicates-----------------------------
1986 // Eliminate all inserted predicates if they could not be used by loop predication.
1987 // Note: it will also eliminates loop limits check predicate since it also uses
1988 // Opaque1 node (see Parse::add_predicate()).
1989 void PhaseIdealLoop::eliminate_useless_predicates() {
1990   if (C->predicate_count() == 0)
1991     return; // no predicate left
1992 
1993   Unique_Node_List useful_predicates; // to store useful predicates
1994   if (C->has_loops()) {
1995     collect_potentially_useful_predicates(_ltree_root->_child, useful_predicates);
1996   }
1997 
1998   for (int i = C->predicate_count(); i > 0; i--) {
1999      Node * n = C->predicate_opaque1_node(i-1);
2000      assert(n->Opcode() == Op_Opaque1, "must be");
2001      if (!useful_predicates.member(n)) { // not in the useful list
2002        _igvn.replace_node(n, n->in(1));
2003      }
2004   }
2005 }
2006 
2007 //------------------------process_expensive_nodes-----------------------------
2008 // Expensive nodes have their control input set to prevent the GVN
2009 // from commoning them and as a result forcing the resulting node to
2010 // be in a more frequent path. Use CFG information here, to change the
2011 // control inputs so that some expensive nodes can be commoned while
2012 // not executed more frequently.
2013 bool PhaseIdealLoop::process_expensive_nodes() {
2014   assert(OptimizeExpensiveOps, "optimization off?");
2015 
2016   // Sort nodes to bring similar nodes together
2017   C->sort_expensive_nodes();
2018 
2019   bool progress = false;
2020 
2021   for (int i = 0; i < C->expensive_count(); ) {
2022     Node* n = C->expensive_node(i);
2023     int start = i;
2024     // Find nodes similar to n
2025     i++;
2026     for (; i < C->expensive_count() && Compile::cmp_expensive_nodes(n, C->expensive_node(i)) == 0; i++);
2027     int end = i;
2028     // And compare them two by two
2029     for (int j = start; j < end; j++) {
2030       Node* n1 = C->expensive_node(j);
2031       if (is_node_unreachable(n1)) {
2032         continue;
2033       }
2034       for (int k = j+1; k < end; k++) {
2035         Node* n2 = C->expensive_node(k);
2036         if (is_node_unreachable(n2)) {
2037           continue;
2038         }
2039 
2040         assert(n1 != n2, "should be pair of nodes");
2041 
2042         Node* c1 = n1->in(0);
2043         Node* c2 = n2->in(0);
2044 
2045         Node* parent_c1 = c1;
2046         Node* parent_c2 = c2;
2047 
2048         // The call to get_early_ctrl_for_expensive() moves the
2049         // expensive nodes up but stops at loops that are in a if
2050         // branch. See whether we can exit the loop and move above the
2051         // If.
2052         if (c1->is_Loop()) {
2053           parent_c1 = c1->in(1);
2054         }
2055         if (c2->is_Loop()) {
2056           parent_c2 = c2->in(1);
2057         }
2058 
2059         if (parent_c1 == parent_c2) {
2060           _igvn._worklist.push(n1);
2061           _igvn._worklist.push(n2);
2062           continue;
2063         }
2064 
2065         // Look for identical expensive node up the dominator chain.
2066         if (is_dominator(c1, c2)) {
2067           c2 = c1;
2068         } else if (is_dominator(c2, c1)) {
2069           c1 = c2;
2070         } else if (parent_c1->is_Proj() && parent_c1->in(0)->is_If() &&
2071                    parent_c2->is_Proj() && parent_c1->in(0) == parent_c2->in(0)) {
2072           // Both branches have the same expensive node so move it up
2073           // before the if.
2074           c1 = c2 = idom(parent_c1->in(0));
2075         }
2076         // Do the actual moves
2077         if (n1->in(0) != c1) {
2078           _igvn.hash_delete(n1);
2079           n1->set_req(0, c1);
2080           _igvn.hash_insert(n1);
2081           _igvn._worklist.push(n1);
2082           progress = true;
2083         }
2084         if (n2->in(0) != c2) {
2085           _igvn.hash_delete(n2);
2086           n2->set_req(0, c2);
2087           _igvn.hash_insert(n2);
2088           _igvn._worklist.push(n2);
2089           progress = true;
2090         }
2091       }
2092     }
2093   }
2094 
2095   return progress;
2096 }
2097 
2098 
2099 //=============================================================================
2100 //----------------------------build_and_optimize-------------------------------
2101 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
2102 // its corresponding LoopNode.  If 'optimize' is true, do some loop cleanups.
2103 void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool skip_loop_opts) {
2104   ResourceMark rm;
2105 
2106   int old_progress = C->major_progress();
2107   uint orig_worklist_size = _igvn._worklist.size();
2108 
2109   // Reset major-progress flag for the driver's heuristics
2110   C->clear_major_progress();
2111 
2112 #ifndef PRODUCT
2113   // Capture for later assert
2114   uint unique = C->unique();
2115   _loop_invokes++;
2116   _loop_work += unique;
2117 #endif
2118 
2119   // True if the method has at least 1 irreducible loop
2120   _has_irreducible_loops = false;
2121 
2122   _created_loop_node = false;
2123 
2124   Arena *a = Thread::current()->resource_area();
2125   VectorSet visited(a);
2126   // Pre-grow the mapping from Nodes to IdealLoopTrees.
2127   _nodes.map(C->unique(), NULL);
2128   memset(_nodes.adr(), 0, wordSize * C->unique());
2129 
2130   // Pre-build the top-level outermost loop tree entry
2131   _ltree_root = new IdealLoopTree( this, C->root(), C->root() );
2132   // Do not need a safepoint at the top level
2133   _ltree_root->_has_sfpt = 1;
2134 
2135   // Initialize Dominators.
2136   // Checked in clone_loop_predicate() during beautify_loops().
2137   _idom_size = 0;
2138   _idom      = NULL;
2139   _dom_depth = NULL;
2140   _dom_stk   = NULL;
2141 
2142   // Empty pre-order array
2143   allocate_preorders();
2144 
2145   // Build a loop tree on the fly.  Build a mapping from CFG nodes to
2146   // IdealLoopTree entries.  Data nodes are NOT walked.
2147   build_loop_tree();
2148   // Check for bailout, and return
2149   if (C->failing()) {
2150     return;
2151   }
2152 
2153   // No loops after all
2154   if( !_ltree_root->_child && !_verify_only ) C->set_has_loops(false);
2155 
2156   // There should always be an outer loop containing the Root and Return nodes.
2157   // If not, we have a degenerate empty program.  Bail out in this case.
2158   if (!has_node(C->root())) {
2159     if (!_verify_only) {
2160       C->clear_major_progress();
2161       C->record_method_not_compilable("empty program detected during loop optimization");
2162     }
2163     return;
2164   }
2165 
2166   // Nothing to do, so get out
2167   bool stop_early = !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only;
2168   bool do_expensive_nodes = C->should_optimize_expensive_nodes(_igvn);
2169   if (stop_early && !do_expensive_nodes) {
2170     _igvn.optimize();           // Cleanup NeverBranches
2171     return;
2172   }
2173 
2174   // Set loop nesting depth
2175   _ltree_root->set_nest( 0 );
2176 
2177   // Split shared headers and insert loop landing pads.
2178   // Do not bother doing this on the Root loop of course.
2179   if( !_verify_me && !_verify_only && _ltree_root->_child ) {
2180     C->print_method(PHASE_BEFORE_BEAUTIFY_LOOPS, 3);
2181     if( _ltree_root->_child->beautify_loops( this ) ) {
2182       // Re-build loop tree!
2183       _ltree_root->_child = NULL;
2184       _nodes.clear();
2185       reallocate_preorders();
2186       build_loop_tree();
2187       // Check for bailout, and return
2188       if (C->failing()) {
2189         return;
2190       }
2191       // Reset loop nesting depth
2192       _ltree_root->set_nest( 0 );
2193 
2194       C->print_method(PHASE_AFTER_BEAUTIFY_LOOPS, 3);
2195     }
2196   }
2197 
2198   // Build Dominators for elision of NULL checks & loop finding.
2199   // Since nodes do not have a slot for immediate dominator, make
2200   // a persistent side array for that info indexed on node->_idx.
2201   _idom_size = C->unique();
2202   _idom      = NEW_RESOURCE_ARRAY( Node*, _idom_size );
2203   _dom_depth = NEW_RESOURCE_ARRAY( uint,  _idom_size );
2204   _dom_stk   = NULL; // Allocated on demand in recompute_dom_depth
2205   memset( _dom_depth, 0, _idom_size * sizeof(uint) );
2206 
2207   Dominators();
2208 
2209   if (!_verify_only) {
2210     // As a side effect, Dominators removed any unreachable CFG paths
2211     // into RegionNodes.  It doesn't do this test against Root, so
2212     // we do it here.
2213     for( uint i = 1; i < C->root()->req(); i++ ) {
2214       if( !_nodes[C->root()->in(i)->_idx] ) {    // Dead path into Root?
2215         _igvn.delete_input_of(C->root(), i);
2216         i--;                      // Rerun same iteration on compressed edges
2217       }
2218     }
2219 
2220     // Given dominators, try to find inner loops with calls that must
2221     // always be executed (call dominates loop tail).  These loops do
2222     // not need a separate safepoint.
2223     Node_List cisstack(a);
2224     _ltree_root->check_safepts(visited, cisstack);
2225   }
2226 
2227   // Walk the DATA nodes and place into loops.  Find earliest control
2228   // node.  For CFG nodes, the _nodes array starts out and remains
2229   // holding the associated IdealLoopTree pointer.  For DATA nodes, the
2230   // _nodes array holds the earliest legal controlling CFG node.
2231 
2232   // Allocate stack with enough space to avoid frequent realloc
2233   int stack_size = (C->unique() >> 1) + 16; // (unique>>1)+16 from Java2D stats
2234   Node_Stack nstack( a, stack_size );
2235 
2236   visited.Clear();
2237   Node_List worklist(a);
2238   // Don't need C->root() on worklist since
2239   // it will be processed among C->top() inputs
2240   worklist.push( C->top() );
2241   visited.set( C->top()->_idx ); // Set C->top() as visited now
2242   build_loop_early( visited, worklist, nstack );
2243 
2244   // Given early legal placement, try finding counted loops.  This placement
2245   // is good enough to discover most loop invariants.
2246   if( !_verify_me && !_verify_only )
2247     _ltree_root->counted_loop( this );
2248 
2249   // Find latest loop placement.  Find ideal loop placement.
2250   visited.Clear();
2251   init_dom_lca_tags();
2252   // Need C->root() on worklist when processing outs
2253   worklist.push( C->root() );
2254   NOT_PRODUCT( C->verify_graph_edges(); )
2255   worklist.push( C->top() );
2256   build_loop_late( visited, worklist, nstack );
2257 
2258   if (_verify_only) {
2259     // restore major progress flag
2260     for (int i = 0; i < old_progress; i++)
2261       C->set_major_progress();
2262     assert(C->unique() == unique, "verification mode made Nodes? ? ?");
2263     assert(_igvn._worklist.size() == orig_worklist_size, "shouldn't push anything");
2264     return;
2265   }
2266 
2267   // clear out the dead code after build_loop_late
2268   while (_deadlist.size()) {
2269     _igvn.remove_globally_dead_node(_deadlist.pop());
2270   }
2271 
2272   if (stop_early) {
2273     assert(do_expensive_nodes, "why are we here?");
2274     if (process_expensive_nodes()) {
2275       // If we made some progress when processing expensive nodes then
2276       // the IGVN may modify the graph in a way that will allow us to
2277       // make some more progress: we need to try processing expensive
2278       // nodes again.
2279       C->set_major_progress();
2280     }
2281     _igvn.optimize();
2282     return;
2283   }
2284 
2285   // Some parser-inserted loop predicates could never be used by loop
2286   // predication or they were moved away from loop during some optimizations.
2287   // For example, peeling. Eliminate them before next loop optimizations.
2288   if (UseLoopPredicate || LoopLimitCheck) {
2289     eliminate_useless_predicates();
2290   }
2291 
2292 #ifndef PRODUCT
2293   C->verify_graph_edges();
2294   if (_verify_me) {             // Nested verify pass?
2295     // Check to see if the verify mode is broken
2296     assert(C->unique() == unique, "non-optimize mode made Nodes? ? ?");
2297     return;
2298   }
2299   if(VerifyLoopOptimizations) verify();
2300   if(TraceLoopOpts && C->has_loops()) {
2301     _ltree_root->dump();
2302   }
2303 #endif
2304 
2305   if (skip_loop_opts) {
2306     // Cleanup any modified bits
2307     _igvn.optimize();
2308 
2309     if (C->log() != NULL) {
2310       log_loop_tree(_ltree_root, _ltree_root, C->log());
2311     }
2312     return;
2313   }
2314 
2315   if (ReassociateInvariants) {
2316     // Reassociate invariants and prep for split_thru_phi
2317     for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
2318       IdealLoopTree* lpt = iter.current();
2319       if (!lpt->is_counted() || !lpt->is_inner()) continue;
2320 
2321       lpt->reassociate_invariants(this);
2322 
2323       // Because RCE opportunities can be masked by split_thru_phi,
2324       // look for RCE candidates and inhibit split_thru_phi
2325       // on just their loop-phi's for this pass of loop opts
2326       if (SplitIfBlocks && do_split_ifs) {
2327         if (lpt->policy_range_check(this)) {
2328           lpt->_rce_candidate = 1; // = true
2329         }
2330       }
2331     }
2332   }
2333 
2334   // Check for aggressive application of split-if and other transforms
2335   // that require basic-block info (like cloning through Phi's)
2336   if( SplitIfBlocks && do_split_ifs ) {
2337     visited.Clear();
2338     split_if_with_blocks( visited, nstack );
2339     NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
2340   }
2341 
2342   if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) {
2343     C->set_major_progress();
2344   }
2345 
2346   // Perform loop predication before iteration splitting
2347   if (C->has_loops() && !C->major_progress() && (C->predicate_count() > 0)) {
2348     _ltree_root->_child->loop_predication(this);
2349   }
2350 
2351   if (OptimizeFill && UseLoopPredicate && C->has_loops() && !C->major_progress()) {
2352     if (do_intrinsify_fill()) {
2353       C->set_major_progress();
2354     }
2355   }
2356 
2357   // Perform iteration-splitting on inner loops.  Split iterations to avoid
2358   // range checks or one-shot null checks.
2359 
2360   // If split-if's didn't hack the graph too bad (no CFG changes)
2361   // then do loop opts.
2362   if (C->has_loops() && !C->major_progress()) {
2363     memset( worklist.adr(), 0, worklist.Size()*sizeof(Node*) );
2364     _ltree_root->_child->iteration_split( this, worklist );
2365     // No verify after peeling!  GCM has hoisted code out of the loop.
2366     // After peeling, the hoisted code could sink inside the peeled area.
2367     // The peeling code does not try to recompute the best location for
2368     // all the code before the peeled area, so the verify pass will always
2369     // complain about it.
2370   }
2371   // Do verify graph edges in any case
2372   NOT_PRODUCT( C->verify_graph_edges(); );
2373 
2374   if (!do_split_ifs) {
2375     // We saw major progress in Split-If to get here.  We forced a
2376     // pass with unrolling and not split-if, however more split-if's
2377     // might make progress.  If the unrolling didn't make progress
2378     // then the major-progress flag got cleared and we won't try
2379     // another round of Split-If.  In particular the ever-common
2380     // instance-of/check-cast pattern requires at least 2 rounds of
2381     // Split-If to clear out.
2382     C->set_major_progress();
2383   }
2384 
2385   // Repeat loop optimizations if new loops were seen
2386   if (created_loop_node()) {
2387     C->set_major_progress();
2388   }
2389 
2390   // Keep loop predicates and perform optimizations with them
2391   // until no more loop optimizations could be done.
2392   // After that switch predicates off and do more loop optimizations.
2393   if (!C->major_progress() && (C->predicate_count() > 0)) {
2394      C->cleanup_loop_predicates(_igvn);
2395 #ifndef PRODUCT
2396      if (TraceLoopOpts) {
2397        tty->print_cr("PredicatesOff");
2398      }
2399 #endif
2400      C->set_major_progress();
2401   }
2402 
2403   // Convert scalar to superword operations at the end of all loop opts.
2404   if (UseSuperWord && C->has_loops() && !C->major_progress()) {
2405     // SuperWord transform
2406     SuperWord sw(this);
2407     for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
2408       IdealLoopTree* lpt = iter.current();
2409       if (lpt->is_counted()) {
2410         sw.transform_loop(lpt);
2411       }
2412     }
2413   }
2414 
2415   // Cleanup any modified bits
2416   _igvn.optimize();
2417 
2418   // disable assert until issue with split_flow_path is resolved (6742111)
2419   // assert(!_has_irreducible_loops || C->parsed_irreducible_loop() || C->is_osr_compilation(),
2420   //        "shouldn't introduce irreducible loops");
2421 
2422   if (C->log() != NULL) {
2423     log_loop_tree(_ltree_root, _ltree_root, C->log());
2424   }
2425 }
2426 
2427 #ifndef PRODUCT
2428 //------------------------------print_statistics-------------------------------
2429 int PhaseIdealLoop::_loop_invokes=0;// Count of PhaseIdealLoop invokes
2430 int PhaseIdealLoop::_loop_work=0; // Sum of PhaseIdealLoop x unique
2431 void PhaseIdealLoop::print_statistics() {
2432   tty->print_cr("PhaseIdealLoop=%d, sum _unique=%d", _loop_invokes, _loop_work);
2433 }
2434 
2435 //------------------------------verify-----------------------------------------
2436 // Build a verify-only PhaseIdealLoop, and see that it agrees with me.
2437 static int fail;                // debug only, so its multi-thread dont care
2438 void PhaseIdealLoop::verify() const {
2439   int old_progress = C->major_progress();
2440   ResourceMark rm;
2441   PhaseIdealLoop loop_verify( _igvn, this );
2442   VectorSet visited(Thread::current()->resource_area());
2443 
2444   fail = 0;
2445   verify_compare( C->root(), &loop_verify, visited );
2446   assert( fail == 0, "verify loops failed" );
2447   // Verify loop structure is the same
2448   _ltree_root->verify_tree(loop_verify._ltree_root, NULL);
2449   // Reset major-progress.  It was cleared by creating a verify version of
2450   // PhaseIdealLoop.
2451   for( int i=0; i<old_progress; i++ )
2452     C->set_major_progress();
2453 }
2454 
2455 //------------------------------verify_compare---------------------------------
2456 // Make sure me and the given PhaseIdealLoop agree on key data structures
2457 void PhaseIdealLoop::verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const {
2458   if( !n ) return;
2459   if( visited.test_set( n->_idx ) ) return;
2460   if( !_nodes[n->_idx] ) {      // Unreachable
2461     assert( !loop_verify->_nodes[n->_idx], "both should be unreachable" );
2462     return;
2463   }
2464 
2465   uint i;
2466   for( i = 0; i < n->req(); i++ )
2467     verify_compare( n->in(i), loop_verify, visited );
2468 
2469   // Check the '_nodes' block/loop structure
2470   i = n->_idx;
2471   if( has_ctrl(n) ) {           // We have control; verify has loop or ctrl
2472     if( _nodes[i] != loop_verify->_nodes[i] &&
2473         get_ctrl_no_update(n) != loop_verify->get_ctrl_no_update(n) ) {
2474       tty->print("Mismatched control setting for: ");
2475       n->dump();
2476       if( fail++ > 10 ) return;
2477       Node *c = get_ctrl_no_update(n);
2478       tty->print("We have it as: ");
2479       if( c->in(0) ) c->dump();
2480         else tty->print_cr("N%d",c->_idx);
2481       tty->print("Verify thinks: ");
2482       if( loop_verify->has_ctrl(n) )
2483         loop_verify->get_ctrl_no_update(n)->dump();
2484       else
2485         loop_verify->get_loop_idx(n)->dump();
2486       tty->cr();
2487     }
2488   } else {                    // We have a loop
2489     IdealLoopTree *us = get_loop_idx(n);
2490     if( loop_verify->has_ctrl(n) ) {
2491       tty->print("Mismatched loop setting for: ");
2492       n->dump();
2493       if( fail++ > 10 ) return;
2494       tty->print("We have it as: ");
2495       us->dump();
2496       tty->print("Verify thinks: ");
2497       loop_verify->get_ctrl_no_update(n)->dump();
2498       tty->cr();
2499     } else if (!C->major_progress()) {
2500       // Loop selection can be messed up if we did a major progress
2501       // operation, like split-if.  Do not verify in that case.
2502       IdealLoopTree *them = loop_verify->get_loop_idx(n);
2503       if( us->_head != them->_head ||  us->_tail != them->_tail ) {
2504         tty->print("Unequals loops for: ");
2505         n->dump();
2506         if( fail++ > 10 ) return;
2507         tty->print("We have it as: ");
2508         us->dump();
2509         tty->print("Verify thinks: ");
2510         them->dump();
2511         tty->cr();
2512       }
2513     }
2514   }
2515 
2516   // Check for immediate dominators being equal
2517   if( i >= _idom_size ) {
2518     if( !n->is_CFG() ) return;
2519     tty->print("CFG Node with no idom: ");
2520     n->dump();
2521     return;
2522   }
2523   if( !n->is_CFG() ) return;
2524   if( n == C->root() ) return; // No IDOM here
2525 
2526   assert(n->_idx == i, "sanity");
2527   Node *id = idom_no_update(n);
2528   if( id != loop_verify->idom_no_update(n) ) {
2529     tty->print("Unequals idoms for: ");
2530     n->dump();
2531     if( fail++ > 10 ) return;
2532     tty->print("We have it as: ");
2533     id->dump();
2534     tty->print("Verify thinks: ");
2535     loop_verify->idom_no_update(n)->dump();
2536     tty->cr();
2537   }
2538 
2539 }
2540 
2541 //------------------------------verify_tree------------------------------------
2542 // Verify that tree structures match.  Because the CFG can change, siblings
2543 // within the loop tree can be reordered.  We attempt to deal with that by
2544 // reordering the verify's loop tree if possible.
2545 void IdealLoopTree::verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const {
2546   assert( _parent == parent, "Badly formed loop tree" );
2547 
2548   // Siblings not in same order?  Attempt to re-order.
2549   if( _head != loop->_head ) {
2550     // Find _next pointer to update
2551     IdealLoopTree **pp = &loop->_parent->_child;
2552     while( *pp != loop )
2553       pp = &((*pp)->_next);
2554     // Find proper sibling to be next
2555     IdealLoopTree **nn = &loop->_next;
2556     while( (*nn) && (*nn)->_head != _head )
2557       nn = &((*nn)->_next);
2558 
2559     // Check for no match.
2560     if( !(*nn) ) {
2561       // Annoyingly, irreducible loops can pick different headers
2562       // after a major_progress operation, so the rest of the loop
2563       // tree cannot be matched.
2564       if (_irreducible && Compile::current()->major_progress())  return;
2565       assert( 0, "failed to match loop tree" );
2566     }
2567 
2568     // Move (*nn) to (*pp)
2569     IdealLoopTree *hit = *nn;
2570     *nn = hit->_next;
2571     hit->_next = loop;
2572     *pp = loop;
2573     loop = hit;
2574     // Now try again to verify
2575   }
2576 
2577   assert( _head  == loop->_head , "mismatched loop head" );
2578   Node *tail = _tail;           // Inline a non-updating version of
2579   while( !tail->in(0) )         // the 'tail()' call.
2580     tail = tail->in(1);
2581   assert( tail == loop->_tail, "mismatched loop tail" );
2582 
2583   // Counted loops that are guarded should be able to find their guards
2584   if( _head->is_CountedLoop() && _head->as_CountedLoop()->is_main_loop() ) {
2585     CountedLoopNode *cl = _head->as_CountedLoop();
2586     Node *init = cl->init_trip();
2587     Node *ctrl = cl->in(LoopNode::EntryControl);
2588     assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" );
2589     Node *iff  = ctrl->in(0);
2590     assert( iff->Opcode() == Op_If, "" );
2591     Node *bol  = iff->in(1);
2592     assert( bol->Opcode() == Op_Bool, "" );
2593     Node *cmp  = bol->in(1);
2594     assert( cmp->Opcode() == Op_CmpI, "" );
2595     Node *add  = cmp->in(1);
2596     Node *opaq;
2597     if( add->Opcode() == Op_Opaque1 ) {
2598       opaq = add;
2599     } else {
2600       assert( add->Opcode() == Op_AddI || add->Opcode() == Op_ConI , "" );
2601       assert( add == init, "" );
2602       opaq = cmp->in(2);
2603     }
2604     assert( opaq->Opcode() == Op_Opaque1, "" );
2605 
2606   }
2607 
2608   if (_child != NULL)  _child->verify_tree(loop->_child, this);
2609   if (_next  != NULL)  _next ->verify_tree(loop->_next,  parent);
2610   // Innermost loops need to verify loop bodies,
2611   // but only if no 'major_progress'
2612   int fail = 0;
2613   if (!Compile::current()->major_progress() && _child == NULL) {
2614     for( uint i = 0; i < _body.size(); i++ ) {
2615       Node *n = _body.at(i);
2616       if (n->outcnt() == 0)  continue; // Ignore dead
2617       uint j;
2618       for( j = 0; j < loop->_body.size(); j++ )
2619         if( loop->_body.at(j) == n )
2620           break;
2621       if( j == loop->_body.size() ) { // Not found in loop body
2622         // Last ditch effort to avoid assertion: Its possible that we
2623         // have some users (so outcnt not zero) but are still dead.
2624         // Try to find from root.
2625         if (Compile::current()->root()->find(n->_idx)) {
2626           fail++;
2627           tty->print("We have that verify does not: ");
2628           n->dump();
2629         }
2630       }
2631     }
2632     for( uint i2 = 0; i2 < loop->_body.size(); i2++ ) {
2633       Node *n = loop->_body.at(i2);
2634       if (n->outcnt() == 0)  continue; // Ignore dead
2635       uint j;
2636       for( j = 0; j < _body.size(); j++ )
2637         if( _body.at(j) == n )
2638           break;
2639       if( j == _body.size() ) { // Not found in loop body
2640         // Last ditch effort to avoid assertion: Its possible that we
2641         // have some users (so outcnt not zero) but are still dead.
2642         // Try to find from root.
2643         if (Compile::current()->root()->find(n->_idx)) {
2644           fail++;
2645           tty->print("Verify has that we do not: ");
2646           n->dump();
2647         }
2648       }
2649     }
2650     assert( !fail, "loop body mismatch" );
2651   }
2652 }
2653 
2654 #endif
2655 
2656 //------------------------------set_idom---------------------------------------
2657 void PhaseIdealLoop::set_idom(Node* d, Node* n, uint dom_depth) {
2658   uint idx = d->_idx;
2659   if (idx >= _idom_size) {
2660     uint newsize = _idom_size<<1;
2661     while( idx >= newsize ) {
2662       newsize <<= 1;
2663     }
2664     _idom      = REALLOC_RESOURCE_ARRAY( Node*,     _idom,_idom_size,newsize);
2665     _dom_depth = REALLOC_RESOURCE_ARRAY( uint, _dom_depth,_idom_size,newsize);
2666     memset( _dom_depth + _idom_size, 0, (newsize - _idom_size) * sizeof(uint) );
2667     _idom_size = newsize;
2668   }
2669   _idom[idx] = n;
2670   _dom_depth[idx] = dom_depth;
2671 }
2672 
2673 //------------------------------recompute_dom_depth---------------------------------------
2674 // The dominator tree is constructed with only parent pointers.
2675 // This recomputes the depth in the tree by first tagging all
2676 // nodes as "no depth yet" marker.  The next pass then runs up
2677 // the dom tree from each node marked "no depth yet", and computes
2678 // the depth on the way back down.
2679 void PhaseIdealLoop::recompute_dom_depth() {
2680   uint no_depth_marker = C->unique();
2681   uint i;
2682   // Initialize depth to "no depth yet"
2683   for (i = 0; i < _idom_size; i++) {
2684     if (_dom_depth[i] > 0 && _idom[i] != NULL) {
2685      _dom_depth[i] = no_depth_marker;
2686     }
2687   }
2688   if (_dom_stk == NULL) {
2689     uint init_size = C->unique() / 100; // Guess that 1/100 is a reasonable initial size.
2690     if (init_size < 10) init_size = 10;
2691     _dom_stk = new GrowableArray<uint>(init_size);
2692   }
2693   // Compute new depth for each node.
2694   for (i = 0; i < _idom_size; i++) {
2695     uint j = i;
2696     // Run up the dom tree to find a node with a depth
2697     while (_dom_depth[j] == no_depth_marker) {
2698       _dom_stk->push(j);
2699       j = _idom[j]->_idx;
2700     }
2701     // Compute the depth on the way back down this tree branch
2702     uint dd = _dom_depth[j] + 1;
2703     while (_dom_stk->length() > 0) {
2704       uint j = _dom_stk->pop();
2705       _dom_depth[j] = dd;
2706       dd++;
2707     }
2708   }
2709 }
2710 
2711 //------------------------------sort-------------------------------------------
2712 // Insert 'loop' into the existing loop tree.  'innermost' is a leaf of the
2713 // loop tree, not the root.
2714 IdealLoopTree *PhaseIdealLoop::sort( IdealLoopTree *loop, IdealLoopTree *innermost ) {
2715   if( !innermost ) return loop; // New innermost loop
2716 
2717   int loop_preorder = get_preorder(loop->_head); // Cache pre-order number
2718   assert( loop_preorder, "not yet post-walked loop" );
2719   IdealLoopTree **pp = &innermost;      // Pointer to previous next-pointer
2720   IdealLoopTree *l = *pp;               // Do I go before or after 'l'?
2721 
2722   // Insert at start of list
2723   while( l ) {                  // Insertion sort based on pre-order
2724     if( l == loop ) return innermost; // Already on list!
2725     int l_preorder = get_preorder(l->_head); // Cache pre-order number
2726     assert( l_preorder, "not yet post-walked l" );
2727     // Check header pre-order number to figure proper nesting
2728     if( loop_preorder > l_preorder )
2729       break;                    // End of insertion
2730     // If headers tie (e.g., shared headers) check tail pre-order numbers.
2731     // Since I split shared headers, you'd think this could not happen.
2732     // BUT: I must first do the preorder numbering before I can discover I
2733     // have shared headers, so the split headers all get the same preorder
2734     // number as the RegionNode they split from.
2735     if( loop_preorder == l_preorder &&
2736         get_preorder(loop->_tail) < get_preorder(l->_tail) )
2737       break;                    // Also check for shared headers (same pre#)
2738     pp = &l->_parent;           // Chain up list
2739     l = *pp;
2740   }
2741   // Link into list
2742   // Point predecessor to me
2743   *pp = loop;
2744   // Point me to successor
2745   IdealLoopTree *p = loop->_parent;
2746   loop->_parent = l;            // Point me to successor
2747   if( p ) sort( p, innermost ); // Insert my parents into list as well
2748   return innermost;
2749 }
2750 
2751 //------------------------------build_loop_tree--------------------------------
2752 // I use a modified Vick/Tarjan algorithm.  I need pre- and a post- visit
2753 // bits.  The _nodes[] array is mapped by Node index and holds a NULL for
2754 // not-yet-pre-walked, pre-order # for pre-but-not-post-walked and holds the
2755 // tightest enclosing IdealLoopTree for post-walked.
2756 //
2757 // During my forward walk I do a short 1-layer lookahead to see if I can find
2758 // a loop backedge with that doesn't have any work on the backedge.  This
2759 // helps me construct nested loops with shared headers better.
2760 //
2761 // Once I've done the forward recursion, I do the post-work.  For each child
2762 // I check to see if there is a backedge.  Backedges define a loop!  I
2763 // insert an IdealLoopTree at the target of the backedge.
2764 //
2765 // During the post-work I also check to see if I have several children
2766 // belonging to different loops.  If so, then this Node is a decision point
2767 // where control flow can choose to change loop nests.  It is at this
2768 // decision point where I can figure out how loops are nested.  At this
2769 // time I can properly order the different loop nests from my children.
2770 // Note that there may not be any backedges at the decision point!
2771 //
2772 // Since the decision point can be far removed from the backedges, I can't
2773 // order my loops at the time I discover them.  Thus at the decision point
2774 // I need to inspect loop header pre-order numbers to properly nest my
2775 // loops.  This means I need to sort my childrens' loops by pre-order.
2776 // The sort is of size number-of-control-children, which generally limits
2777 // it to size 2 (i.e., I just choose between my 2 target loops).
2778 void PhaseIdealLoop::build_loop_tree() {
2779   // Allocate stack of size C->unique()/2 to avoid frequent realloc
2780   GrowableArray <Node *> bltstack(C->unique() >> 1);
2781   Node *n = C->root();
2782   bltstack.push(n);
2783   int pre_order = 1;
2784   int stack_size;
2785 
2786   while ( ( stack_size = bltstack.length() ) != 0 ) {
2787     n = bltstack.top(); // Leave node on stack
2788     if ( !is_visited(n) ) {
2789       // ---- Pre-pass Work ----
2790       // Pre-walked but not post-walked nodes need a pre_order number.
2791 
2792       set_preorder_visited( n, pre_order ); // set as visited
2793 
2794       // ---- Scan over children ----
2795       // Scan first over control projections that lead to loop headers.
2796       // This helps us find inner-to-outer loops with shared headers better.
2797 
2798       // Scan children's children for loop headers.
2799       for ( int i = n->outcnt() - 1; i >= 0; --i ) {
2800         Node* m = n->raw_out(i);       // Child
2801         if( m->is_CFG() && !is_visited(m) ) { // Only for CFG children
2802           // Scan over children's children to find loop
2803           for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
2804             Node* l = m->fast_out(j);
2805             if( is_visited(l) &&       // Been visited?
2806                 !is_postvisited(l) &&  // But not post-visited
2807                 get_preorder(l) < pre_order ) { // And smaller pre-order
2808               // Found!  Scan the DFS down this path before doing other paths
2809               bltstack.push(m);
2810               break;
2811             }
2812           }
2813         }
2814       }
2815       pre_order++;
2816     }
2817     else if ( !is_postvisited(n) ) {
2818       // Note: build_loop_tree_impl() adds out edges on rare occasions,
2819       // such as com.sun.rsasign.am::a.
2820       // For non-recursive version, first, process current children.
2821       // On next iteration, check if additional children were added.
2822       for ( int k = n->outcnt() - 1; k >= 0; --k ) {
2823         Node* u = n->raw_out(k);
2824         if ( u->is_CFG() && !is_visited(u) ) {
2825           bltstack.push(u);
2826         }
2827       }
2828       if ( bltstack.length() == stack_size ) {
2829         // There were no additional children, post visit node now
2830         (void)bltstack.pop(); // Remove node from stack
2831         pre_order = build_loop_tree_impl( n, pre_order );
2832         // Check for bailout
2833         if (C->failing()) {
2834           return;
2835         }
2836         // Check to grow _preorders[] array for the case when
2837         // build_loop_tree_impl() adds new nodes.
2838         check_grow_preorders();
2839       }
2840     }
2841     else {
2842       (void)bltstack.pop(); // Remove post-visited node from stack
2843     }
2844   }
2845 }
2846 
2847 //------------------------------build_loop_tree_impl---------------------------
2848 int PhaseIdealLoop::build_loop_tree_impl( Node *n, int pre_order ) {
2849   // ---- Post-pass Work ----
2850   // Pre-walked but not post-walked nodes need a pre_order number.
2851 
2852   // Tightest enclosing loop for this Node
2853   IdealLoopTree *innermost = NULL;
2854 
2855   // For all children, see if any edge is a backedge.  If so, make a loop
2856   // for it.  Then find the tightest enclosing loop for the self Node.
2857   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2858     Node* m = n->fast_out(i);   // Child
2859     if( n == m ) continue;      // Ignore control self-cycles
2860     if( !m->is_CFG() ) continue;// Ignore non-CFG edges
2861 
2862     IdealLoopTree *l;           // Child's loop
2863     if( !is_postvisited(m) ) {  // Child visited but not post-visited?
2864       // Found a backedge
2865       assert( get_preorder(m) < pre_order, "should be backedge" );
2866       // Check for the RootNode, which is already a LoopNode and is allowed
2867       // to have multiple "backedges".
2868       if( m == C->root()) {     // Found the root?
2869         l = _ltree_root;        // Root is the outermost LoopNode
2870       } else {                  // Else found a nested loop
2871         // Insert a LoopNode to mark this loop.
2872         l = new IdealLoopTree(this, m, n);
2873       } // End of Else found a nested loop
2874       if( !has_loop(m) )        // If 'm' does not already have a loop set
2875         set_loop(m, l);         // Set loop header to loop now
2876 
2877     } else {                    // Else not a nested loop
2878       if( !_nodes[m->_idx] ) continue; // Dead code has no loop
2879       l = get_loop(m);          // Get previously determined loop
2880       // If successor is header of a loop (nest), move up-loop till it
2881       // is a member of some outer enclosing loop.  Since there are no
2882       // shared headers (I've split them already) I only need to go up
2883       // at most 1 level.
2884       while( l && l->_head == m ) // Successor heads loop?
2885         l = l->_parent;         // Move up 1 for me
2886       // If this loop is not properly parented, then this loop
2887       // has no exit path out, i.e. its an infinite loop.
2888       if( !l ) {
2889         // Make loop "reachable" from root so the CFG is reachable.  Basically
2890         // insert a bogus loop exit that is never taken.  'm', the loop head,
2891         // points to 'n', one (of possibly many) fall-in paths.  There may be
2892         // many backedges as well.
2893 
2894         // Here I set the loop to be the root loop.  I could have, after
2895         // inserting a bogus loop exit, restarted the recursion and found my
2896         // new loop exit.  This would make the infinite loop a first-class
2897         // loop and it would then get properly optimized.  What's the use of
2898         // optimizing an infinite loop?
2899         l = _ltree_root;        // Oops, found infinite loop
2900 
2901         if (!_verify_only) {
2902           // Insert the NeverBranch between 'm' and it's control user.
2903           NeverBranchNode *iff = new (C) NeverBranchNode( m );
2904           _igvn.register_new_node_with_optimizer(iff);
2905           set_loop(iff, l);
2906           Node *if_t = new (C) CProjNode( iff, 0 );
2907           _igvn.register_new_node_with_optimizer(if_t);
2908           set_loop(if_t, l);
2909 
2910           Node* cfg = NULL;       // Find the One True Control User of m
2911           for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
2912             Node* x = m->fast_out(j);
2913             if (x->is_CFG() && x != m && x != iff)
2914               { cfg = x; break; }
2915           }
2916           assert(cfg != NULL, "must find the control user of m");
2917           uint k = 0;             // Probably cfg->in(0)
2918           while( cfg->in(k) != m ) k++; // But check incase cfg is a Region
2919           cfg->set_req( k, if_t ); // Now point to NeverBranch
2920 
2921           // Now create the never-taken loop exit
2922           Node *if_f = new (C) CProjNode( iff, 1 );
2923           _igvn.register_new_node_with_optimizer(if_f);
2924           set_loop(if_f, l);
2925           // Find frame ptr for Halt.  Relies on the optimizer
2926           // V-N'ing.  Easier and quicker than searching through
2927           // the program structure.
2928           Node *frame = new (C) ParmNode( C->start(), TypeFunc::FramePtr );
2929           _igvn.register_new_node_with_optimizer(frame);
2930           // Halt & Catch Fire
2931           Node *halt = new (C) HaltNode( if_f, frame );
2932           _igvn.register_new_node_with_optimizer(halt);
2933           set_loop(halt, l);
2934           C->root()->add_req(halt);
2935         }
2936         set_loop(C->root(), _ltree_root);
2937       }
2938     }
2939     // Weeny check for irreducible.  This child was already visited (this
2940     // IS the post-work phase).  Is this child's loop header post-visited
2941     // as well?  If so, then I found another entry into the loop.
2942     if (!_verify_only) {
2943       while( is_postvisited(l->_head) ) {
2944         // found irreducible
2945         l->_irreducible = 1; // = true
2946         l = l->_parent;
2947         _has_irreducible_loops = true;
2948         // Check for bad CFG here to prevent crash, and bailout of compile
2949         if (l == NULL) {
2950           C->record_method_not_compilable("unhandled CFG detected during loop optimization");
2951           return pre_order;
2952         }
2953       }
2954       C->set_has_irreducible_loop(_has_irreducible_loops);
2955     }
2956 
2957     // This Node might be a decision point for loops.  It is only if
2958     // it's children belong to several different loops.  The sort call
2959     // does a trivial amount of work if there is only 1 child or all
2960     // children belong to the same loop.  If however, the children
2961     // belong to different loops, the sort call will properly set the
2962     // _parent pointers to show how the loops nest.
2963     //
2964     // In any case, it returns the tightest enclosing loop.
2965     innermost = sort( l, innermost );
2966   }
2967 
2968   // Def-use info will have some dead stuff; dead stuff will have no
2969   // loop decided on.
2970 
2971   // Am I a loop header?  If so fix up my parent's child and next ptrs.
2972   if( innermost && innermost->_head == n ) {
2973     assert( get_loop(n) == innermost, "" );
2974     IdealLoopTree *p = innermost->_parent;
2975     IdealLoopTree *l = innermost;
2976     while( p && l->_head == n ) {
2977       l->_next = p->_child;     // Put self on parents 'next child'
2978       p->_child = l;            // Make self as first child of parent
2979       l = p;                    // Now walk up the parent chain
2980       p = l->_parent;
2981     }
2982   } else {
2983     // Note that it is possible for a LoopNode to reach here, if the
2984     // backedge has been made unreachable (hence the LoopNode no longer
2985     // denotes a Loop, and will eventually be removed).
2986 
2987     // Record tightest enclosing loop for self.  Mark as post-visited.
2988     set_loop(n, innermost);
2989     // Also record has_call flag early on
2990     if( innermost ) {
2991       if( n->is_Call() && !n->is_CallLeaf() && !n->is_macro() ) {
2992         // Do not count uncommon calls
2993         if( !n->is_CallStaticJava() || !n->as_CallStaticJava()->_name ) {
2994           Node *iff = n->in(0)->in(0);
2995           // No any calls for vectorized loops.
2996           if( UseSuperWord || !iff->is_If() ||
2997               (n->in(0)->Opcode() == Op_IfFalse &&
2998                (1.0 - iff->as_If()->_prob) >= 0.01) ||
2999               (iff->as_If()->_prob >= 0.01) )
3000             innermost->_has_call = 1;
3001         }
3002       } else if( n->is_Allocate() && n->as_Allocate()->_is_scalar_replaceable ) {
3003         // Disable loop optimizations if the loop has a scalar replaceable
3004         // allocation. This disabling may cause a potential performance lost
3005         // if the allocation is not eliminated for some reason.
3006         innermost->_allow_optimizations = false;
3007         innermost->_has_call = 1; // = true
3008       } else if (n->Opcode() == Op_SafePoint) {
3009         // Record all safepoints in this loop.
3010         if (innermost->_safepts == NULL) innermost->_safepts = new Node_List();
3011         innermost->_safepts->push(n);
3012       }
3013     }
3014   }
3015 
3016   // Flag as post-visited now
3017   set_postvisited(n);
3018   return pre_order;
3019 }
3020 
3021 
3022 //------------------------------build_loop_early-------------------------------
3023 // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
3024 // First pass computes the earliest controlling node possible.  This is the
3025 // controlling input with the deepest dominating depth.
3026 void PhaseIdealLoop::build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ) {
3027   while (worklist.size() != 0) {
3028     // Use local variables nstack_top_n & nstack_top_i to cache values
3029     // on nstack's top.
3030     Node *nstack_top_n = worklist.pop();
3031     uint  nstack_top_i = 0;
3032 //while_nstack_nonempty:
3033     while (true) {
3034       // Get parent node and next input's index from stack's top.
3035       Node  *n = nstack_top_n;
3036       uint   i = nstack_top_i;
3037       uint cnt = n->req(); // Count of inputs
3038       if (i == 0) {        // Pre-process the node.
3039         if( has_node(n) &&            // Have either loop or control already?
3040             !has_ctrl(n) ) {          // Have loop picked out already?
3041           // During "merge_many_backedges" we fold up several nested loops
3042           // into a single loop.  This makes the members of the original
3043           // loop bodies pointing to dead loops; they need to move up
3044           // to the new UNION'd larger loop.  I set the _head field of these
3045           // dead loops to NULL and the _parent field points to the owning
3046           // loop.  Shades of UNION-FIND algorithm.
3047           IdealLoopTree *ilt;
3048           while( !(ilt = get_loop(n))->_head ) {
3049             // Normally I would use a set_loop here.  But in this one special
3050             // case, it is legal (and expected) to change what loop a Node
3051             // belongs to.
3052             _nodes.map(n->_idx, (Node*)(ilt->_parent) );
3053           }
3054           // Remove safepoints ONLY if I've already seen I don't need one.
3055           // (the old code here would yank a 2nd safepoint after seeing a
3056           // first one, even though the 1st did not dominate in the loop body
3057           // and thus could be avoided indefinitely)
3058           if( !_verify_only && !_verify_me && ilt->_has_sfpt && n->Opcode() == Op_SafePoint &&
3059               is_deleteable_safept(n)) {
3060             Node *in = n->in(TypeFunc::Control);
3061             lazy_replace(n,in);       // Pull safepoint now
3062             if (ilt->_safepts != NULL) {
3063               ilt->_safepts->yank(n);
3064             }
3065             // Carry on with the recursion "as if" we are walking
3066             // only the control input
3067             if( !visited.test_set( in->_idx ) ) {
3068               worklist.push(in);      // Visit this guy later, using worklist
3069             }
3070             // Get next node from nstack:
3071             // - skip n's inputs processing by setting i > cnt;
3072             // - we also will not call set_early_ctrl(n) since
3073             //   has_node(n) == true (see the condition above).
3074             i = cnt + 1;
3075           }
3076         }
3077       } // if (i == 0)
3078 
3079       // Visit all inputs
3080       bool done = true;       // Assume all n's inputs will be processed
3081       while (i < cnt) {
3082         Node *in = n->in(i);
3083         ++i;
3084         if (in == NULL) continue;
3085         if (in->pinned() && !in->is_CFG())
3086           set_ctrl(in, in->in(0));
3087         int is_visited = visited.test_set( in->_idx );
3088         if (!has_node(in)) {  // No controlling input yet?
3089           assert( !in->is_CFG(), "CFG Node with no controlling input?" );
3090           assert( !is_visited, "visit only once" );
3091           nstack.push(n, i);  // Save parent node and next input's index.
3092           nstack_top_n = in;  // Process current input now.
3093           nstack_top_i = 0;
3094           done = false;       // Not all n's inputs processed.
3095           break; // continue while_nstack_nonempty;
3096         } else if (!is_visited) {
3097           // This guy has a location picked out for him, but has not yet
3098           // been visited.  Happens to all CFG nodes, for instance.
3099           // Visit him using the worklist instead of recursion, to break
3100           // cycles.  Since he has a location already we do not need to
3101           // find his location before proceeding with the current Node.
3102           worklist.push(in);  // Visit this guy later, using worklist
3103         }
3104       }
3105       if (done) {
3106         // All of n's inputs have been processed, complete post-processing.
3107 
3108         // Compute earliest point this Node can go.
3109         // CFG, Phi, pinned nodes already know their controlling input.
3110         if (!has_node(n)) {
3111           // Record earliest legal location
3112           set_early_ctrl( n );
3113         }
3114         if (nstack.is_empty()) {
3115           // Finished all nodes on stack.
3116           // Process next node on the worklist.
3117           break;
3118         }
3119         // Get saved parent node and next input's index.
3120         nstack_top_n = nstack.node();
3121         nstack_top_i = nstack.index();
3122         nstack.pop();
3123       }
3124     } // while (true)
3125   }
3126 }
3127 
3128 //------------------------------dom_lca_internal--------------------------------
3129 // Pair-wise LCA
3130 Node *PhaseIdealLoop::dom_lca_internal( Node *n1, Node *n2 ) const {
3131   if( !n1 ) return n2;          // Handle NULL original LCA
3132   assert( n1->is_CFG(), "" );
3133   assert( n2->is_CFG(), "" );
3134   // find LCA of all uses
3135   uint d1 = dom_depth(n1);
3136   uint d2 = dom_depth(n2);
3137   while (n1 != n2) {
3138     if (d1 > d2) {
3139       n1 =      idom(n1);
3140       d1 = dom_depth(n1);
3141     } else if (d1 < d2) {
3142       n2 =      idom(n2);
3143       d2 = dom_depth(n2);
3144     } else {
3145       // Here d1 == d2.  Due to edits of the dominator-tree, sections
3146       // of the tree might have the same depth.  These sections have
3147       // to be searched more carefully.
3148 
3149       // Scan up all the n1's with equal depth, looking for n2.
3150       Node *t1 = idom(n1);
3151       while (dom_depth(t1) == d1) {
3152         if (t1 == n2)  return n2;
3153         t1 = idom(t1);
3154       }
3155       // Scan up all the n2's with equal depth, looking for n1.
3156       Node *t2 = idom(n2);
3157       while (dom_depth(t2) == d2) {
3158         if (t2 == n1)  return n1;
3159         t2 = idom(t2);
3160       }
3161       // Move up to a new dominator-depth value as well as up the dom-tree.
3162       n1 = t1;
3163       n2 = t2;
3164       d1 = dom_depth(n1);
3165       d2 = dom_depth(n2);
3166     }
3167   }
3168   return n1;
3169 }
3170 
3171 //------------------------------compute_idom-----------------------------------
3172 // Locally compute IDOM using dom_lca call.  Correct only if the incoming
3173 // IDOMs are correct.
3174 Node *PhaseIdealLoop::compute_idom( Node *region ) const {
3175   assert( region->is_Region(), "" );
3176   Node *LCA = NULL;
3177   for( uint i = 1; i < region->req(); i++ ) {
3178     if( region->in(i) != C->top() )
3179       LCA = dom_lca( LCA, region->in(i) );
3180   }
3181   return LCA;
3182 }
3183 
3184 bool PhaseIdealLoop::verify_dominance(Node* n, Node* use, Node* LCA, Node* early) {
3185   bool had_error = false;
3186 #ifdef ASSERT
3187   if (early != C->root()) {
3188     // Make sure that there's a dominance path from LCA to early
3189     Node* d = LCA;
3190     while (d != early) {
3191       if (d == C->root()) {
3192         dump_bad_graph("Bad graph detected in compute_lca_of_uses", n, early, LCA);
3193         tty->print_cr("*** Use %d isn't dominated by def %d ***", use->_idx, n->_idx);
3194         had_error = true;
3195         break;
3196       }
3197       d = idom(d);
3198     }
3199   }
3200 #endif
3201   return had_error;
3202 }
3203 
3204 
3205 Node* PhaseIdealLoop::compute_lca_of_uses(Node* n, Node* early, bool verify) {
3206   // Compute LCA over list of uses
3207   bool had_error = false;
3208   Node *LCA = NULL;
3209   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && LCA != early; i++) {
3210     Node* c = n->fast_out(i);
3211     if (_nodes[c->_idx] == NULL)
3212       continue;                 // Skip the occasional dead node
3213     if( c->is_Phi() ) {         // For Phis, we must land above on the path
3214       for( uint j=1; j<c->req(); j++ ) {// For all inputs
3215         if( c->in(j) == n ) {   // Found matching input?
3216           Node *use = c->in(0)->in(j);
3217           if (_verify_only && use->is_top()) continue;
3218           LCA = dom_lca_for_get_late_ctrl( LCA, use, n );
3219           if (verify) had_error = verify_dominance(n, use, LCA, early) || had_error;
3220         }
3221       }
3222     } else {
3223       // For CFG data-users, use is in the block just prior
3224       Node *use = has_ctrl(c) ? get_ctrl(c) : c->in(0);
3225       LCA = dom_lca_for_get_late_ctrl( LCA, use, n );
3226       if (verify) had_error = verify_dominance(n, use, LCA, early) || had_error;
3227     }
3228   }
3229   assert(!had_error, "bad dominance");
3230   return LCA;
3231 }
3232 
3233 //------------------------------get_late_ctrl----------------------------------
3234 // Compute latest legal control.
3235 Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
3236   assert(early != NULL, "early control should not be NULL");
3237 
3238   Node* LCA = compute_lca_of_uses(n, early);
3239 #ifdef ASSERT
3240   if (LCA == C->root() && LCA != early) {
3241     // def doesn't dominate uses so print some useful debugging output
3242     compute_lca_of_uses(n, early, true);
3243   }
3244 #endif
3245 
3246   // if this is a load, check for anti-dependent stores
3247   // We use a conservative algorithm to identify potential interfering
3248   // instructions and for rescheduling the load.  The users of the memory
3249   // input of this load are examined.  Any use which is not a load and is
3250   // dominated by early is considered a potentially interfering store.
3251   // This can produce false positives.
3252   if (n->is_Load() && LCA != early) {
3253     Node_List worklist;
3254 
3255     Node *mem = n->in(MemNode::Memory);
3256     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
3257       Node* s = mem->fast_out(i);
3258       worklist.push(s);
3259     }
3260     while(worklist.size() != 0 && LCA != early) {
3261       Node* s = worklist.pop();
3262       if (s->is_Load()) {
3263         continue;
3264       } else if (s->is_MergeMem()) {
3265         for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
3266           Node* s1 = s->fast_out(i);
3267           worklist.push(s1);
3268         }
3269       } else {
3270         Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
3271         assert(sctrl != NULL || s->outcnt() == 0, "must have control");
3272         if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) {
3273           LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
3274         }
3275       }
3276     }
3277   }
3278 
3279   assert(LCA == find_non_split_ctrl(LCA), "unexpected late control");
3280   return LCA;
3281 }
3282 
3283 // true if CFG node d dominates CFG node n
3284 bool PhaseIdealLoop::is_dominator(Node *d, Node *n) {
3285   if (d == n)
3286     return true;
3287   assert(d->is_CFG() && n->is_CFG(), "must have CFG nodes");
3288   uint dd = dom_depth(d);
3289   while (dom_depth(n) >= dd) {
3290     if (n == d)
3291       return true;
3292     n = idom(n);
3293   }
3294   return false;
3295 }
3296 
3297 //------------------------------dom_lca_for_get_late_ctrl_internal-------------
3298 // Pair-wise LCA with tags.
3299 // Tag each index with the node 'tag' currently being processed
3300 // before advancing up the dominator chain using idom().
3301 // Later calls that find a match to 'tag' know that this path has already
3302 // been considered in the current LCA (which is input 'n1' by convention).
3303 // Since get_late_ctrl() is only called once for each node, the tag array
3304 // does not need to be cleared between calls to get_late_ctrl().
3305 // Algorithm trades a larger constant factor for better asymptotic behavior
3306 //
3307 Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, Node *tag ) {
3308   uint d1 = dom_depth(n1);
3309   uint d2 = dom_depth(n2);
3310 
3311   do {
3312     if (d1 > d2) {
3313       // current lca is deeper than n2
3314       _dom_lca_tags.map(n1->_idx, tag);
3315       n1 =      idom(n1);
3316       d1 = dom_depth(n1);
3317     } else if (d1 < d2) {
3318       // n2 is deeper than current lca
3319       Node *memo = _dom_lca_tags[n2->_idx];
3320       if( memo == tag ) {
3321         return n1;    // Return the current LCA
3322       }
3323       _dom_lca_tags.map(n2->_idx, tag);
3324       n2 =      idom(n2);
3325       d2 = dom_depth(n2);
3326     } else {
3327       // Here d1 == d2.  Due to edits of the dominator-tree, sections
3328       // of the tree might have the same depth.  These sections have
3329       // to be searched more carefully.
3330 
3331       // Scan up all the n1's with equal depth, looking for n2.
3332       _dom_lca_tags.map(n1->_idx, tag);
3333       Node *t1 = idom(n1);
3334       while (dom_depth(t1) == d1) {
3335         if (t1 == n2)  return n2;
3336         _dom_lca_tags.map(t1->_idx, tag);
3337         t1 = idom(t1);
3338       }
3339       // Scan up all the n2's with equal depth, looking for n1.
3340       _dom_lca_tags.map(n2->_idx, tag);
3341       Node *t2 = idom(n2);
3342       while (dom_depth(t2) == d2) {
3343         if (t2 == n1)  return n1;
3344         _dom_lca_tags.map(t2->_idx, tag);
3345         t2 = idom(t2);
3346       }
3347       // Move up to a new dominator-depth value as well as up the dom-tree.
3348       n1 = t1;
3349       n2 = t2;
3350       d1 = dom_depth(n1);
3351       d2 = dom_depth(n2);
3352     }
3353   } while (n1 != n2);
3354   return n1;
3355 }
3356 
3357 //------------------------------init_dom_lca_tags------------------------------
3358 // Tag could be a node's integer index, 32bits instead of 64bits in some cases
3359 // Intended use does not involve any growth for the array, so it could
3360 // be of fixed size.
3361 void PhaseIdealLoop::init_dom_lca_tags() {
3362   uint limit = C->unique() + 1;
3363   _dom_lca_tags.map( limit, NULL );
3364 #ifdef ASSERT
3365   for( uint i = 0; i < limit; ++i ) {
3366     assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
3367   }
3368 #endif // ASSERT
3369 }
3370 
3371 //------------------------------clear_dom_lca_tags------------------------------
3372 // Tag could be a node's integer index, 32bits instead of 64bits in some cases
3373 // Intended use does not involve any growth for the array, so it could
3374 // be of fixed size.
3375 void PhaseIdealLoop::clear_dom_lca_tags() {
3376   uint limit = C->unique() + 1;
3377   _dom_lca_tags.map( limit, NULL );
3378   _dom_lca_tags.clear();
3379 #ifdef ASSERT
3380   for( uint i = 0; i < limit; ++i ) {
3381     assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
3382   }
3383 #endif // ASSERT
3384 }
3385 
3386 //------------------------------build_loop_late--------------------------------
3387 // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
3388 // Second pass finds latest legal placement, and ideal loop placement.
3389 void PhaseIdealLoop::build_loop_late( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ) {
3390   while (worklist.size() != 0) {
3391     Node *n = worklist.pop();
3392     // Only visit once
3393     if (visited.test_set(n->_idx)) continue;
3394     uint cnt = n->outcnt();
3395     uint   i = 0;
3396     while (true) {
3397       assert( _nodes[n->_idx], "no dead nodes" );
3398       // Visit all children
3399       if (i < cnt) {
3400         Node* use = n->raw_out(i);
3401         ++i;
3402         // Check for dead uses.  Aggressively prune such junk.  It might be
3403         // dead in the global sense, but still have local uses so I cannot
3404         // easily call 'remove_dead_node'.
3405         if( _nodes[use->_idx] != NULL || use->is_top() ) { // Not dead?
3406           // Due to cycles, we might not hit the same fixed point in the verify
3407           // pass as we do in the regular pass.  Instead, visit such phis as
3408           // simple uses of the loop head.
3409           if( use->in(0) && (use->is_CFG() || use->is_Phi()) ) {
3410             if( !visited.test(use->_idx) )
3411               worklist.push(use);
3412           } else if( !visited.test_set(use->_idx) ) {
3413             nstack.push(n, i); // Save parent and next use's index.
3414             n   = use;         // Process all children of current use.
3415             cnt = use->outcnt();
3416             i   = 0;
3417           }
3418         } else {
3419           // Do not visit around the backedge of loops via data edges.
3420           // push dead code onto a worklist
3421           _deadlist.push(use);
3422         }
3423       } else {
3424         // All of n's children have been processed, complete post-processing.
3425         build_loop_late_post(n);
3426         if (nstack.is_empty()) {
3427           // Finished all nodes on stack.
3428           // Process next node on the worklist.
3429           break;
3430         }
3431         // Get saved parent node and next use's index. Visit the rest of uses.
3432         n   = nstack.node();
3433         cnt = n->outcnt();
3434         i   = nstack.index();
3435         nstack.pop();
3436       }
3437     }
3438   }
3439 }
3440 
3441 //------------------------------build_loop_late_post---------------------------
3442 // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
3443 // Second pass finds latest legal placement, and ideal loop placement.
3444 void PhaseIdealLoop::build_loop_late_post( Node *n ) {
3445 
3446   if (n->req() == 2 && n->Opcode() == Op_ConvI2L && !C->major_progress() && !_verify_only) {
3447     _igvn._worklist.push(n);  // Maybe we'll normalize it, if no more loops.
3448   }
3449 
3450 #ifdef ASSERT
3451   if (_verify_only && !n->is_CFG()) {
3452     // Check def-use domination.
3453     compute_lca_of_uses(n, get_ctrl(n), true /* verify */);
3454   }
3455 #endif
3456 
3457   // CFG and pinned nodes already handled
3458   if( n->in(0) ) {
3459     if( n->in(0)->is_top() ) return; // Dead?
3460 
3461     // We'd like +VerifyLoopOptimizations to not believe that Mod's/Loads
3462     // _must_ be pinned (they have to observe their control edge of course).
3463     // Unlike Stores (which modify an unallocable resource, the memory
3464     // state), Mods/Loads can float around.  So free them up.
3465     bool pinned = true;
3466     switch( n->Opcode() ) {
3467     case Op_DivI:
3468     case Op_DivF:
3469     case Op_DivD:
3470     case Op_ModI:
3471     case Op_ModF:
3472     case Op_ModD:
3473     case Op_LoadB:              // Same with Loads; they can sink
3474     case Op_LoadUB:             // during loop optimizations.
3475     case Op_LoadUS:
3476     case Op_LoadD:
3477     case Op_LoadF:
3478     case Op_LoadI:
3479     case Op_LoadKlass:
3480     case Op_LoadNKlass:
3481     case Op_LoadL:
3482     case Op_LoadS:
3483     case Op_LoadP:
3484     case Op_LoadN:
3485     case Op_LoadRange:
3486     case Op_LoadD_unaligned:
3487     case Op_LoadL_unaligned:
3488     case Op_StrComp:            // Does a bunch of load-like effects
3489     case Op_StrEquals:
3490     case Op_StrIndexOf:
3491     case Op_AryEq:
3492       pinned = false;
3493     }
3494     if( pinned ) {
3495       IdealLoopTree *chosen_loop = get_loop(n->is_CFG() ? n : get_ctrl(n));
3496       if( !chosen_loop->_child )       // Inner loop?
3497         chosen_loop->_body.push(n); // Collect inner loops
3498       return;
3499     }
3500   } else {                      // No slot zero
3501     if( n->is_CFG() ) {         // CFG with no slot 0 is dead
3502       _nodes.map(n->_idx,0);    // No block setting, it's globally dead
3503       return;
3504     }
3505     assert(!n->is_CFG() || n->outcnt() == 0, "");
3506   }
3507 
3508   // Do I have a "safe range" I can select over?
3509   Node *early = get_ctrl(n);// Early location already computed
3510 
3511   // Compute latest point this Node can go
3512   Node *LCA = get_late_ctrl( n, early );
3513   // LCA is NULL due to uses being dead
3514   if( LCA == NULL ) {
3515 #ifdef ASSERT
3516     for (DUIterator i1 = n->outs(); n->has_out(i1); i1++) {
3517       assert( _nodes[n->out(i1)->_idx] == NULL, "all uses must also be dead");
3518     }
3519 #endif
3520     _nodes.map(n->_idx, 0);     // This node is useless
3521     _deadlist.push(n);
3522     return;
3523   }
3524   assert(LCA != NULL && !LCA->is_top(), "no dead nodes");
3525 
3526   Node *legal = LCA;            // Walk 'legal' up the IDOM chain
3527   Node *least = legal;          // Best legal position so far
3528   while( early != legal ) {     // While not at earliest legal
3529 #ifdef ASSERT
3530     if (legal->is_Start() && !early->is_Root()) {
3531       // Bad graph. Print idom path and fail.
3532       dump_bad_graph("Bad graph detected in build_loop_late", n, early, LCA);
3533       assert(false, "Bad graph detected in build_loop_late");
3534     }
3535 #endif
3536     // Find least loop nesting depth
3537     legal = idom(legal);        // Bump up the IDOM tree
3538     // Check for lower nesting depth
3539     if( get_loop(legal)->_nest < get_loop(least)->_nest )
3540       least = legal;
3541   }
3542   assert(early == legal || legal != C->root(), "bad dominance of inputs");
3543 
3544   // Try not to place code on a loop entry projection
3545   // which can inhibit range check elimination.
3546   if (least != early) {
3547     Node* ctrl_out = least->unique_ctrl_out();
3548     if (ctrl_out && ctrl_out->is_CountedLoop() &&
3549         least == ctrl_out->in(LoopNode::EntryControl)) {
3550       Node* least_dom = idom(least);
3551       if (get_loop(least_dom)->is_member(get_loop(least))) {
3552         least = least_dom;
3553       }
3554     }
3555   }
3556 
3557 #ifdef ASSERT
3558   // If verifying, verify that 'verify_me' has a legal location
3559   // and choose it as our location.
3560   if( _verify_me ) {
3561     Node *v_ctrl = _verify_me->get_ctrl_no_update(n);
3562     Node *legal = LCA;
3563     while( early != legal ) {   // While not at earliest legal
3564       if( legal == v_ctrl ) break;  // Check for prior good location
3565       legal = idom(legal)      ;// Bump up the IDOM tree
3566     }
3567     // Check for prior good location
3568     if( legal == v_ctrl ) least = legal; // Keep prior if found
3569   }
3570 #endif
3571 
3572   // Assign discovered "here or above" point
3573   least = find_non_split_ctrl(least);
3574   set_ctrl(n, least);
3575 
3576   // Collect inner loop bodies
3577   IdealLoopTree *chosen_loop = get_loop(least);
3578   if( !chosen_loop->_child )   // Inner loop?
3579     chosen_loop->_body.push(n);// Collect inner loops
3580 }
3581 
3582 #ifdef ASSERT
3583 void PhaseIdealLoop::dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA) {
3584   tty->print_cr("%s", msg);
3585   tty->print("n: "); n->dump();
3586   tty->print("early(n): "); early->dump();
3587   if (n->in(0) != NULL  && !n->in(0)->is_top() &&
3588       n->in(0) != early && !n->in(0)->is_Root()) {
3589     tty->print("n->in(0): "); n->in(0)->dump();
3590   }
3591   for (uint i = 1; i < n->req(); i++) {
3592     Node* in1 = n->in(i);
3593     if (in1 != NULL && in1 != n && !in1->is_top()) {
3594       tty->print("n->in(%d): ", i); in1->dump();
3595       Node* in1_early = get_ctrl(in1);
3596       tty->print("early(n->in(%d)): ", i); in1_early->dump();
3597       if (in1->in(0) != NULL     && !in1->in(0)->is_top() &&
3598           in1->in(0) != in1_early && !in1->in(0)->is_Root()) {
3599         tty->print("n->in(%d)->in(0): ", i); in1->in(0)->dump();
3600       }
3601       for (uint j = 1; j < in1->req(); j++) {
3602         Node* in2 = in1->in(j);
3603         if (in2 != NULL && in2 != n && in2 != in1 && !in2->is_top()) {
3604           tty->print("n->in(%d)->in(%d): ", i, j); in2->dump();
3605           Node* in2_early = get_ctrl(in2);
3606           tty->print("early(n->in(%d)->in(%d)): ", i, j); in2_early->dump();
3607           if (in2->in(0) != NULL     && !in2->in(0)->is_top() &&
3608               in2->in(0) != in2_early && !in2->in(0)->is_Root()) {
3609             tty->print("n->in(%d)->in(%d)->in(0): ", i, j); in2->in(0)->dump();
3610           }
3611         }
3612       }
3613     }
3614   }
3615   tty->cr();
3616   tty->print("LCA(n): "); LCA->dump();
3617   for (uint i = 0; i < n->outcnt(); i++) {
3618     Node* u1 = n->raw_out(i);
3619     if (u1 == n)
3620       continue;
3621     tty->print("n->out(%d): ", i); u1->dump();
3622     if (u1->is_CFG()) {
3623       for (uint j = 0; j < u1->outcnt(); j++) {
3624         Node* u2 = u1->raw_out(j);
3625         if (u2 != u1 && u2 != n && u2->is_CFG()) {
3626           tty->print("n->out(%d)->out(%d): ", i, j); u2->dump();
3627         }
3628       }
3629     } else {
3630       Node* u1_later = get_ctrl(u1);
3631       tty->print("later(n->out(%d)): ", i); u1_later->dump();
3632       if (u1->in(0) != NULL     && !u1->in(0)->is_top() &&
3633           u1->in(0) != u1_later && !u1->in(0)->is_Root()) {
3634         tty->print("n->out(%d)->in(0): ", i); u1->in(0)->dump();
3635       }
3636       for (uint j = 0; j < u1->outcnt(); j++) {
3637         Node* u2 = u1->raw_out(j);
3638         if (u2 == n || u2 == u1)
3639           continue;
3640         tty->print("n->out(%d)->out(%d): ", i, j); u2->dump();
3641         if (!u2->is_CFG()) {
3642           Node* u2_later = get_ctrl(u2);
3643           tty->print("later(n->out(%d)->out(%d)): ", i, j); u2_later->dump();
3644           if (u2->in(0) != NULL     && !u2->in(0)->is_top() &&
3645               u2->in(0) != u2_later && !u2->in(0)->is_Root()) {
3646             tty->print("n->out(%d)->in(0): ", i); u2->in(0)->dump();
3647           }
3648         }
3649       }
3650     }
3651   }
3652   tty->cr();
3653   int ct = 0;
3654   Node *dbg_legal = LCA;
3655   while(!dbg_legal->is_Start() && ct < 100) {
3656     tty->print("idom[%d] ",ct); dbg_legal->dump();
3657     ct++;
3658     dbg_legal = idom(dbg_legal);
3659   }
3660   tty->cr();
3661 }
3662 #endif
3663 
3664 #ifndef PRODUCT
3665 //------------------------------dump-------------------------------------------
3666 void PhaseIdealLoop::dump( ) const {
3667   ResourceMark rm;
3668   Arena* arena = Thread::current()->resource_area();
3669   Node_Stack stack(arena, C->unique() >> 2);
3670   Node_List rpo_list;
3671   VectorSet visited(arena);
3672   visited.set(C->top()->_idx);
3673   rpo( C->root(), stack, visited, rpo_list );
3674   // Dump root loop indexed by last element in PO order
3675   dump( _ltree_root, rpo_list.size(), rpo_list );
3676 }
3677 
3678 void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
3679   loop->dump_head();
3680 
3681   // Now scan for CFG nodes in the same loop
3682   for( uint j=idx; j > 0;  j-- ) {
3683     Node *n = rpo_list[j-1];
3684     if( !_nodes[n->_idx] )      // Skip dead nodes
3685       continue;
3686     if( get_loop(n) != loop ) { // Wrong loop nest
3687       if( get_loop(n)->_head == n &&    // Found nested loop?
3688           get_loop(n)->_parent == loop )
3689         dump(get_loop(n),rpo_list.size(),rpo_list);     // Print it nested-ly
3690       continue;
3691     }
3692 
3693     // Dump controlling node
3694     for( uint x = 0; x < loop->_nest; x++ )
3695       tty->print("  ");
3696     tty->print("C");
3697     if( n == C->root() ) {
3698       n->dump();
3699     } else {
3700       Node* cached_idom   = idom_no_update(n);
3701       Node *computed_idom = n->in(0);
3702       if( n->is_Region() ) {
3703         computed_idom = compute_idom(n);
3704         // computed_idom() will return n->in(0) when idom(n) is an IfNode (or
3705         // any MultiBranch ctrl node), so apply a similar transform to
3706         // the cached idom returned from idom_no_update.
3707         cached_idom = find_non_split_ctrl(cached_idom);
3708       }
3709       tty->print(" ID:%d",computed_idom->_idx);
3710       n->dump();
3711       if( cached_idom != computed_idom ) {
3712         tty->print_cr("*** BROKEN IDOM!  Computed as: %d, cached as: %d",
3713                       computed_idom->_idx, cached_idom->_idx);
3714       }
3715     }
3716     // Dump nodes it controls
3717     for( uint k = 0; k < _nodes.Size(); k++ ) {
3718       // (k < C->unique() && get_ctrl(find(k)) == n)
3719       if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) {
3720         Node *m = C->root()->find(k);
3721         if( m && m->outcnt() > 0 ) {
3722           if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) {
3723             tty->print_cr("*** BROKEN CTRL ACCESSOR!  _nodes[k] is %p, ctrl is %p",
3724                           _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
3725           }
3726           for( uint j = 0; j < loop->_nest; j++ )
3727             tty->print("  ");
3728           tty->print(" ");
3729           m->dump();
3730         }
3731       }
3732     }
3733   }
3734 }
3735 
3736 // Collect a R-P-O for the whole CFG.
3737 // Result list is in post-order (scan backwards for RPO)
3738 void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const {
3739   stk.push(start, 0);
3740   visited.set(start->_idx);
3741 
3742   while (stk.is_nonempty()) {
3743     Node* m   = stk.node();
3744     uint  idx = stk.index();
3745     if (idx < m->outcnt()) {
3746       stk.set_index(idx + 1);
3747       Node* n = m->raw_out(idx);
3748       if (n->is_CFG() && !visited.test_set(n->_idx)) {
3749         stk.push(n, 0);
3750       }
3751     } else {
3752       rpo_list.push(m);
3753       stk.pop();
3754     }
3755   }
3756 }
3757 #endif
3758 
3759 
3760 //=============================================================================
3761 //------------------------------LoopTreeIterator-----------------------------------
3762 
3763 // Advance to next loop tree using a preorder, left-to-right traversal.
3764 void LoopTreeIterator::next() {
3765   assert(!done(), "must not be done.");
3766   if (_curnt->_child != NULL) {
3767     _curnt = _curnt->_child;
3768   } else if (_curnt->_next != NULL) {
3769     _curnt = _curnt->_next;
3770   } else {
3771     while (_curnt != _root && _curnt->_next == NULL) {
3772       _curnt = _curnt->_parent;
3773     }
3774     if (_curnt == _root) {
3775       _curnt = NULL;
3776       assert(done(), "must be done.");
3777     } else {
3778       assert(_curnt->_next != NULL, "must be more to do");
3779       _curnt = _curnt->_next;
3780     }
3781   }
3782 }