hotspot/src/share/vm/opto/loopopts.cpp

Print this page
rev 611 : Merge
   1 #ifdef USE_PRAGMA_IDENT_SRC
   2 #pragma ident "@(#)loopopts.cpp 1.222 08/11/24 12:23:09 JVM"
   3 #endif
   4 /*
   5  * Copyright 1999-2006 Sun Microsystems, Inc.  All Rights Reserved.
   6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   7  *
   8  * This code is free software; you can redistribute it and/or modify it
   9  * under the terms of the GNU General Public License version 2 only, as
  10  * published by the Free Software Foundation.
  11  *
  12  * This code is distributed in the hope that it will be useful, but WITHOUT
  13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15  * version 2 for more details (a copy is included in the LICENSE file that
  16  * accompanied this code).
  17  *
  18  * You should have received a copy of the GNU General Public License version
  19  * 2 along with this work; if not, write to the Free Software Foundation,
  20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  21  *
  22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  23  * CA 95054 USA or visit www.sun.com if you need additional information or
  24  * have any questions.
  25  *  
  26  */
  27 
  28 #include "incls/_precompiled.incl"
  29 #include "incls/_loopopts.cpp.incl"
  30 
  31 //=============================================================================
  32 //------------------------------split_thru_phi---------------------------------
  33 // Split Node 'n' through merge point if there is enough win.
  34 Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
  35   if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::BOTTOM) {
  36     // ConvI2L may have type information on it which is unsafe to push up
  37     // so disable this for now
  38     return NULL;
  39   }
  40   int wins = 0;
  41   assert( !n->is_CFG(), "" );
  42   assert( region->is_Region(), "" );
  43   Node *phi = new (C, region->req()) PhiNode( region, n->bottom_type() );











  44   uint old_unique = C->unique();
  45   for( uint i = 1; i < region->req(); i++ ) {
  46     Node *x;
  47     Node* the_clone = NULL;
  48     if( region->in(i) == C->top() ) {
  49       x = C->top();             // Dead path?  Use a dead data op
  50     } else {
  51       x = n->clone();           // Else clone up the data op
  52       the_clone = x;            // Remember for possible deletion.
  53       // Alter data node to use pre-phi inputs
  54       if( n->in(0) == region )
  55         x->set_req( 0, region->in(i) );
  56       for( uint j = 1; j < n->req(); j++ ) {
  57         Node *in = n->in(j);
  58         if( in->is_Phi() && in->in(0) == region )
  59           x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
  60       }
  61     }
  62     // Check for a 'win' on some paths
  63     const Type *t = x->Value(&_igvn);


  71     // the PhiNode may cause the resulting node to migrate back to a previous
  72     // loop iteration.
  73     if( singleton && t == Type::TOP ) {
  74       // Is_Loop() == false does not confirm the absence of a loop (e.g., an
  75       // irreducible loop may not be indicated by an affirmative is_Loop());
  76       // therefore, the only top we can split thru a phi is on a backedge of
  77       // a loop.
  78       singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
  79     }
  80 
  81     if( singleton ) {
  82       wins++;
  83       x = ((PhaseGVN&)_igvn).makecon(t);
  84     } else {
  85       // We now call Identity to try to simplify the cloned node.
  86       // Note that some Identity methods call phase->type(this).
  87       // Make sure that the type array is big enough for
  88       // our new node, even though we may throw the node away.
  89       // (Note: This tweaking with igvn only works because x is a new node.)
  90       _igvn.set_type(x, t);




  91       Node *y = x->Identity(&_igvn);
  92       if( y != x ) {
  93         wins++;
  94         x = y;
  95       } else {
  96         y = _igvn.hash_find(x);
  97         if( y ) {
  98           wins++;
  99           x = y;
 100         } else {
 101           // Else x is a new node we are keeping
 102           // We do not need register_new_node_with_optimizer
 103           // because set_type has already been called.
 104           _igvn._worklist.push(x);
 105         }
 106       }
 107     }
 108     if (x != the_clone && the_clone != NULL)
 109       _igvn.remove_dead_node(the_clone);
 110     phi->set_req( i, x );


 426   Node *rp = region->in(2);
 427   if( !lp || !rp ) return NULL;
 428   Node *lp_c = lp->in(0);
 429   if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL;
 430   IfNode *iff = lp_c->as_If();
 431 
 432   // Check for highly predictable branch.  No point in CMOV'ing if
 433   // we are going to predict accurately all the time.
 434   // %%% This hides patterns produced by utility methods like Math.min.
 435   if( iff->_prob < PROB_UNLIKELY_MAG(3) ||
 436       iff->_prob > PROB_LIKELY_MAG(3) )
 437     return NULL;
 438 
 439   // Check for ops pinned in an arm of the diamond.
 440   // Can't remove the control flow in this case
 441   if( lp->outcnt() > 1 ) return NULL;
 442   if( rp->outcnt() > 1 ) return NULL;
 443 
 444   // Check profitability
 445   int cost = 0;

 446   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 447     Node *out = region->fast_out(i);
 448     if( !out->is_Phi() ) continue; // Ignore other control edges, etc

 449     PhiNode* phi = out->as_Phi();
 450     switch (phi->type()->basic_type()) {
 451     case T_LONG:
 452       cost++;                   // Probably encodes as 2 CMOV's
 453     case T_INT:                 // These all CMOV fine
 454     case T_FLOAT:
 455     case T_DOUBLE:
 456     case T_ADDRESS:             // (RawPtr)
 457       cost++;
 458       break;

 459     case T_OBJECT: {            // Base oops are OK, but not derived oops
 460       const TypeOopPtr *tp = phi->type()->isa_oopptr();
 461       // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
 462       // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
 463       // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
 464       // have a Phi for the base here that we convert to a CMOVE all is well
 465       // and good.  But if the base is dead, we'll not make a CMOVE.  Later
 466       // the allocator will have to produce a base by creating a CMOVE of the
 467       // relevant bases.  This puts the allocator in the business of
 468       // manufacturing expensive instructions, generally a bad plan.
 469       // Just Say No to Conditionally-Moved Derived Pointers.
 470       if( tp && tp->offset() != 0 )
 471         return NULL;
 472       cost++;
 473       break;
 474     }
 475     default:
 476       return NULL;              // In particular, can't do memory or I/O
 477     }
 478     // Add in cost any speculative ops
 479     for( uint j = 1; j < region->req(); j++ ) {
 480       Node *proj = region->in(j);
 481       Node *inp = phi->in(j);
 482       if (get_ctrl(inp) == proj) { // Found local op
 483         cost++;
 484         // Check for a chain of dependent ops; these will all become
 485         // speculative in a CMOV.
 486         for( uint k = 1; k < inp->req(); k++ )
 487           if (get_ctrl(inp->in(k)) == proj)
 488             return NULL;        // Too much speculative goo
 489       }
 490     }
 491     // See if the Phi is used by a Cmp.  This will likely Split-If, a
 492     // higher-payoff operation.
 493     for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
 494       Node* use = phi->fast_out(k);
 495       if( use->is_Cmp() )
 496         return NULL;
 497     }
 498   }
 499   if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo






 500 
 501   // --------------
 502   // Now replace all Phis with CMOV's
 503   Node *cmov_ctrl = iff->in(0);
 504   uint flip = (lp->Opcode() == Op_IfTrue);
 505   while( 1 ) {
 506     PhiNode* phi = NULL;
 507     for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 508       Node *out = region->fast_out(i);
 509       if (out->is_Phi()) {
 510         phi = out->as_Phi();
 511         break;
 512       }
 513     }
 514     if (phi == NULL)  break;
 515 #ifndef PRODUCT
 516     if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV");
 517 #endif
 518     // Move speculative ops
 519     for( uint j = 1; j < region->req(); j++ ) {


 544 
 545   return iff->in(1);
 546 }
 547 
 548 //------------------------------split_if_with_blocks_pre-----------------------
 549 // Do the real work in a non-recursive function.  Data nodes want to be
 550 // cloned in the pre-order so they can feed each other nicely.
 551 Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
 552   // Cloning these guys is unlikely to win
 553   int n_op = n->Opcode();
 554   if( n_op == Op_MergeMem ) return n;
 555   if( n->is_Proj() ) return n;
 556   // Do not clone-up CmpFXXX variations, as these are always
 557   // followed by a CmpI
 558   if( n->is_Cmp() ) return n;
 559   // Attempt to use a conditional move instead of a phi/branch
 560   if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
 561     Node *cmov = conditional_move( n );
 562     if( cmov ) return cmov;
 563   }
 564   if( n->is_CFG() || n_op == Op_StorePConditional || n_op == Op_StoreLConditional || n_op == Op_CompareAndSwapI || n_op == Op_CompareAndSwapL ||n_op == Op_CompareAndSwapP)  return n;

 565   if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
 566       n_op == Op_Opaque2 ) {
 567     if( !C->major_progress() )   // If chance of no more loop opts...
 568       _igvn._worklist.push(n);  // maybe we'll remove them
 569     return n;
 570   }
 571 
 572   if( n->is_Con() ) return n;   // No cloning for Con nodes
 573 
 574   Node *n_ctrl = get_ctrl(n);
 575   if( !n_ctrl ) return n;       // Dead node
 576 
 577   // Attempt to remix address expressions for loop invariants
 578   Node *m = remix_address_expressions( n );
 579   if( m ) return m;
 580 
 581   // Determine if the Node has inputs from some local Phi.
 582   // Returns the block to clone thru.
 583   Node *n_blk = has_local_phi_input( n );
 584   if( !n_blk ) return n;


 893               // Don't allow the control input to be a CFG splitting node.
 894               // Such nodes should only have ProjNodes as outs, e.g. IfNode
 895               // should only have IfTrueNode and IfFalseNode (4985384).
 896               x_ctrl = find_non_split_ctrl(x_ctrl);
 897               assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
 898 
 899               x->set_req(0, x_ctrl);
 900             }
 901             register_new_node(x, x_ctrl);
 902 
 903             // Some institutional knowledge is needed here: 'x' is
 904             // yanked because if the optimizer runs GVN on it all the
 905             // cloned x's will common up and undo this optimization and
 906             // be forced back in the loop.  This is annoying because it
 907             // makes +VerifyOpto report false-positives on progress.  I
 908             // tried setting control edges on the x's to force them to
 909             // not combine, but the matching gets worried when it tries
 910             // to fold a StoreP and an AddP together (as part of an
 911             // address expression) and the AddP and StoreP have
 912             // different controls.
 913             if( !x->is_Load() ) _igvn._worklist.yank(x);
 914           }
 915           _igvn.remove_dead_node(n);
 916         }
 917       }
 918     }
 919   }
 920 
 921   // Check for Opaque2's whose loop has disappeared - whose input is in the
 922   // same loop nest as their output.  Remove 'em, they are no longer useful.
 923   if( n_op == Op_Opaque2 &&
 924       n->in(1) != NULL &&
 925       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
 926     _igvn.add_users_to_worklist(n);
 927     _igvn.hash_delete(n);
 928     _igvn.subsume_node( n, n->in(1) );
 929   }
 930 }
 931 
 932 //------------------------------split_if_with_blocks---------------------------
 933 // Check for aggressive application of 'split-if' optimization,


1857   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1858     Node* use = n->fast_out(j);
1859     if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
1860       worklist.push(use);
1861     }
1862   }
1863   while( worklist.size() ) {
1864     Node *use = worklist.pop();
1865     if (!has_node(use) || use->in(0) == C->top()) continue;
1866     uint j;
1867     for (j = 0; j < use->req(); j++) {
1868       if (use->in(j) == n) break;
1869     }
1870     assert(j < use->req(), "must be there");
1871 
1872     // clone "n" and insert it between the inputs of "n" and the use outside the loop
1873     Node* n_clone = n->clone();
1874     _igvn.hash_delete(use);
1875     use->set_req(j, n_clone);
1876     _igvn._worklist.push(use);

1877     if (!use->is_Phi()) {
1878       Node* use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
1879       set_ctrl(n_clone, use_c);
1880       assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
1881       get_loop(use_c)->_body.push(n_clone);
1882     } else {
1883       // Use in a phi is considered a use in the associated predecessor block
1884       Node *prevbb = use->in(0)->in(j);
1885       set_ctrl(n_clone, prevbb);
1886       assert(!loop->is_member(get_loop(prevbb)), "should be outside loop");
1887       get_loop(prevbb)->_body.push(n_clone);
1888     }



1889     _igvn.register_new_node_with_optimizer(n_clone);
1890 #if !defined(PRODUCT)
1891     if (TracePartialPeeling) {
1892       tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
1893     }
1894 #endif
1895   }
1896 }
1897 
1898 
1899 //------------------------------ clone_for_special_use_inside_loop -------------------------------------
1900 // clone "n" for special uses that are in the not_peeled region.
1901 // If these def-uses occur in separate blocks, the code generator
1902 // marks the method as not compilable.  For example, if a "BoolNode"
1903 // is in a different basic block than the "IfNode" that uses it, then
1904 // the compilation is aborted in the code generator.
1905 void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
1906                                                         VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
1907   if (n->is_Phi() || n->is_Load()) {
1908     return;


2224 //         :  |     v   v       |
2225 //         :  |  false true     |
2226 //         :  |   |     |       |
2227 //         :  |   v    stmt2    |
2228 //         :  | exitB:  |       |
2229 //         :  | stmt4   v       |
2230 //         :  |       ifA orig  |
2231 //         :  |      /  \       |
2232 //         :  |     /    \      |
2233 //         :  |    v     v      |
2234 //         :  |  false  true    |
2235 //         :  |  /        \     |
2236 //         :  v  v         -----+
2237 //          RegionA
2238 //             |
2239 //             v
2240 //           exitA
2241 //
2242 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2243 



2244   LoopNode *head  = loop->_head->as_Loop();
2245 
2246   if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2247     return false;
2248   }
2249 
2250   // Check for complex exit control
2251   for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2252     Node *n = loop->_body.at(ii);
2253     int opc = n->Opcode();
2254     if (n->is_Call()        ||
2255         opc == Op_Catch     ||
2256         opc == Op_CatchProj ||
2257         opc == Op_Jump      ||
2258         opc == Op_JumpProj) {
2259 #if !defined(PRODUCT)
2260       if (TracePartialPeeling) {
2261         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2262       }
2263 #endif


2624 //------------------------------reorg_offsets----------------------------------
2625 // Reorganize offset computations to lower register pressure.  Mostly
2626 // prevent loop-fallout uses of the pre-incremented trip counter (which are
2627 // then alive with the post-incremented trip counter forcing an extra
2628 // register move)
2629 void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) {
2630 
2631   CountedLoopNode *cl = loop->_head->as_CountedLoop();
2632   CountedLoopEndNode *cle = cl->loopexit();
2633   if( !cle ) return;            // The occasional dead loop
2634   // Find loop exit control
2635   Node *exit = cle->proj_out(false);
2636   assert( exit->Opcode() == Op_IfFalse, "" );
2637 
2638   // Check for the special case of folks using the pre-incremented
2639   // trip-counter on the fall-out path (forces the pre-incremented
2640   // and post-incremented trip counter to be live at the same time).
2641   // Fix this by adjusting to use the post-increment trip counter.
2642   Node *phi = cl->phi();
2643   if( !phi ) return;            // Dead infinite loop




2644   bool progress = true;
2645   while (progress) {
2646     progress = false;
2647     for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
2648       Node* use = phi->fast_out(i);   // User of trip-counter
2649       if (!has_ctrl(use))  continue;
2650       Node *u_ctrl = get_ctrl(use);
2651       if( use->is_Phi() ) {
2652         u_ctrl = NULL;
2653         for( uint j = 1; j < use->req(); j++ )
2654           if( use->in(j) == phi )
2655             u_ctrl = dom_lca( u_ctrl, use->in(0)->in(j) );
2656       }
2657       IdealLoopTree *u_loop = get_loop(u_ctrl);
2658       // Look for loop-invariant use
2659       if( u_loop == loop ) continue;
2660       if( loop->is_member( u_loop ) ) continue;
2661       // Check that use is live out the bottom.  Assuming the trip-counter
2662       // update is right at the bottom, uses of of the loop middle are ok.
2663       if( dom_lca( exit, u_ctrl ) != exit ) continue;
2664       // protect against stride not being a constant
2665       if( !cle->stride_is_con() ) continue;
2666       // Hit!  Refactor use to use the post-incremented tripcounter.
2667       // Compute a post-increment tripcounter.
2668       Node *opaq = new (C, 2) Opaque2Node( cle->incr() );
2669       register_new_node( opaq, u_ctrl );
2670       Node *neg_stride = _igvn.intcon(-cle->stride_con());
2671       set_ctrl(neg_stride, C->root());
2672       Node *post = new (C, 3) AddINode( opaq, neg_stride);
2673       register_new_node( post, u_ctrl );
2674       _igvn.hash_delete(use);
2675       _igvn._worklist.push(use);
2676       for( uint j = 1; j < use->req(); j++ )
2677         if( use->in(j) == phi )
2678           use->set_req(j, post);
2679       // Since DU info changed, rerun loop
2680       progress = true;
2681       break;
2682     }
2683   }
2684 
2685 }
2686 
   1 #ifdef USE_PRAGMA_IDENT_SRC
   2 #pragma ident "@(#)loopopts.cpp 1.222 08/11/24 12:23:09 JVM"
   3 #endif
   4 /*
   5  * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
   6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   7  *
   8  * This code is free software; you can redistribute it and/or modify it
   9  * under the terms of the GNU General Public License version 2 only, as
  10  * published by the Free Software Foundation.
  11  *
  12  * This code is distributed in the hope that it will be useful, but WITHOUT
  13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15  * version 2 for more details (a copy is included in the LICENSE file that
  16  * accompanied this code).
  17  *
  18  * You should have received a copy of the GNU General Public License version
  19  * 2 along with this work; if not, write to the Free Software Foundation,
  20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  21  *
  22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  23  * CA 95054 USA or visit www.sun.com if you need additional information or
  24  * have any questions.
  25  *  
  26  */
  27 
  28 #include "incls/_precompiled.incl"
  29 #include "incls/_loopopts.cpp.incl"
  30 
  31 //=============================================================================
  32 //------------------------------split_thru_phi---------------------------------
  33 // Split Node 'n' through merge point if there is enough win.
  34 Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
  35   if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
  36     // ConvI2L may have type information on it which is unsafe to push up
  37     // so disable this for now
  38     return NULL;
  39   }
  40   int wins = 0;
  41   assert( !n->is_CFG(), "" );
  42   assert( region->is_Region(), "" );
  43 
  44   const Type* type = n->bottom_type();
  45   const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
  46   Node *phi;
  47   if( t_oop != NULL && t_oop->is_known_instance_field() ) {
  48     int iid    = t_oop->instance_id();
  49     int index  = C->get_alias_index(t_oop);
  50     int offset = t_oop->offset();
  51     phi = new (C,region->req()) PhiNode(region, type, NULL, iid, index, offset);
  52   } else {
  53     phi = new (C,region->req()) PhiNode(region, type);
  54   }
  55   uint old_unique = C->unique();
  56   for( uint i = 1; i < region->req(); i++ ) {
  57     Node *x;
  58     Node* the_clone = NULL;
  59     if( region->in(i) == C->top() ) {
  60       x = C->top();             // Dead path?  Use a dead data op
  61     } else {
  62       x = n->clone();           // Else clone up the data op
  63       the_clone = x;            // Remember for possible deletion.
  64       // Alter data node to use pre-phi inputs
  65       if( n->in(0) == region )
  66         x->set_req( 0, region->in(i) );
  67       for( uint j = 1; j < n->req(); j++ ) {
  68         Node *in = n->in(j);
  69         if( in->is_Phi() && in->in(0) == region )
  70           x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
  71       }
  72     }
  73     // Check for a 'win' on some paths
  74     const Type *t = x->Value(&_igvn);


  82     // the PhiNode may cause the resulting node to migrate back to a previous
  83     // loop iteration.
  84     if( singleton && t == Type::TOP ) {
  85       // Is_Loop() == false does not confirm the absence of a loop (e.g., an
  86       // irreducible loop may not be indicated by an affirmative is_Loop());
  87       // therefore, the only top we can split thru a phi is on a backedge of
  88       // a loop.
  89       singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
  90     }
  91 
  92     if( singleton ) {
  93       wins++;
  94       x = ((PhaseGVN&)_igvn).makecon(t);
  95     } else {
  96       // We now call Identity to try to simplify the cloned node.
  97       // Note that some Identity methods call phase->type(this).
  98       // Make sure that the type array is big enough for
  99       // our new node, even though we may throw the node away.
 100       // (Note: This tweaking with igvn only works because x is a new node.)
 101       _igvn.set_type(x, t);
 102       // If x is a TypeNode, capture any more-precise type permanently into Node
 103       // otherwise it will not be updated during igvn->transform since
 104       // igvn->type(x) is set to x->Value() already.
 105       x->raise_bottom_type(t);
 106       Node *y = x->Identity(&_igvn);
 107       if( y != x ) {
 108         wins++;
 109         x = y;
 110       } else {
 111         y = _igvn.hash_find(x);
 112         if( y ) {
 113           wins++;
 114           x = y;
 115         } else {
 116           // Else x is a new node we are keeping
 117           // We do not need register_new_node_with_optimizer
 118           // because set_type has already been called.
 119           _igvn._worklist.push(x);
 120         }
 121       }
 122     }
 123     if (x != the_clone && the_clone != NULL)
 124       _igvn.remove_dead_node(the_clone);
 125     phi->set_req( i, x );


 441   Node *rp = region->in(2);
 442   if( !lp || !rp ) return NULL;
 443   Node *lp_c = lp->in(0);
 444   if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL;
 445   IfNode *iff = lp_c->as_If();
 446 
 447   // Check for highly predictable branch.  No point in CMOV'ing if
 448   // we are going to predict accurately all the time.
 449   // %%% This hides patterns produced by utility methods like Math.min.
 450   if( iff->_prob < PROB_UNLIKELY_MAG(3) ||
 451       iff->_prob > PROB_LIKELY_MAG(3) )
 452     return NULL;
 453 
 454   // Check for ops pinned in an arm of the diamond.
 455   // Can't remove the control flow in this case
 456   if( lp->outcnt() > 1 ) return NULL;
 457   if( rp->outcnt() > 1 ) return NULL;
 458 
 459   // Check profitability
 460   int cost = 0;
 461   int phis = 0;
 462   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 463     Node *out = region->fast_out(i);
 464     if( !out->is_Phi() ) continue; // Ignore other control edges, etc
 465     phis++;
 466     PhiNode* phi = out->as_Phi();
 467     switch (phi->type()->basic_type()) {
 468     case T_LONG:
 469       cost++;                   // Probably encodes as 2 CMOV's
 470     case T_INT:                 // These all CMOV fine
 471     case T_FLOAT:
 472     case T_DOUBLE:
 473     case T_ADDRESS:             // (RawPtr)
 474       cost++;
 475       break;
 476     case T_NARROWOOP: // Fall through
 477     case T_OBJECT: {            // Base oops are OK, but not derived oops
 478       const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
 479       // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
 480       // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
 481       // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
 482       // have a Phi for the base here that we convert to a CMOVE all is well
 483       // and good.  But if the base is dead, we'll not make a CMOVE.  Later
 484       // the allocator will have to produce a base by creating a CMOVE of the
 485       // relevant bases.  This puts the allocator in the business of
 486       // manufacturing expensive instructions, generally a bad plan.
 487       // Just Say No to Conditionally-Moved Derived Pointers.
 488       if( tp && tp->offset() != 0 )
 489         return NULL;
 490       cost++;
 491       break;
 492     }
 493     default:
 494       return NULL;              // In particular, can't do memory or I/O
 495     }
 496     // Add in cost any speculative ops
 497     for( uint j = 1; j < region->req(); j++ ) {
 498       Node *proj = region->in(j);
 499       Node *inp = phi->in(j);
 500       if (get_ctrl(inp) == proj) { // Found local op
 501         cost++;
 502         // Check for a chain of dependent ops; these will all become
 503         // speculative in a CMOV.
 504         for( uint k = 1; k < inp->req(); k++ )
 505           if (get_ctrl(inp->in(k)) == proj)
 506             return NULL;        // Too much speculative goo
 507       }
 508     }
 509     // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
 510     // This will likely Split-If, a higher-payoff operation.
 511     for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
 512       Node* use = phi->fast_out(k);
 513       if( use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP() )
 514         return NULL;
 515     }
 516   }
 517   if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo
 518   Node* bol = iff->in(1);
 519   assert( bol->Opcode() == Op_Bool, "" );
 520   int cmp_op = bol->in(1)->Opcode();
 521   // It is expensive to generate flags from a float compare.
 522   // Avoid duplicated float compare.
 523   if( phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
 524 
 525   // --------------
 526   // Now replace all Phis with CMOV's
 527   Node *cmov_ctrl = iff->in(0);
 528   uint flip = (lp->Opcode() == Op_IfTrue);
 529   while( 1 ) {
 530     PhiNode* phi = NULL;
 531     for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 532       Node *out = region->fast_out(i);
 533       if (out->is_Phi()) {
 534         phi = out->as_Phi();
 535         break;
 536       }
 537     }
 538     if (phi == NULL)  break;
 539 #ifndef PRODUCT
 540     if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV");
 541 #endif
 542     // Move speculative ops
 543     for( uint j = 1; j < region->req(); j++ ) {


 568 
 569   return iff->in(1);
 570 }
 571 
 572 //------------------------------split_if_with_blocks_pre-----------------------
 573 // Do the real work in a non-recursive function.  Data nodes want to be
 574 // cloned in the pre-order so they can feed each other nicely.
 575 Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
 576   // Cloning these guys is unlikely to win
 577   int n_op = n->Opcode();
 578   if( n_op == Op_MergeMem ) return n;
 579   if( n->is_Proj() ) return n;
 580   // Do not clone-up CmpFXXX variations, as these are always
 581   // followed by a CmpI
 582   if( n->is_Cmp() ) return n;
 583   // Attempt to use a conditional move instead of a phi/branch
 584   if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
 585     Node *cmov = conditional_move( n );
 586     if( cmov ) return cmov;
 587   }
 588   if( n->is_CFG() || n->is_LoadStore() )
 589     return n;
 590   if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
 591       n_op == Op_Opaque2 ) {
 592     if( !C->major_progress() )   // If chance of no more loop opts...
 593       _igvn._worklist.push(n);  // maybe we'll remove them
 594     return n;
 595   }
 596 
 597   if( n->is_Con() ) return n;   // No cloning for Con nodes
 598 
 599   Node *n_ctrl = get_ctrl(n);
 600   if( !n_ctrl ) return n;       // Dead node
 601 
 602   // Attempt to remix address expressions for loop invariants
 603   Node *m = remix_address_expressions( n );
 604   if( m ) return m;
 605 
 606   // Determine if the Node has inputs from some local Phi.
 607   // Returns the block to clone thru.
 608   Node *n_blk = has_local_phi_input( n );
 609   if( !n_blk ) return n;


 918               // Don't allow the control input to be a CFG splitting node.
 919               // Such nodes should only have ProjNodes as outs, e.g. IfNode
 920               // should only have IfTrueNode and IfFalseNode (4985384).
 921               x_ctrl = find_non_split_ctrl(x_ctrl);
 922               assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
 923 
 924               x->set_req(0, x_ctrl);
 925             }
 926             register_new_node(x, x_ctrl);
 927 
 928             // Some institutional knowledge is needed here: 'x' is
 929             // yanked because if the optimizer runs GVN on it all the
 930             // cloned x's will common up and undo this optimization and
 931             // be forced back in the loop.  This is annoying because it
 932             // makes +VerifyOpto report false-positives on progress.  I
 933             // tried setting control edges on the x's to force them to
 934             // not combine, but the matching gets worried when it tries
 935             // to fold a StoreP and an AddP together (as part of an
 936             // address expression) and the AddP and StoreP have
 937             // different controls.
 938             if( !x->is_Load() && !x->is_DecodeN() ) _igvn._worklist.yank(x);
 939           }
 940           _igvn.remove_dead_node(n);
 941         }
 942       }
 943     }
 944   }
 945 
 946   // Check for Opaque2's whose loop has disappeared - whose input is in the
 947   // same loop nest as their output.  Remove 'em, they are no longer useful.
 948   if( n_op == Op_Opaque2 &&
 949       n->in(1) != NULL &&
 950       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
 951     _igvn.add_users_to_worklist(n);
 952     _igvn.hash_delete(n);
 953     _igvn.subsume_node( n, n->in(1) );
 954   }
 955 }
 956 
 957 //------------------------------split_if_with_blocks---------------------------
 958 // Check for aggressive application of 'split-if' optimization,


1882   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1883     Node* use = n->fast_out(j);
1884     if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
1885       worklist.push(use);
1886     }
1887   }
1888   while( worklist.size() ) {
1889     Node *use = worklist.pop();
1890     if (!has_node(use) || use->in(0) == C->top()) continue;
1891     uint j;
1892     for (j = 0; j < use->req(); j++) {
1893       if (use->in(j) == n) break;
1894     }
1895     assert(j < use->req(), "must be there");
1896 
1897     // clone "n" and insert it between the inputs of "n" and the use outside the loop
1898     Node* n_clone = n->clone();
1899     _igvn.hash_delete(use);
1900     use->set_req(j, n_clone);
1901     _igvn._worklist.push(use);
1902     Node* use_c;
1903     if (!use->is_Phi()) {
1904       use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);



1905     } else {
1906       // Use in a phi is considered a use in the associated predecessor block
1907       use_c = use->in(0)->in(j);



1908     }
1909     set_ctrl(n_clone, use_c);
1910     assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
1911     get_loop(use_c)->_body.push(n_clone);
1912     _igvn.register_new_node_with_optimizer(n_clone);
1913 #if !defined(PRODUCT)
1914     if (TracePartialPeeling) {
1915       tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
1916     }
1917 #endif
1918   }
1919 }
1920 
1921 
1922 //------------------------------ clone_for_special_use_inside_loop -------------------------------------
1923 // clone "n" for special uses that are in the not_peeled region.
1924 // If these def-uses occur in separate blocks, the code generator
1925 // marks the method as not compilable.  For example, if a "BoolNode"
1926 // is in a different basic block than the "IfNode" that uses it, then
1927 // the compilation is aborted in the code generator.
1928 void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
1929                                                         VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
1930   if (n->is_Phi() || n->is_Load()) {
1931     return;


2247 //         :  |     v   v       |
2248 //         :  |  false true     |
2249 //         :  |   |     |       |
2250 //         :  |   v    stmt2    |
2251 //         :  | exitB:  |       |
2252 //         :  | stmt4   v       |
2253 //         :  |       ifA orig  |
2254 //         :  |      /  \       |
2255 //         :  |     /    \      |
2256 //         :  |    v     v      |
2257 //         :  |  false  true    |
2258 //         :  |  /        \     |
2259 //         :  v  v         -----+
2260 //          RegionA
2261 //             |
2262 //             v
2263 //           exitA
2264 //
2265 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2266 
2267   if (!loop->_head->is_Loop()) {
2268     return false;  }
2269 
2270   LoopNode *head  = loop->_head->as_Loop();
2271 
2272   if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2273     return false;
2274   }
2275 
2276   // Check for complex exit control
2277   for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2278     Node *n = loop->_body.at(ii);
2279     int opc = n->Opcode();
2280     if (n->is_Call()        ||
2281         opc == Op_Catch     ||
2282         opc == Op_CatchProj ||
2283         opc == Op_Jump      ||
2284         opc == Op_JumpProj) {
2285 #if !defined(PRODUCT)
2286       if (TracePartialPeeling) {
2287         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2288       }
2289 #endif


 2650 //------------------------------reorg_offsets----------------------------------
 2651 // Reorganize offset computations to lower register pressure.  Mostly
 2652 // prevent loop-fallout uses of the pre-incremented trip counter (which are
 2653 // then alive with the post-incremented trip counter forcing an extra
 2654 // register move)
 2655 void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) {
 2656 
 2657   CountedLoopNode *cl = loop->_head->as_CountedLoop();
 2658   CountedLoopEndNode *cle = cl->loopexit();
 2659   if( !cle ) return;            // The occasional dead loop
 2660   // Find loop exit control
 2661   Node *exit = cle->proj_out(false);
 2662   assert( exit->Opcode() == Op_IfFalse, "" );
 2663 
 2664   // Check for the special case of folks using the pre-incremented
 2665   // trip-counter on the fall-out path (forces the pre-incremented
 2666   // and post-incremented trip counter to be live at the same time).
 2667   // Fix this by adjusting to use the post-increment trip counter.
 2668   Node *phi = cl->phi();
 2669   if( !phi ) return;            // Dead infinite loop
 2670 
 2671   // Shape messed up, probably by iteration_split_impl
 2672   if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return;
 2673 
 2674   // Each rewrite edits the DU info of 'phi', so restart the scan from
 2675   // scratch after every hit until a full pass finds nothing to do.
 2674   bool progress = true;
 2675   while (progress) {
 2676     progress = false;
 2677     for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
 2678       Node* use = phi->fast_out(i);   // User of trip-counter
 2679       if (!has_ctrl(use))  continue;
 2680       Node *u_ctrl = get_ctrl(use);
 2681       // A use in a Phi is really a use in the corresponding predecessor
 2682       // block; take the dominator-LCA over all such predecessor blocks.
 2681       if( use->is_Phi() ) {
 2682         u_ctrl = NULL;
 2683         for( uint j = 1; j < use->req(); j++ )
 2684           if( use->in(j) == phi )
 2685             u_ctrl = dom_lca( u_ctrl, use->in(0)->in(j) );
 2686       }
 2687       IdealLoopTree *u_loop = get_loop(u_ctrl);
 2688       // Look for loop-invariant use
 2689       if( u_loop == loop ) continue;
 2690       if( loop->is_member( u_loop ) ) continue;
 2691       // Check that use is live out the bottom.  Assuming the trip-counter
 2692       // update is right at the bottom, uses of the loop middle are ok.
 2693       if( dom_lca( exit, u_ctrl ) != exit ) continue;
 2694       // protect against stride not being a constant
 2695       if( !cle->stride_is_con() ) continue;
 2696       // Hit!  Refactor use to use the post-incremented tripcounter.
 2697       // Compute a post-increment tripcounter.
 2698       // Opaque2 wraps the increment so the optimizer will not modify the
 2699       // expression (presumably to keep it from commoning back with the
 2700       // in-loop increment); Opaque2 nodes whose loop has gone away are
 2701       // stripped out later during IGVN cleanup.
 2698       Node *opaq = new (C, 2) Opaque2Node( C, cle->incr() );
 2699       register_new_node( opaq, u_ctrl );
 2700       Node *neg_stride = _igvn.intcon(-cle->stride_con());
 2701       set_ctrl(neg_stride, C->root());
 2702       // post = incr - stride: numerically equal to 'phi' on the exit path
 2703       // (the back edge of 'phi' is cl->incr(), checked above), but expressed
 2704       // in terms of the post-incremented counter so only 'incr' stays live.
 2702       Node *post = new (C, 3) AddINode( opaq, neg_stride);
 2703       register_new_node( post, u_ctrl );
 2704       _igvn.hash_delete(use);
 2705       _igvn._worklist.push(use);
 2706       // Replace every edge of 'use' that read the pre-incremented counter.
 2706       for( uint j = 1; j < use->req(); j++ )
 2707         if( use->in(j) == phi )
 2708           use->set_req(j, post);
 2709       // Since DU info changed, rerun loop
 2710       progress = true;
 2711       break;
 2712     }
 2713   }
 2714 
 2715 }
2716