< prev index next >

src/share/vm/opto/loopTransform.cpp

Print this page




 107       cl->set_exact_trip_count((uint)trip_count);
 108     }
 109   }
 110 }
 111 
 112 //------------------------------compute_profile_trip_cnt----------------------------
 113 // Compute loop trip count from profile data as
 114 //    (backedge_count + loop_exit_count) / loop_exit_count
 115 void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) {
 116   if (!_head->is_CountedLoop()) {
 117     return;
 118   }
 119   CountedLoopNode* head = _head->as_CountedLoop();
 120   if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
 121     return; // Already computed
 122   }
 123   float trip_cnt = (float)max_jint; // default is big
 124 
 125   Node* back = head->in(LoopNode::LoopBackControl);
 126   while (back != head) {
 127     if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
 128         back->in(0) &&
 129         back->in(0)->is_If() &&
 130         back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
 131         back->in(0)->as_If()->_prob != PROB_UNKNOWN) {
 132       break;
 133     }
 134     back = phase->idom(back);
 135   }
 136   if (back != head) {
 137     assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
 138            back->in(0), "if-projection exists");
 139     IfNode* back_if = back->in(0)->as_If();
 140     float loop_back_cnt = back_if->_fcnt * back_if->_prob;
 141 
 142     // Now compute a loop exit count
 143     float loop_exit_cnt = 0.0f;
 144     for( uint i = 0; i < _body.size(); i++ ) {
 145       Node *n = _body[i];
 146       if( n->is_If() ) {
 147         IfNode *iff = n->as_If();
 148         if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) {
 149           Node *exit = is_loop_exit(iff);
 150           if( exit ) {
 151             float exit_prob = iff->_prob;
 152             if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob;
 153             if (exit_prob > PROB_MIN) {
 154               float exit_cnt = iff->_fcnt * exit_prob;
 155               loop_exit_cnt += exit_cnt;
 156             }
 157           }
 158         }
 159       }
 160     }
 161     if (loop_exit_cnt > 0.0f) {
 162       trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
 163     } else {
 164       // No exit count so use
 165       trip_cnt = loop_back_cnt;
 166     }
 167   }
 168 #ifndef PRODUCT
 169   if (TraceProfileTripCount) {
 170     tty->print_cr("compute_profile_trip_cnt  lp: %d cnt: %f\n", head->_idx, trip_cnt);
 171   }
 172 #endif
 173   head->set_profile_trip_cnt(trip_cnt);
 174 }
 175 
 176 //---------------------is_invariant_addition-----------------------------
 177 // Return nonzero index of invariant operand for an Add or Sub
 178 // of (nonconstant) invariant and variant values. Helper for reassociate_invariants.
 179 int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) {
 180   int op = n->Opcode();
 181   if (op == Op_AddI || op == Op_SubI) {
 182     bool in1_invar = this->is_invariant(n->in(1));
 183     bool in2_invar = this->is_invariant(n->in(2));
 184     if (in1_invar && !in2_invar) return 1;
 185     if (!in1_invar && in2_invar) return 2;
 186   }
 187   return 0;
 188 }
 189 
 190 //---------------------reassociate_add_sub-----------------------------
 191 // Reassociate invariant add and subtract expressions:
 192 //
 193 // inv1 + (x + inv2)  =>  ( inv1 + inv2) + x
 194 // (x + inv2) + inv1  =>  ( inv1 + inv2) + x
 195 // inv1 + (x - inv2)  =>  ( inv1 - inv2) + x
 196 // inv1 - (inv2 - x)  =>  ( inv1 - inv2) + x
 197 // (x + inv2) - inv1  =>  (-inv1 + inv2) + x
 198 // (x - inv2) + inv1  =>  ( inv1 - inv2) + x
 199 // (x - inv2) - inv1  =>  (-inv1 - inv2) + x
 200 // inv1 + (inv2 - x)  =>  ( inv1 + inv2) - x
 201 // inv1 - (x - inv2)  =>  ( inv1 + inv2) - x


 273 // Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
 274 // make some loop-invariant test (usually a null-check) happen before the loop.
 275 bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
 276   Node *test = ((IdealLoopTree*)this)->tail();
 277   int  body_size = ((IdealLoopTree*)this)->_body.size();
 278   // Peeling does loop cloning which can result in O(N^2) node construction
 279   if( body_size > 255 /* Prevent overflow for large body_size */
 280       || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) {
 281     return false;           // too large to safely clone
 282   }
 283 
 284   // check for vectorized loops, any peeling done was already applied
 285   if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) return false;
 286 
 287   while( test != _head ) {      // Scan till run off top of loop
 288     if( test->is_If() ) {       // Test?
 289       Node *ctrl = phase->get_ctrl(test->in(1));
 290       if (ctrl->is_top())
 291         return false;           // Found dead test on live IF?  No peeling!
 292       // Standard IF only has one input value to check for loop invariance
 293       assert(test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
 294       // Condition is not a member of this loop?
 295       if( !is_member(phase->get_loop(ctrl)) &&
 296           is_loop_exit(test) )
 297         return true;            // Found reason to peel!
 298     }
 299     // Walk up dominators to loop _head looking for test which is
 300     // executed on every path thru loop.
 301     test = phase->idom(test);
 302   }
 303   return false;
 304 }
 305 
 306 //------------------------------peeled_dom_test_elim---------------------------
 307 // If we got the effect of peeling, either by actually peeling or by making
 308 // a pre-loop which must execute at least once, we can remove all
 309 // loop-invariant dominated tests in the main body.
 310 void PhaseIdealLoop::peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ) {
 311   bool progress = true;
 312   while( progress ) {
 313     progress = false;           // Reset for next iteration
 314     Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();
 315     Node *test = prev->in(0);
 316     while( test != loop->_head ) { // Scan till run off top of loop
 317 
 318       int p_op = prev->Opcode();
 319       if( (p_op == Op_IfFalse || p_op == Op_IfTrue) &&
 320           test->is_If() &&      // Test?
 321           !test->in(1)->is_Con() && // And not already obvious?
 322           // Condition is not a member of this loop?
 323           !loop->is_member(get_loop(get_ctrl(test->in(1))))){
 324         // Walk loop body looking for instances of this test
 325         for( uint i = 0; i < loop->_body.size(); i++ ) {
 326           Node *n = loop->_body.at(i);
 327           if( n->is_If() && n->in(1) == test->in(1) /*&& n != loop->tail()->in(0)*/ ) {
 328             // IfNode was dominated by version in peeled loop body
 329             progress = true;
 330             dominated_by( old_new[prev->_idx], n );
 331           }
 332         }
 333       }
 334       prev = test;
 335       test = idom(test);
 336     } // End of scan tests in loop
 337 
 338   } // End of while( progress )
 339 }


 602   if (trip_count <= 3)
 603     return true;
 604 
 605   // Take into account that after unroll conjoined heads and tails will fold,
 606   // otherwise policy_unroll() may allow more unrolling than max unrolling.
 607   uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count;
 608   uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE;
 609   if (body_size != tst_body_size) // Check for int overflow
 610     return false;
 611   if (new_body_size > unroll_limit ||
 612       // Unrolling can result in a large amount of node construction
 613       new_body_size >= phase->C->max_node_limit() - phase->C->live_nodes()) {
 614     return false;
 615   }
 616 
 617   // Do not unroll a loop with String intrinsics code.
 618   // String intrinsics are large and have loops.
 619   for (uint k = 0; k < _body.size(); k++) {
 620     Node* n = _body.at(k);
 621     switch (n->Opcode()) {
 622       case Op_StrComp:
 623       case Op_StrEquals:
 624       case Op_StrIndexOf:
 625       case Op_StrIndexOfChar:
 626       case Op_EncodeISOArray:
 627       case Op_AryEq:
 628       case Op_HasNegatives: {
 629         return false;
 630       }
 631 #if INCLUDE_RTM_OPT
 632       case Op_FastLock:
 633       case Op_FastUnlock: {
 634         // Don't unroll RTM locking code because it is large.
 635         if (UseRTMLocking) {
 636           return false;
 637         }
 638       }
 639 #endif
 640     } // switch
 641   }
 642 
 643   return true; // Do maximally unroll
 644 }
 645 
 646 
 647 //------------------------------policy_unroll----------------------------------
 648 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 649 // the loop is a CountedLoop and the body is small enough.
 650 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 651 
 652   CountedLoopNode *cl = _head->as_CountedLoop();
 653   assert(cl->is_normal_loop() || cl->is_main_loop(), "");


 720         }
 721       }
 722     }
 723   }
 724 
 725   // After unroll limit will be adjusted: new_limit = limit-stride.
 726   // Bailout if adjustment overflow.
 727   const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
 728   if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) ||
 729       stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo))
 730     return false;  // overflow
 731 
 732   // Adjust body_size to determine if we unroll or not
 733   uint body_size = _body.size();
 734   // Key test to unroll loop in CRC32 java code
 735   int xors_in_loop = 0;
 736   // Also count ModL, DivL and MulL which expand mightly
 737   for (uint k = 0; k < _body.size(); k++) {
 738     Node* n = _body.at(k);
 739     switch (n->Opcode()) {
 740       case Op_XorI: xors_in_loop++; break; // CRC32 java code
 741       case Op_ModL: body_size += 30; break;
 742       case Op_DivL: body_size += 30; break;
 743       case Op_MulL: body_size += 10; break;
 744       case Op_StrComp:
 745       case Op_StrEquals:
 746       case Op_StrIndexOf:
 747       case Op_StrIndexOfChar:
 748       case Op_EncodeISOArray:
 749       case Op_AryEq:
 750       case Op_HasNegatives: {
 751         // Do not unroll a loop with String intrinsics code.
 752         // String intrinsics are large and have loops.
 753         return false;
 754       }
 755 #if INCLUDE_RTM_OPT
 756       case Op_FastLock:
 757       case Op_FastUnlock: {
 758         // Don't unroll RTM locking code because it is large.
 759         if (UseRTMLocking) {
 760           return false;
 761         }
 762       }
 763 #endif
 764     } // switch
 765   }
 766 
 767   if (UseSuperWord) {
 768     if (!cl->is_reduction_loop()) {
 769       phase->mark_reductions(this);
 770     }
 771 
 772     // Only attempt slp analysis when user controls do not prohibit it
 773     if (LoopMaxUnroll > _local_loop_unroll_factor) {
 774       // Once policy_slp_analysis succeeds, mark the loop with the
 775       // maximal unroll factor so that we minimize analysis passes
 776       if (future_unroll_ct >= _local_loop_unroll_factor) {
 777         policy_unroll_slp_analysis(cl, phase, future_unroll_ct);


 844 //------------------------------policy_range_check-----------------------------
 845 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 846 // Actually we do iteration-splitting, a more powerful form of RCE.
 847 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
 848   if (!RangeCheckElimination) return false;
 849 
 850   CountedLoopNode *cl = _head->as_CountedLoop();
 851   // If we unrolled with no intention of doing RCE and we later
 852   // changed our minds, we got no pre-loop.  Either we need to
 853   // make a new pre-loop, or we gotta disallow RCE.
 854   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
 855   Node *trip_counter = cl->phi();
 856 
 857   // check for vectorized loops, some opts are no longer needed
 858   if (cl->do_unroll_only()) return false;
 859 
 860   // Check loop body for tests of trip-counter plus loop-invariant vs
 861   // loop-invariant.
 862   for (uint i = 0; i < _body.size(); i++) {
 863     Node *iff = _body[i];
 864     if (iff->Opcode() == Op_If ||
 865         iff->Opcode() == Op_RangeCheck) { // Test?
 866 
 867       // Comparing trip+off vs limit
 868       Node *bol = iff->in(1);
 869       if (bol->req() != 2) continue; // dead constant test
 870       if (!bol->is_Bool()) {
 871         assert(bol->Opcode() == Op_Conv2B, "predicate check only");
 872         continue;
 873       }
 874       if (bol->as_Bool()->_test._test == BoolTest::ne)
 875         continue; // not RC
 876 
 877       Node *cmp = bol->in(1);
 878       Node *rc_exp = cmp->in(1);
 879       Node *limit = cmp->in(2);
 880 
 881       Node *limit_c = phase->get_ctrl(limit);
 882       if( limit_c == phase->C->top() )
 883         return false;           // Found dead test on live IF?  No RCE!
 884       if( is_member(phase->get_loop(limit_c) ) ) {
 885         // Compare might have operands swapped; commute them
 886         rc_exp = cmp->in(2);
 887         limit  = cmp->in(1);
 888         limit_c = phase->get_ctrl(limit);
 889         if( is_member(phase->get_loop(limit_c) ) )
 890           continue;             // Both inputs are loop varying; cannot RCE
 891       }


1030   // Add the post loop
1031   CountedLoopNode *post_head = NULL;
1032   Node *main_exit = insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1033 
1034   //------------------------------
1035   // Step B: Create Pre-Loop.
1036 
1037   // Step B1: Clone the loop body.  The clone becomes the pre-loop.  The main
1038   // loop pre-header illegally has 2 control users (old & new loops).
1039   clone_loop( loop, old_new, dd_main_head );
1040   CountedLoopNode*    pre_head = old_new[main_head->_idx]->as_CountedLoop();
1041   CountedLoopEndNode* pre_end  = old_new[main_end ->_idx]->as_CountedLoopEnd();
1042   pre_head->set_pre_loop(main_head);
1043   Node *pre_incr = old_new[incr->_idx];
1044 
1045   // Reduce the pre-loop trip count.
1046   pre_end->_prob = PROB_FAIR;
1047 
1048   // Find the pre-loop normal exit.
1049   Node* pre_exit = pre_end->proj_out(false);
1050   assert( pre_exit->Opcode() == Op_IfFalse, "" );
1051   IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
1052   _igvn.register_new_node_with_optimizer( new_pre_exit );
1053   set_idom(new_pre_exit, pre_end, dd_main_head);
1054   set_loop(new_pre_exit, loop->_parent);
1055 
1056   // Step B2: Build a zero-trip guard for the main-loop.  After leaving the
1057   // pre-loop, the main-loop may not execute at all.  Later in life this
1058   // zero-trip guard will become the minimum-trip guard when we unroll
1059   // the main-loop.
1060   Node *min_opaq = new Opaque1Node(C, limit);
1061   Node *min_cmp  = new CmpINode( pre_incr, min_opaq );
1062   Node *min_bol  = new BoolNode( min_cmp, b_test );
1063   register_new_node( min_opaq, new_pre_exit );
1064   register_new_node( min_cmp , new_pre_exit );
1065   register_new_node( min_bol , new_pre_exit );
1066 
1067   // Build the IfNode (assume the main-loop is executed always).
1068   IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
1069   _igvn.register_new_node_with_optimizer( min_iff );
1070   set_idom(min_iff, new_pre_exit, dd_main_head);


1285   // so guess that unit vector trips is a reasonable value.
1286   post_head->set_profile_trip_cnt(4.0);
1287   post_head->set_is_rce_post_loop();
1288 
1289   // Now force out all loop-invariant dominating tests.  The optimizer
1290   // finds some, but we _know_ they are all useless.
1291   peeled_dom_test_elim(loop, old_new);
1292   loop->record_for_igvn();
1293 }
1294 
1295 
1296 //------------------------------insert_post_loop-------------------------------
1297 // Insert post loops.  Add a post loop to the given loop passed.
1298 Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
1299                                        CountedLoopNode *main_head, CountedLoopEndNode *main_end,
1300                                        Node *incr, Node *limit, CountedLoopNode *&post_head) {
1301 
1302   //------------------------------
1303   // Step A: Create a new post-Loop.
1304   Node* main_exit = main_end->proj_out(false);
1305   assert(main_exit->Opcode() == Op_IfFalse, "");
1306   int dd_main_exit = dom_depth(main_exit);
1307 
1308   // Step A1: Clone the loop body of main.  The clone becomes the vector post-loop.
1309   // The main loop pre-header illegally has 2 control users (old & new loops).
1310   clone_loop(loop, old_new, dd_main_exit);
1311   assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
1312   post_head = old_new[main_head->_idx]->as_CountedLoop();
1313   post_head->set_normal_loop();
1314   post_head->set_post_loop(main_head);
1315 
1316   // Reduce the post-loop trip count.
1317   CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
1318   post_end->_prob = PROB_FAIR;
1319 
1320   // Build the main-loop normal exit.
1321   IfFalseNode *new_main_exit = new IfFalseNode(main_end);
1322   _igvn.register_new_node_with_optimizer(new_main_exit);
1323   set_idom(new_main_exit, main_end, dd_main_exit);
1324   set_loop(new_main_exit, loop->_parent);
1325 
1326   // Step A2: Build a zero-trip guard for the post-loop.  After leaving the
1327   // main-loop, the post-loop may not execute at all.  We 'opaque' the incr
1328   // (the previous loop trip-counter exit value) because we will be changing
1329   // the exit value (via additional unrolling) so we cannot constant-fold away the zero
1330   // trip guard until all unrolling is done.
1331   Node *zer_opaq = new Opaque1Node(C, incr);


1517         // No underflow.
1518         new_limit = new SubINode(limit, stride);
1519       } else {
1520         // (limit - stride) may underflow.
1521         // Clamp the adjustment value with MININT or MAXINT:
1522         //
1523         //   new_limit = limit-stride
1524         //   if (stride > 0)
1525         //     new_limit = (limit < new_limit) ? MININT : new_limit;
1526         //   else
1527         //     new_limit = (limit > new_limit) ? MAXINT : new_limit;
1528         //
1529         BoolTest::mask bt = loop_end->test_trip();
1530         assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
1531         Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
1532         set_ctrl(adj_max, C->root());
1533         Node* old_limit = NULL;
1534         Node* adj_limit = NULL;
1535         Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
1536         if (loop_head->unrolled_count() > 1 &&
1537             limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
1538             limit->in(CMoveNode::IfTrue) == adj_max &&
1539             bol->as_Bool()->_test._test == bt &&
1540             bol->in(1)->Opcode() == Op_CmpI &&
1541             bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
1542           // Loop was unrolled before.
1543           // Optimize the limit to avoid nested CMove:
1544           // use original limit as old limit.
1545           old_limit = bol->in(1)->in(1);
1546           // Adjust previous adjusted limit.
1547           adj_limit = limit->in(CMoveNode::IfFalse);
1548           adj_limit = new SubINode(adj_limit, stride);
1549         } else {
1550           old_limit = limit;
1551           adj_limit = new SubINode(limit, stride);
1552         }
1553         assert(old_limit != NULL && adj_limit != NULL, "");
1554         register_new_node( adj_limit, ctrl ); // adjust amount
1555         Node* adj_cmp = new CmpINode(old_limit, adj_limit);
1556         register_new_node( adj_cmp, ctrl );
1557         Node* adj_bool = new BoolNode(adj_cmp, bt);
1558         register_new_node( adj_bool, ctrl );
1559         new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
1560       }


1700 
1701 void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
1702   if (SuperWordReductions == false) return;
1703 
1704   CountedLoopNode* loop_head = loop->_head->as_CountedLoop();
1705   if (loop_head->unrolled_count() > 1) {
1706     return;
1707   }
1708 
1709   Node* trip_phi = loop_head->phi();
1710   for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
1711     Node* phi = loop_head->fast_out(i);
1712     if (phi->is_Phi() && phi->outcnt() > 0 && phi != trip_phi) {
1713       // For definitions which are loop inclusive and not tripcounts.
1714       Node* def_node = phi->in(LoopNode::LoopBackControl);
1715 
1716       if (def_node != NULL) {
1717         Node* n_ctrl = get_ctrl(def_node);
1718         if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
1719           // Now test it to see if it fits the standard pattern for a reduction operator.
1720           int opc = def_node->Opcode();
1721           if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
1722             if (!def_node->is_reduction()) { // Not marked yet
1723               // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
1724               bool ok = false;
1725               for (unsigned j = 1; j < def_node->req(); j++) {
1726                 Node* in = def_node->in(j);
1727                 if (in == phi) {
1728                   ok = true;
1729                   break;
1730                 }
1731               }
1732 
1733               // do nothing if we did not match the initial criteria
1734               if (ok == false) {
1735                 continue;
1736               }
1737 
1738               // The result of the reduction must not be used in the loop
1739               for (DUIterator_Fast imax, i = def_node->fast_outs(imax); i < imax && ok; i++) {
1740                 Node* u = def_node->fast_out(i);


1901     //   ( if (scale < 0) /* and stride > 0 */
1902     //       I < (low_limit-(offset+1))/scale
1903     //     else /* scale > 0 and stride < 0 */
1904     //       I > (low_limit-(offset+1))/scale
1905     //   )
1906 
1907     *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl);
1908   }
1909 }
1910 
1911 
1912 //------------------------------is_scaled_iv---------------------------------
1913 // Return true if exp is a constant times an induction var
1914 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
1915   if (exp == iv) {
1916     if (p_scale != NULL) {
1917       *p_scale = 1;
1918     }
1919     return true;
1920   }
1921   int opc = exp->Opcode();
1922   if (opc == Op_MulI) {
1923     if (exp->in(1) == iv && exp->in(2)->is_Con()) {
1924       if (p_scale != NULL) {
1925         *p_scale = exp->in(2)->get_int();
1926       }
1927       return true;
1928     }
1929     if (exp->in(2) == iv && exp->in(1)->is_Con()) {
1930       if (p_scale != NULL) {
1931         *p_scale = exp->in(1)->get_int();
1932       }
1933       return true;
1934     }
1935   } else if (opc == Op_LShiftI) {
1936     if (exp->in(1) == iv && exp->in(2)->is_Con()) {
1937       if (p_scale != NULL) {
1938         *p_scale = 1 << exp->in(2)->get_int();
1939       }
1940       return true;
1941     }
1942   }
1943   return false;
1944 }
1945 
1946 //-----------------------------is_scaled_iv_plus_offset------------------------------
1947 // Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
1948 bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) {
1949   if (is_scaled_iv(exp, iv, p_scale)) {
1950     if (p_offset != NULL) {
1951       Node *zero = _igvn.intcon(0);
1952       set_ctrl(zero, C->root());
1953       *p_offset = zero;
1954     }
1955     return true;
1956   }
1957   int opc = exp->Opcode();
1958   if (opc == Op_AddI) {
1959     if (is_scaled_iv(exp->in(1), iv, p_scale)) {
1960       if (p_offset != NULL) {
1961         *p_offset = exp->in(2);
1962       }
1963       return true;
1964     }
1965     if (is_scaled_iv(exp->in(2), iv, p_scale)) {
1966       if (p_offset != NULL) {
1967         *p_offset = exp->in(1);
1968       }
1969       return true;
1970     }
1971     if (exp->in(2)->is_Con()) {
1972       Node* offset2 = NULL;
1973       if (depth < 2 &&
1974           is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
1975                                    p_offset != NULL ? &offset2 : NULL, depth+1)) {
1976         if (p_offset != NULL) {
1977           Node *ctrl_off2 = get_ctrl(offset2);
1978           Node* offset = new AddINode(offset2, exp->in(2));
1979           register_new_node(offset, ctrl_off2);
1980           *p_offset = offset;
1981         }
1982         return true;
1983       }
1984     }
1985   } else if (opc == Op_SubI) {
1986     if (is_scaled_iv(exp->in(1), iv, p_scale)) {
1987       if (p_offset != NULL) {
1988         Node *zero = _igvn.intcon(0);
1989         set_ctrl(zero, C->root());
1990         Node *ctrl_off = get_ctrl(exp->in(2));
1991         Node* offset = new SubINode(zero, exp->in(2));
1992         register_new_node(offset, ctrl_off);
1993         *p_offset = offset;
1994       }
1995       return true;
1996     }
1997     if (is_scaled_iv(exp->in(2), iv, p_scale)) {
1998       if (p_offset != NULL) {
1999         *p_scale *= -1;
2000         *p_offset = exp->in(1);
2001       }
2002       return true;
2003     }
2004   }
2005   return false;


2032   // to not ever trip end tests
2033   Node *main_limit = cl->limit();
2034 
2035   // Check graph shape. Cannot optimize a loop if zero-trip
2036   // Opaque1 node is optimized away and then another round
2037   // of loop opts attempted.
2038   if (!is_canonical_loop_entry(cl)) {
2039     return closed_range_checks;
2040   }
2041 
2042   // Need to find the main-loop zero-trip guard
2043   Node *ctrl  = cl->in(LoopNode::EntryControl);
2044   Node *iffm = ctrl->in(0);
2045   Node *opqzm = iffm->in(1)->in(1)->in(2);
2046   assert(opqzm->in(1) == main_limit, "do not understand situation");
2047 
2048   // Find the pre-loop limit; we will expand its iterations to
2049   // not ever trip low tests.
2050   Node *p_f = iffm->in(0);
2051   // pre loop may have been optimized out
2052   if (p_f->Opcode() != Op_IfFalse) {
2053     return closed_range_checks;
2054   }
2055   CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2056   assert(pre_end->loopnode()->is_pre_loop(), "");
2057   Node *pre_opaq1 = pre_end->limit();
2058   // Occasionally it's possible for a pre-loop Opaque1 node to be
2059   // optimized away and then another round of loop opts attempted.
2060   // We can not optimize this particular loop in that case.
2061   if (pre_opaq1->Opcode() != Op_Opaque1)
2062     return closed_range_checks;
2063   Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2064   Node *pre_limit = pre_opaq->in(1);
2065 
2066   // Where do we put new limit calculations
2067   Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2068 
2069   // Ensure the original loop limit is available from the
2070   // pre-loop Opaque1 node.
2071   Node *orig_limit = pre_opaq->original_loop_limit();
2072   if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP)
2073     return closed_range_checks;
2074 
2075   // Must know if its a count-up or count-down loop
2076 
2077   int stride_con = cl->stride_con();
2078   Node *zero = _igvn.intcon(0);
2079   Node *one  = _igvn.intcon(1);
2080   // Use symmetrical int range [-max_jint,max_jint]
2081   Node *mini = _igvn.intcon(-max_jint);
2082   set_ctrl(zero, C->root());
2083   set_ctrl(one,  C->root());
2084   set_ctrl(mini, C->root());
2085 
2086   // Range checks that do not dominate the loop backedge (ie.
2087   // conditionally executed) can lengthen the pre loop limit beyond
2088   // the original loop limit. To prevent this, the pre limit is
2089   // (for stride > 0) MINed with the original loop limit (MAXed
2090   // stride < 0) when some range_check (rc) is conditionally
2091   // executed.
2092   bool conditional_rc = false;
2093 
2094   // Count number of range checks and reduce by load range limits, if zero,
2095   // the loop is in canonical form to multiversion.
2096   closed_range_checks = 0;
2097 
2098   // Check loop body for tests of trip-counter plus loop-invariant vs
2099   // loop-invariant.
2100   for( uint i = 0; i < loop->_body.size(); i++ ) {
2101     Node *iff = loop->_body[i];
2102     if (iff->Opcode() == Op_If ||
2103         iff->Opcode() == Op_RangeCheck) { // Test?
2104       // Test is an IfNode, has 2 projections.  If BOTH are in the loop
2105       // we need loop unswitching instead of iteration splitting.
2106       closed_range_checks++;
2107       Node *exit = loop->is_loop_exit(iff);
2108       if( !exit ) continue;
2109       int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
2110 
2111       // Get boolean condition to test
2112       Node *i1 = iff->in(1);
2113       if( !i1->is_Bool() ) continue;
2114       BoolNode *bol = i1->as_Bool();
2115       BoolTest b_test = bol->_test;
2116       // Flip sense of test if exit condition is flipped
2117       if( flip )
2118         b_test = b_test.negate();
2119 
2120       // Get compare
2121       Node *cmp = bol->in(1);
2122 
2123       // Look for trip_counter + offset vs limit
2124       Node *rc_exp = cmp->in(1);
2125       Node *limit  = cmp->in(2);
2126       jint scale_con= 1;        // Assume trip counter not scaled
2127 
2128       Node *limit_c = get_ctrl(limit);
2129       if( loop->is_member(get_loop(limit_c) ) ) {


2158 
2159       // As above for the 'limit', the 'offset' maybe pinned below the
2160       // zero trip test.
2161       if( offset_c == ctrl ) {
2162         continue; // Don't rce this check but continue looking for other candidates.
2163       }
2164 #ifdef ASSERT
2165       if (TraceRangeLimitCheck) {
2166         tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2167         bol->dump(2);
2168       }
2169 #endif
2170       // At this point we have the expression as:
2171       //   scale_con * trip_counter + offset :: limit
2172       // where scale_con, offset and limit are loop invariant.  Trip_counter
2173       // monotonically increases by stride_con, a constant.  Both (or either)
2174       // stride_con and scale_con can be negative which will flip about the
2175       // sense of the test.
2176 
2177       // Adjust pre and main loop limits to guard the correct iteration set
2178       if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2179         if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2180           // The underflow and overflow limits: 0 <= scale*I+offset < limit
2181           add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2182           // (0-offset)/scale could be outside of loop iterations range.
2183           conditional_rc = true;
2184         } else {
2185           if (PrintOpto) {
2186             tty->print_cr("missed RCE opportunity");
2187           }
2188           continue;             // In release mode, ignore it
2189         }
2190       } else {                  // Otherwise work on normal compares
2191         switch( b_test._test ) {
2192         case BoolTest::gt:
2193           // Fall into GE case
2194         case BoolTest::ge:
2195           // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2196           scale_con = -scale_con;
2197           offset = new SubINode( zero, offset );
2198           register_new_node( offset, pre_ctrl );


2226 
2227       // Kill the eliminated test
2228       C->set_major_progress();
2229       Node *kill_con = _igvn.intcon( 1-flip );
2230       set_ctrl(kill_con, C->root());
2231       _igvn.replace_input_of(iff, 1, kill_con);
2232       // Find surviving projection
2233       assert(iff->is_If(), "");
2234       ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2235       // Find loads off the surviving projection; remove their control edge
2236       for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2237         Node* cd = dp->fast_out(i); // Control-dependent node
2238         if (cd->is_Load() && cd->depends_only_on_test()) {   // Loads can now float around in the loop
2239           // Allow the load to float around in the loop, or before it
2240           // but NOT before the pre-loop.
2241           _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
2242           --i;
2243           --imax;
2244         }
2245       }
2246       if (limit->Opcode() == Op_LoadRange) {
2247         closed_range_checks--;
2248       }
2249 
2250     } // End of is IF
2251 
2252   }
2253 
2254   // Update loop limits
2255   if (conditional_rc) {
2256     pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2257                                  : (Node*)new MaxINode(pre_limit, orig_limit);
2258     register_new_node(pre_limit, pre_ctrl);
2259   }
2260   _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2261 
2262   // Note:: we are making the main loop limit no longer precise;
2263   // need to round up based on stride.
2264   cl->set_nonexact_trip_count();
2265   Node *main_cle = cl->loopexit();
2266   Node *main_bol = main_cle->in(1);


2284 
2285   return closed_range_checks;
2286 }
2287 
2288 //------------------------------has_range_checks-------------------------------
2289 // Check to see if RCE cleaned the current loop of range-checks.
2290 void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
2291   assert(RangeCheckElimination, "");
2292 
2293   // skip if not a counted loop
2294   if (!loop->is_counted()) return;
2295 
2296   CountedLoopNode *cl = loop->_head->as_CountedLoop();
2297 
2298   // skip this loop if it is already checked
2299   if (cl->has_been_range_checked()) return;
2300 
2301   // Now check for existance of range checks
2302   for (uint i = 0; i < loop->_body.size(); i++) {
2303     Node *iff = loop->_body[i];
2304     int iff_opc = iff->Opcode();
2305     if (iff_opc == Op_If || iff_opc == Op_RangeCheck) {
2306       cl->mark_has_range_checks();
2307       break;
2308     }
2309   }
2310   cl->set_has_been_range_checked();
2311 }
2312 
2313 //-------------------------multi_version_post_loops----------------------------
2314 // Check the range checks that remain, if simple, use the bounds to guard
2315 // which version to a post loop we execute, one with range checks or one without
2316 bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
2317   bool multi_version_succeeded = false;
2318   assert(RangeCheckElimination, "");
2319   CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
2320   assert(legacy_cl->is_post_loop(), "");
2321 
2322   // Check for existance of range checks using the unique instance to make a guard with
2323   Unique_Node_List worklist;
2324   for (uint i = 0; i < legacy_loop->_body.size(); i++) {
2325     Node *iff = legacy_loop->_body[i];
2326     int iff_opc = iff->Opcode();
2327     if (iff_opc == Op_If || iff_opc == Op_RangeCheck) {
2328       worklist.push(iff);
2329     }
2330   }
2331 
2332   // Find RCE'd post loop so that we can stage its guard.
2333   if (!is_canonical_loop_entry(legacy_cl)) return multi_version_succeeded;
2334   Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
2335   Node* iffm = ctrl->in(0);
2336 
2337   // Now we test that both the post loops are connected
2338   Node* post_loop_region = iffm->in(0);
2339   if (post_loop_region == NULL) return multi_version_succeeded;
2340   if (!post_loop_region->is_Region()) return multi_version_succeeded;
2341   Node* covering_region = post_loop_region->in(RegionNode::Control+1);
2342   if (covering_region == NULL) return multi_version_succeeded;
2343   if (!covering_region->is_Region()) return multi_version_succeeded;
2344   Node* p_f = covering_region->in(RegionNode::Control);
2345   if (p_f == NULL) return multi_version_succeeded;
2346   if (!p_f->is_IfFalse()) return multi_version_succeeded;
2347   if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded;


2365 #endif
2366 
2367   // Now fetch the limit we want to compare against
2368   Node *limit = rce_cl->limit();
2369   bool first_time = true;
2370 
2371   // If we got this far, we identified the post loop which has been RCE'd and
2372   // we have a work list.  Now we will try to transform the if guard to cause
2373   // the loop pair to be multi version executed with the determination left to runtime
2374   // or the optimizer if full information is known about the given arrays at compile time.
2375   Node *last_min = NULL;
2376   multi_version_succeeded = true;
2377   while (worklist.size()) {
2378     Node* rc_iffm = worklist.pop();
2379     if (rc_iffm->is_If()) {
2380       Node *rc_bolzm = rc_iffm->in(1);
2381       if (rc_bolzm->is_Bool()) {
2382         Node *rc_cmpzm = rc_bolzm->in(1);
2383         if (rc_cmpzm->is_Cmp()) {
2384           Node *rc_left = rc_cmpzm->in(2);
2385           if (rc_left->Opcode() != Op_LoadRange) {
2386             multi_version_succeeded = false;
2387             break;
2388           }
2389           if (first_time) {
2390             last_min = rc_left;
2391             first_time = false;
2392           } else {
2393             Node *cur_min = new MinINode(last_min, rc_left);
2394             last_min = cur_min;
2395             _igvn.register_new_node_with_optimizer(last_min);
2396           }
2397         }
2398       }
2399     }
2400   }
2401 
2402   // All we have to do is update the limit of the rce loop
2403   // with the min of our expression and the current limit.
2404   // We will use this expression to replace the current limit.
2405   if (last_min && multi_version_succeeded) {


2444       }
2445     }
2446   }
2447 }
2448 
2449 //------------------------------DCE_loop_body----------------------------------
2450 // Remove simplistic dead code from loop body
2451 void IdealLoopTree::DCE_loop_body() {
2452   for( uint i = 0; i < _body.size(); i++ )
2453     if( _body.at(i)->outcnt() == 0 )
2454       _body.map( i--, _body.pop() );
2455 }
2456 
2457 
2458 //------------------------------adjust_loop_exit_prob--------------------------
2459 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
2460 // Replace with a 1-in-10 exit guess.
2461 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
2462   Node *test = tail();
2463   while( test != _head ) {
2464     uint top = test->Opcode();
2465     if( top == Op_IfTrue || top == Op_IfFalse ) {
2466       int test_con = ((ProjNode*)test)->_con;
2467       assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity");
2468       IfNode *iff = test->in(0)->as_If();
2469       if( iff->outcnt() == 2 ) {        // Ignore dead tests
2470         Node *bol = iff->in(1);
2471         if( bol && bol->req() > 1 && bol->in(1) &&
2472             ((bol->in(1)->Opcode() == Op_StorePConditional ) ||
2473              (bol->in(1)->Opcode() == Op_StoreIConditional ) ||
2474              (bol->in(1)->Opcode() == Op_StoreLConditional ) ||
2475              (bol->in(1)->Opcode() == Op_CompareAndExchangeB ) ||
2476              (bol->in(1)->Opcode() == Op_CompareAndExchangeS ) ||
2477              (bol->in(1)->Opcode() == Op_CompareAndExchangeI ) ||
2478              (bol->in(1)->Opcode() == Op_CompareAndExchangeL ) ||
2479              (bol->in(1)->Opcode() == Op_CompareAndExchangeP ) ||
2480              (bol->in(1)->Opcode() == Op_CompareAndExchangeN ) ||
2481              (bol->in(1)->Opcode() == Op_WeakCompareAndSwapB ) ||
2482              (bol->in(1)->Opcode() == Op_WeakCompareAndSwapS ) ||
2483              (bol->in(1)->Opcode() == Op_WeakCompareAndSwapI ) ||
2484              (bol->in(1)->Opcode() == Op_WeakCompareAndSwapL ) ||
2485              (bol->in(1)->Opcode() == Op_WeakCompareAndSwapP ) ||
2486              (bol->in(1)->Opcode() == Op_WeakCompareAndSwapN ) ||
2487              (bol->in(1)->Opcode() == Op_CompareAndSwapB ) ||
2488              (bol->in(1)->Opcode() == Op_CompareAndSwapS ) ||
2489              (bol->in(1)->Opcode() == Op_CompareAndSwapI ) ||
2490              (bol->in(1)->Opcode() == Op_CompareAndSwapL ) ||
2491              (bol->in(1)->Opcode() == Op_CompareAndSwapP ) ||
2492              (bol->in(1)->Opcode() == Op_CompareAndSwapN )))
2493           return;               // Allocation loops RARELY take backedge
2494         // Find the OTHER exit path from the IF
2495         Node* ex = iff->proj_out(1-test_con);
2496         float p = iff->_prob;
2497         if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) {
2498           if( top == Op_IfTrue ) {
2499             if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) {
2500               iff->_prob = PROB_STATIC_FREQUENT;
2501             }
2502           } else {
2503             if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) {
2504               iff->_prob = PROB_STATIC_INFREQUENT;
2505             }
2506           }
2507         }
2508       }
2509     }
2510     test = phase->idom(test);
2511   }
2512 }
2513 
#ifdef ASSERT
// Debug-only helper: starting from a main CountedLoop, walk the expected
// graph shape back to its pre-loop and return the pre-loop's head node,
// asserting the shape at every step:
//   main entry -> zero-trip-guard projection -> If -> pre-loop exit
//   (IfFalse) -> pre-loop CountedLoopEnd
static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
  Node *ctrl  = cl->in(LoopNode::EntryControl);
  assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
  Node *iffm = ctrl->in(0);
  assert(iffm->Opcode() == Op_If, "");
  // The guard's control must be the pre-loop's normal exit projection
  Node *p_f = iffm->in(0);
  assert(p_f->Opcode() == Op_IfFalse, "");
  CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
  assert(pre_end->loopnode()->is_pre_loop(), "");
  return pre_end->loopnode();
}
#endif
2527 
// Remove the main and post loops and make the pre loop execute all
// iterations. Useful when the pre loop is found empty.
void IdealLoopTree::remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase) {
  // 'cl' is the (empty) pre-loop; its exit test compares against an Opaque1
  // limit while the main loop still exists.
  CountedLoopEndNode* pre_end = cl->loopexit();
  Node* pre_cmp = pre_end->cmp_node();
  if (pre_cmp->in(2)->Opcode() != Op_Opaque1) {
    // Only safe to remove the main loop if the compiler optimized it
    // out based on an unknown number of iterations
    return;
  }

  // Can we find the main loop?
  // The main loop is expected to be the next sibling in the loop tree.
  if (_next == NULL) {
    return;
  }

  Node* next_head = _next->_head;
  if (!next_head->is_CountedLoop()) {
    return;
  }

  CountedLoopNode* main_head = next_head->as_CountedLoop();
  if (!main_head->is_main_loop()) {
    return;
  }

  // Sanity: the main loop we found must point back at this pre-loop.
  assert(locate_pre_from_main(main_head) == cl, "bad main loop");
  // The If of the main loop's zero-trip guard.
  Node* main_iff = main_head->in(LoopNode::EntryControl)->in(0);

  // Remove the Opaque1Node of the pre loop and make it execute all iterations
  // NOTE(review): this takes in(2) of the pre-loop's Opaque1 — presumably the
  // pre-loop Opaque1 carries the original loop limit as its second input
  // (set up when the pre/main loops were created); confirm against
  // insert_pre_post_loops before changing.
  phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
  // Remove the Opaque1Node of the main loop so it can be optimized out
  // (the main-loop Opaque1's in(1) is the plain limit value).
  Node* main_cmp = main_iff->in(1)->in(1);
  assert(main_cmp->in(2)->Opcode() == Op_Opaque1, "main loop has no opaque node?");
  phase->_igvn.replace_input_of(main_cmp, 2, main_cmp->in(2)->in(1));
}
2564 
2565 //------------------------------policy_do_remove_empty_loop--------------------
2566 // Micro-benchmark spamming.  Policy is to always remove empty loops.
2567 // The 'DO' part is to replace the trip counter with the value it will
2568 // have on the last iteration.  This will break the loop.
2569 bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
2570   // Minimum size must be empty loop
2571   if (_body.size() > EMPTY_LOOP_SIZE)
2572     return false;
2573 
2574   if (!_head->is_CountedLoop())
2575     return false;     // Dead loop
2576   CountedLoopNode *cl = _head->as_CountedLoop();
2577   if (!cl->is_valid_counted_loop())
2578     return false; // Malformed loop
2579   if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
2580     return false;             // Infinite loop
2581 
2582   if (cl->is_pre_loop()) {
2583     // If the loop we are removing is a pre-loop then the main and
2584     // post loop can be removed as well
2585     remove_main_post_loops(cl, phase);
2586   }
2587 
2588 #ifdef ASSERT
2589   // Ensure only one phi which is the iv.
2590   Node* iv = NULL;
2591   for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) {
2592     Node* n = cl->fast_out(i);
2593     if (n->Opcode() == Op_Phi) {
2594       assert(iv == NULL, "Too many phis" );
2595       iv = n;
2596     }
2597   }
2598   assert(iv == cl->phi(), "Wrong phi" );
2599 #endif
2600 
2601   // main and post loops have explicitly created zero trip guard
2602   bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
2603   if (needs_guard) {
2604     // Skip guard if values not overlap.
2605     const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
2606     const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
2607     int  stride_con = cl->stride_con();
2608     if (stride_con > 0) {
2609       needs_guard = (init_t->_hi >= limit_t->_lo);
2610     } else {
2611       needs_guard = (init_t->_lo <= limit_t->_hi);
2612     }
2613   }
2614   if (needs_guard) {
2615     // Check for an obvious zero trip guard.
2616     Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl));
2617     if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
2618       bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
2619       // The test should look like just the backedge of a CountedLoop
2620       Node* iff = inctrl->in(0);
2621       if (iff->is_If()) {
2622         Node* bol = iff->in(1);
2623         if (bol->is_Bool()) {
2624           BoolTest test = bol->as_Bool()->_test;
2625           if (maybe_swapped) {
2626             test._test = test.commute();
2627             test._test = test.negate();
2628           }
2629           if (test._test == cl->loopexit()->test_trip()) {
2630             Node* cmp = bol->in(1);
2631             int init_idx = maybe_swapped ? 2 : 1;
2632             int limit_idx = maybe_swapped ? 1 : 2;
2633             if (cmp->is_Cmp() && cmp->in(init_idx) == cl->init_trip() && cmp->in(limit_idx) == cl->limit()) {
2634               needs_guard = false;
2635             }
2636           }
2637         }
2638       }


2911 bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
2912                                      Node*& shift, Node*& con) {
2913   const char* msg = NULL;
2914   Node* msg_node = NULL;
2915 
2916   store_value = NULL;
2917   con = NULL;
2918   shift = NULL;
2919 
2920   // Process the loop looking for stores.  If there are multiple
2921   // stores or extra control flow give at this point.
2922   CountedLoopNode* head = lpt->_head->as_CountedLoop();
2923   for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
2924     Node* n = lpt->_body.at(i);
2925     if (n->outcnt() == 0) continue; // Ignore dead
2926     if (n->is_Store()) {
2927       if (store != NULL) {
2928         msg = "multiple stores";
2929         break;
2930       }
2931       int opc = n->Opcode();
2932       if (opc == Op_StoreP || opc == Op_StoreN || opc == Op_StoreNKlass || opc == Op_StoreCM) {
2933         msg = "oop fills not handled";
2934         break;
2935       }
2936       Node* value = n->in(MemNode::ValueIn);
2937       if (!lpt->is_invariant(value)) {
2938         msg  = "variant store value";
2939       } else if (!_igvn.type(n->in(MemNode::Address))->isa_aryptr()) {
2940         msg = "not array address";
2941       }
2942       store = n;
2943       store_value = value;
2944     } else if (n->is_If() && n != head->loopexit()) {
2945       msg = "extra control flow";
2946       msg_node = n;
2947     }
2948   }
2949 
2950   if (store == NULL) {
2951     // No store in loop
2952     return false;


2986 #ifndef PRODUCT
2987     if (TraceOptimizeFill) {
2988       tty->print_cr("not fill intrinsic candidate: %s", msg);
2989       if (msg_node != NULL) msg_node->dump();
2990     }
2991 #endif
2992     return false;
2993   }
2994 
2995   // Make sure the address expression can be handled.  It should be
2996   // head->phi * elsize + con.  head->phi might have a ConvI2L(CastII()).
2997   Node* elements[4];
2998   Node* cast = NULL;
2999   Node* conv = NULL;
3000   bool found_index = false;
3001   int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
3002   for (int e = 0; e < count; e++) {
3003     Node* n = elements[e];
3004     if (n->is_Con() && con == NULL) {
3005       con = n;
3006     } else if (n->Opcode() == Op_LShiftX && shift == NULL) {
3007       Node* value = n->in(1);
3008 #ifdef _LP64
3009       if (value->Opcode() == Op_ConvI2L) {
3010         conv = value;
3011         value = value->in(1);
3012       }
3013       if (value->Opcode() == Op_CastII &&
3014           value->as_CastII()->has_range_check()) {
3015         // Skip range check dependent CastII nodes
3016         cast = value;
3017         value = value->in(1);
3018       }
3019 #endif
3020       if (value != head->phi()) {
3021         msg = "unhandled shift in address";
3022       } else {
3023         if (type2aelembytes(store->as_Mem()->memory_type(), true) != (1 << n->in(2)->get_int())) {
3024           msg = "scale doesn't match";
3025         } else {
3026           found_index = true;
3027           shift = n;
3028         }
3029       }
3030     } else if (n->Opcode() == Op_ConvI2L && conv == NULL) {
3031       conv = n;
3032       n = n->in(1);
3033       if (n->Opcode() == Op_CastII &&
3034           n->as_CastII()->has_range_check()) {
3035         // Skip range check dependent CastII nodes
3036         cast = n;
3037         n = n->in(1);
3038       }
3039       if (n == head->phi()) {
3040         found_index = true;
3041       } else {
3042         msg = "unhandled input to ConvI2L";
3043       }
3044     } else if (n == head->phi()) {
3045       // no shift, check below for allowed cases
3046       found_index = true;
3047     } else {
3048       msg = "unhandled node in address";
3049       msg_node = n;
3050     }
3051   }
3052 
3053   if (count == -1) {




 107       cl->set_exact_trip_count((uint)trip_count);
 108     }
 109   }
 110 }
 111 
 112 //------------------------------compute_profile_trip_cnt----------------------------
 113 // Compute loop trip count from profile data as
 114 //    (backedge_count + loop_exit_count) / loop_exit_count
 115 void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) {
 116   if (!_head->is_CountedLoop()) {
 117     return;
 118   }
 119   CountedLoopNode* head = _head->as_CountedLoop();
 120   if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
 121     return; // Already computed
 122   }
 123   float trip_cnt = (float)max_jint; // default is big
 124 
 125   Node* back = head->in(LoopNode::LoopBackControl);
 126   while (back != head) {
 127     if ((back->Opcode() == Opcodes::Op_IfTrue || back->Opcode() == Opcodes::Op_IfFalse) &&
 128         back->in(0) &&
 129         back->in(0)->is_If() &&
 130         back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
 131         back->in(0)->as_If()->_prob != PROB_UNKNOWN) {
 132       break;
 133     }
 134     back = phase->idom(back);
 135   }
 136   if (back != head) {
 137     assert((back->Opcode() == Opcodes::Op_IfTrue || back->Opcode() == Opcodes::Op_IfFalse) &&
 138            back->in(0), "if-projection exists");
 139     IfNode* back_if = back->in(0)->as_If();
 140     float loop_back_cnt = back_if->_fcnt * back_if->_prob;
 141 
 142     // Now compute a loop exit count
 143     float loop_exit_cnt = 0.0f;
 144     for( uint i = 0; i < _body.size(); i++ ) {
 145       Node *n = _body[i];
 146       if( n->is_If() ) {
 147         IfNode *iff = n->as_If();
 148         if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) {
 149           Node *exit = is_loop_exit(iff);
 150           if( exit ) {
 151             float exit_prob = iff->_prob;
 152             if (exit->Opcode() == Opcodes::Op_IfFalse) exit_prob = 1.0 - exit_prob;
 153             if (exit_prob > PROB_MIN) {
 154               float exit_cnt = iff->_fcnt * exit_prob;
 155               loop_exit_cnt += exit_cnt;
 156             }
 157           }
 158         }
 159       }
 160     }
 161     if (loop_exit_cnt > 0.0f) {
 162       trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
 163     } else {
 164       // No exit count so use
 165       trip_cnt = loop_back_cnt;
 166     }
 167   }
 168 #ifndef PRODUCT
 169   if (TraceProfileTripCount) {
 170     tty->print_cr("compute_profile_trip_cnt  lp: %d cnt: %f\n", head->_idx, trip_cnt);
 171   }
 172 #endif
 173   head->set_profile_trip_cnt(trip_cnt);
 174 }
 175 
 176 //---------------------is_invariant_addition-----------------------------
 177 // Return nonzero index of invariant operand for an Add or Sub
 178 // of (nonconstant) invariant and variant values. Helper for reassociate_invariants.
 179 int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) {
 180   Opcodes op = n->Opcode();
 181   if (op == Opcodes::Op_AddI || op == Opcodes::Op_SubI) {
 182     bool in1_invar = this->is_invariant(n->in(1));
 183     bool in2_invar = this->is_invariant(n->in(2));
 184     if (in1_invar && !in2_invar) return 1;
 185     if (!in1_invar && in2_invar) return 2;
 186   }
 187   return 0;
 188 }
 189 
 190 //---------------------reassociate_add_sub-----------------------------
 191 // Reassociate invariant add and subtract expressions:
 192 //
 193 // inv1 + (x + inv2)  =>  ( inv1 + inv2) + x
 194 // (x + inv2) + inv1  =>  ( inv1 + inv2) + x
 195 // inv1 + (x - inv2)  =>  ( inv1 - inv2) + x
 196 // inv1 - (inv2 - x)  =>  ( inv1 - inv2) + x
 197 // (x + inv2) - inv1  =>  (-inv1 + inv2) + x
 198 // (x - inv2) + inv1  =>  ( inv1 - inv2) + x
 199 // (x - inv2) - inv1  =>  (-inv1 - inv2) + x
 200 // inv1 + (inv2 - x)  =>  ( inv1 + inv2) - x
 201 // inv1 - (x - inv2)  =>  ( inv1 + inv2) - x


 273 // Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
 274 // make some loop-invariant test (usually a null-check) happen before the loop.
 275 bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
 276   Node *test = ((IdealLoopTree*)this)->tail();
 277   int  body_size = ((IdealLoopTree*)this)->_body.size();
 278   // Peeling does loop cloning which can result in O(N^2) node construction
 279   if( body_size > 255 /* Prevent overflow for large body_size */
 280       || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) {
 281     return false;           // too large to safely clone
 282   }
 283 
 284   // check for vectorized loops, any peeling done was already applied
 285   if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) return false;
 286 
 287   while( test != _head ) {      // Scan till run off top of loop
 288     if( test->is_If() ) {       // Test?
 289       Node *ctrl = phase->get_ctrl(test->in(1));
 290       if (ctrl->is_top())
 291         return false;           // Found dead test on live IF?  No peeling!
 292       // Standard IF only has one input value to check for loop invariance
 293       assert(test->Opcode() == Opcodes::Op_If || test->Opcode() == Opcodes::Op_CountedLoopEnd || test->Opcode() == Opcodes::Op_RangeCheck, "Check this code when new subtype is added");
 294       // Condition is not a member of this loop?
 295       if( !is_member(phase->get_loop(ctrl)) &&
 296           is_loop_exit(test) )
 297         return true;            // Found reason to peel!
 298     }
 299     // Walk up dominators to loop _head looking for test which is
 300     // executed on every path thru loop.
 301     test = phase->idom(test);
 302   }
 303   return false;
 304 }
 305 
//------------------------------peeled_dom_test_elim---------------------------
// If we got the effect of peeling, either by actually peeling or by making
// a pre-loop which must execute at least once, we can remove all
// loop-invariant dominated tests in the main body.
void PhaseIdealLoop::peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ) {
  // Repeat until no more dominated tests are found: dominated_by() can
  // change the graph and expose further candidates.
  bool progress = true;
  while( progress ) {
    progress = false;           // Reset for next iteration
    Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();
    Node *test = prev->in(0);
    while( test != loop->_head ) { // Scan till run off top of loop

      // 'prev' is the projection we came through; 'test' is its If.
      Opcodes p_op = prev->Opcode();
      if( (p_op == Opcodes::Op_IfFalse || p_op == Opcodes::Op_IfTrue) &&
          test->is_If() &&      // Test?
          !test->in(1)->is_Con() && // And not already obvious?
          // Condition is not a member of this loop?
          !loop->is_member(get_loop(get_ctrl(test->in(1))))){
        // Walk loop body looking for instances of this test
        for( uint i = 0; i < loop->_body.size(); i++ ) {
          Node *n = loop->_body.at(i);
          // Same condition node => this If is dominated by the peeled copy
          // (old_new maps the projection to its clone in the peeled body).
          if( n->is_If() && n->in(1) == test->in(1) /*&& n != loop->tail()->in(0)*/ ) {
            // IfNode was dominated by version in peeled loop body
            progress = true;
            dominated_by( old_new[prev->_idx], n );
          }
        }
      }
      prev = test;
      test = idom(test);
    } // End of scan tests in loop

  } // End of while( progress )
}


 602   if (trip_count <= 3)
 603     return true;
 604 
 605   // Take into account that after unroll conjoined heads and tails will fold,
 606   // otherwise policy_unroll() may allow more unrolling than max unrolling.
 607   uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count;
 608   uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE;
 609   if (body_size != tst_body_size) // Check for int overflow
 610     return false;
 611   if (new_body_size > unroll_limit ||
 612       // Unrolling can result in a large amount of node construction
 613       new_body_size >= phase->C->max_node_limit() - phase->C->live_nodes()) {
 614     return false;
 615   }
 616 
 617   // Do not unroll a loop with String intrinsics code.
 618   // String intrinsics are large and have loops.
 619   for (uint k = 0; k < _body.size(); k++) {
 620     Node* n = _body.at(k);
 621     switch (n->Opcode()) {
 622       case Opcodes::Op_StrComp:
 623       case Opcodes::Op_StrEquals:
 624       case Opcodes::Op_StrIndexOf:
 625       case Opcodes::Op_StrIndexOfChar:
 626       case Opcodes::Op_EncodeISOArray:
 627       case Opcodes::Op_AryEq:
 628       case Opcodes::Op_HasNegatives: {
 629         return false;
 630       }
 631 #if INCLUDE_RTM_OPT
 632       case Opcodes::Op_FastLock:
 633       case Opcodes::Op_FastUnlock: {
 634         // Don't unroll RTM locking code because it is large.
 635         if (UseRTMLocking) {
 636           return false;
 637         }
 638       }
 639 #endif
 640     } // switch
 641   }
 642 
 643   return true; // Do maximally unroll
 644 }
 645 
 646 
 647 //------------------------------policy_unroll----------------------------------
 648 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 649 // the loop is a CountedLoop and the body is small enough.
 650 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 651 
 652   CountedLoopNode *cl = _head->as_CountedLoop();
 653   assert(cl->is_normal_loop() || cl->is_main_loop(), "");


 720         }
 721       }
 722     }
 723   }
 724 
 725   // After unroll limit will be adjusted: new_limit = limit-stride.
 726   // Bailout if adjustment overflow.
 727   const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
 728   if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) ||
 729       stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo))
 730     return false;  // overflow
 731 
 732   // Adjust body_size to determine if we unroll or not
 733   uint body_size = _body.size();
 734   // Key test to unroll loop in CRC32 java code
 735   int xors_in_loop = 0;
 736   // Also count ModL, DivL and MulL which expand mightly
 737   for (uint k = 0; k < _body.size(); k++) {
 738     Node* n = _body.at(k);
 739     switch (n->Opcode()) {
 740       case Opcodes::Op_XorI: xors_in_loop++; break; // CRC32 java code
 741       case Opcodes::Op_ModL: body_size += 30; break;
 742       case Opcodes::Op_DivL: body_size += 30; break;
 743       case Opcodes::Op_MulL: body_size += 10; break;
 744       case Opcodes::Op_StrComp:
 745       case Opcodes::Op_StrEquals:
 746       case Opcodes::Op_StrIndexOf:
 747       case Opcodes::Op_StrIndexOfChar:
 748       case Opcodes::Op_EncodeISOArray:
 749       case Opcodes::Op_AryEq:
 750       case Opcodes::Op_HasNegatives: {
 751         // Do not unroll a loop with String intrinsics code.
 752         // String intrinsics are large and have loops.
 753         return false;
 754       }
 755 #if INCLUDE_RTM_OPT
 756       case Opcodes::Op_FastLock:
 757       case Opcodes::Op_FastUnlock: {
 758         // Don't unroll RTM locking code because it is large.
 759         if (UseRTMLocking) {
 760           return false;
 761         }
 762       }
 763 #endif
 764     } // switch
 765   }
 766 
 767   if (UseSuperWord) {
 768     if (!cl->is_reduction_loop()) {
 769       phase->mark_reductions(this);
 770     }
 771 
 772     // Only attempt slp analysis when user controls do not prohibit it
 773     if (LoopMaxUnroll > _local_loop_unroll_factor) {
 774       // Once policy_slp_analysis succeeds, mark the loop with the
 775       // maximal unroll factor so that we minimize analysis passes
 776       if (future_unroll_ct >= _local_loop_unroll_factor) {
 777         policy_unroll_slp_analysis(cl, phase, future_unroll_ct);


 844 //------------------------------policy_range_check-----------------------------
 845 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 846 // Actually we do iteration-splitting, a more powerful form of RCE.
 847 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
 848   if (!RangeCheckElimination) return false;
 849 
 850   CountedLoopNode *cl = _head->as_CountedLoop();
 851   // If we unrolled with no intention of doing RCE and we later
 852   // changed our minds, we got no pre-loop.  Either we need to
 853   // make a new pre-loop, or we gotta disallow RCE.
 854   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
 855   Node *trip_counter = cl->phi();
 856 
 857   // check for vectorized loops, some opts are no longer needed
 858   if (cl->do_unroll_only()) return false;
 859 
 860   // Check loop body for tests of trip-counter plus loop-invariant vs
 861   // loop-invariant.
 862   for (uint i = 0; i < _body.size(); i++) {
 863     Node *iff = _body[i];
 864     if (iff->Opcode() == Opcodes::Op_If ||
 865         iff->Opcode() == Opcodes::Op_RangeCheck) { // Test?
 866 
 867       // Comparing trip+off vs limit
 868       Node *bol = iff->in(1);
 869       if (bol->req() != 2) continue; // dead constant test
 870       if (!bol->is_Bool()) {
 871         assert(bol->Opcode() == Opcodes::Op_Conv2B, "predicate check only");
 872         continue;
 873       }
 874       if (bol->as_Bool()->_test._test == BoolTest::ne)
 875         continue; // not RC
 876 
 877       Node *cmp = bol->in(1);
 878       Node *rc_exp = cmp->in(1);
 879       Node *limit = cmp->in(2);
 880 
 881       Node *limit_c = phase->get_ctrl(limit);
 882       if( limit_c == phase->C->top() )
 883         return false;           // Found dead test on live IF?  No RCE!
 884       if( is_member(phase->get_loop(limit_c) ) ) {
 885         // Compare might have operands swapped; commute them
 886         rc_exp = cmp->in(2);
 887         limit  = cmp->in(1);
 888         limit_c = phase->get_ctrl(limit);
 889         if( is_member(phase->get_loop(limit_c) ) )
 890           continue;             // Both inputs are loop varying; cannot RCE
 891       }


1030   // Add the post loop
1031   CountedLoopNode *post_head = NULL;
1032   Node *main_exit = insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1033 
1034   //------------------------------
1035   // Step B: Create Pre-Loop.
1036 
1037   // Step B1: Clone the loop body.  The clone becomes the pre-loop.  The main
1038   // loop pre-header illegally has 2 control users (old & new loops).
1039   clone_loop( loop, old_new, dd_main_head );
1040   CountedLoopNode*    pre_head = old_new[main_head->_idx]->as_CountedLoop();
1041   CountedLoopEndNode* pre_end  = old_new[main_end ->_idx]->as_CountedLoopEnd();
1042   pre_head->set_pre_loop(main_head);
1043   Node *pre_incr = old_new[incr->_idx];
1044 
1045   // Reduce the pre-loop trip count.
1046   pre_end->_prob = PROB_FAIR;
1047 
1048   // Find the pre-loop normal exit.
1049   Node* pre_exit = pre_end->proj_out(false);
1050   assert( pre_exit->Opcode() == Opcodes::Op_IfFalse, "" );
1051   IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
1052   _igvn.register_new_node_with_optimizer( new_pre_exit );
1053   set_idom(new_pre_exit, pre_end, dd_main_head);
1054   set_loop(new_pre_exit, loop->_parent);
1055 
1056   // Step B2: Build a zero-trip guard for the main-loop.  After leaving the
1057   // pre-loop, the main-loop may not execute at all.  Later in life this
1058   // zero-trip guard will become the minimum-trip guard when we unroll
1059   // the main-loop.
1060   Node *min_opaq = new Opaque1Node(C, limit);
1061   Node *min_cmp  = new CmpINode( pre_incr, min_opaq );
1062   Node *min_bol  = new BoolNode( min_cmp, b_test );
1063   register_new_node( min_opaq, new_pre_exit );
1064   register_new_node( min_cmp , new_pre_exit );
1065   register_new_node( min_bol , new_pre_exit );
1066 
1067   // Build the IfNode (assume the main-loop is executed always).
1068   IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
1069   _igvn.register_new_node_with_optimizer( min_iff );
1070   set_idom(min_iff, new_pre_exit, dd_main_head);


1285   // so guess that unit vector trips is a reasonable value.
1286   post_head->set_profile_trip_cnt(4.0);
1287   post_head->set_is_rce_post_loop();
1288 
1289   // Now force out all loop-invariant dominating tests.  The optimizer
1290   // finds some, but we _know_ they are all useless.
1291   peeled_dom_test_elim(loop, old_new);
1292   loop->record_for_igvn();
1293 }
1294 
1295 
1296 //------------------------------insert_post_loop-------------------------------
1297 // Insert post loops.  Add a post loop to the given loop passed.
1298 Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
1299                                        CountedLoopNode *main_head, CountedLoopEndNode *main_end,
1300                                        Node *incr, Node *limit, CountedLoopNode *&post_head) {
1301 
1302   //------------------------------
1303   // Step A: Create a new post-Loop.
1304   Node* main_exit = main_end->proj_out(false);
1305   assert(main_exit->Opcode() == Opcodes::Op_IfFalse, "");
1306   int dd_main_exit = dom_depth(main_exit);
1307 
1308   // Step A1: Clone the loop body of main.  The clone becomes the vector post-loop.
1309   // The main loop pre-header illegally has 2 control users (old & new loops).
1310   clone_loop(loop, old_new, dd_main_exit);
1311   assert(old_new[main_end->_idx]->Opcode() == Opcodes::Op_CountedLoopEnd, "");
1312   post_head = old_new[main_head->_idx]->as_CountedLoop();
1313   post_head->set_normal_loop();
1314   post_head->set_post_loop(main_head);
1315 
1316   // Reduce the post-loop trip count.
1317   CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
1318   post_end->_prob = PROB_FAIR;
1319 
1320   // Build the main-loop normal exit.
1321   IfFalseNode *new_main_exit = new IfFalseNode(main_end);
1322   _igvn.register_new_node_with_optimizer(new_main_exit);
1323   set_idom(new_main_exit, main_end, dd_main_exit);
1324   set_loop(new_main_exit, loop->_parent);
1325 
1326   // Step A2: Build a zero-trip guard for the post-loop.  After leaving the
1327   // main-loop, the post-loop may not execute at all.  We 'opaque' the incr
1328   // (the previous loop trip-counter exit value) because we will be changing
1329   // the exit value (via additional unrolling) so we cannot constant-fold away the zero
1330   // trip guard until all unrolling is done.
1331   Node *zer_opaq = new Opaque1Node(C, incr);


1517         // No underflow.
1518         new_limit = new SubINode(limit, stride);
1519       } else {
1520         // (limit - stride) may underflow.
1521         // Clamp the adjustment value with MININT or MAXINT:
1522         //
1523         //   new_limit = limit-stride
1524         //   if (stride > 0)
1525         //     new_limit = (limit < new_limit) ? MININT : new_limit;
1526         //   else
1527         //     new_limit = (limit > new_limit) ? MAXINT : new_limit;
1528         //
1529         BoolTest::mask bt = loop_end->test_trip();
1530         assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
1531         Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
1532         set_ctrl(adj_max, C->root());
1533         Node* old_limit = NULL;
1534         Node* adj_limit = NULL;
1535         Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
1536         if (loop_head->unrolled_count() > 1 &&
1537             limit->is_CMove() && limit->Opcode() == Opcodes::Op_CMoveI &&
1538             limit->in(CMoveNode::IfTrue) == adj_max &&
1539             bol->as_Bool()->_test._test == bt &&
1540             bol->in(1)->Opcode() == Opcodes::Op_CmpI &&
1541             bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
1542           // Loop was unrolled before.
1543           // Optimize the limit to avoid nested CMove:
1544           // use original limit as old limit.
1545           old_limit = bol->in(1)->in(1);
1546           // Adjust previous adjusted limit.
1547           adj_limit = limit->in(CMoveNode::IfFalse);
1548           adj_limit = new SubINode(adj_limit, stride);
1549         } else {
1550           old_limit = limit;
1551           adj_limit = new SubINode(limit, stride);
1552         }
1553         assert(old_limit != NULL && adj_limit != NULL, "");
1554         register_new_node( adj_limit, ctrl ); // adjust amount
1555         Node* adj_cmp = new CmpINode(old_limit, adj_limit);
1556         register_new_node( adj_cmp, ctrl );
1557         Node* adj_bool = new BoolNode(adj_cmp, bt);
1558         register_new_node( adj_bool, ctrl );
1559         new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
1560       }


1700 
1701 void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
1702   if (SuperWordReductions == false) return;
1703 
1704   CountedLoopNode* loop_head = loop->_head->as_CountedLoop();
1705   if (loop_head->unrolled_count() > 1) {
1706     return;
1707   }
1708 
1709   Node* trip_phi = loop_head->phi();
1710   for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
1711     Node* phi = loop_head->fast_out(i);
1712     if (phi->is_Phi() && phi->outcnt() > 0 && phi != trip_phi) {
1713       // For definitions which are loop inclusive and not tripcounts.
1714       Node* def_node = phi->in(LoopNode::LoopBackControl);
1715 
1716       if (def_node != NULL) {
1717         Node* n_ctrl = get_ctrl(def_node);
1718         if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
1719           // Now test it to see if it fits the standard pattern for a reduction operator.
1720           Opcodes opc = def_node->Opcode();
1721           if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
1722             if (!def_node->is_reduction()) { // Not marked yet
1723               // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
1724               bool ok = false;
1725               for (unsigned j = 1; j < def_node->req(); j++) {
1726                 Node* in = def_node->in(j);
1727                 if (in == phi) {
1728                   ok = true;
1729                   break;
1730                 }
1731               }
1732 
1733               // do nothing if we did not match the initial criteria
1734               if (ok == false) {
1735                 continue;
1736               }
1737 
1738               // The result of the reduction must not be used in the loop
1739               for (DUIterator_Fast imax, i = def_node->fast_outs(imax); i < imax && ok; i++) {
1740                 Node* u = def_node->fast_out(i);


1901     //   ( if (scale < 0) /* and stride > 0 */
1902     //       I < (low_limit-(offset+1))/scale
1903     //     else /* scale > 0 and stride < 0 */
1904     //       I > (low_limit-(offset+1))/scale
1905     //   )
1906 
1907     *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl);
1908   }
1909 }
1910 
1911 
1912 //------------------------------is_scaled_iv---------------------------------
1913 // Return true if exp is a constant times an induction var
1914 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
1915   if (exp == iv) {
1916     if (p_scale != NULL) {
1917       *p_scale = 1;
1918     }
1919     return true;
1920   }
1921   Opcodes opc = exp->Opcode();
1922   if (opc == Opcodes::Op_MulI) {
1923     if (exp->in(1) == iv && exp->in(2)->is_Con()) {
1924       if (p_scale != NULL) {
1925         *p_scale = exp->in(2)->get_int();
1926       }
1927       return true;
1928     }
1929     if (exp->in(2) == iv && exp->in(1)->is_Con()) {
1930       if (p_scale != NULL) {
1931         *p_scale = exp->in(1)->get_int();
1932       }
1933       return true;
1934     }
1935   } else if (opc == Opcodes::Op_LShiftI) {
1936     if (exp->in(1) == iv && exp->in(2)->is_Con()) {
1937       if (p_scale != NULL) {
1938         *p_scale = 1 << exp->in(2)->get_int();
1939       }
1940       return true;
1941     }
1942   }
1943   return false;
1944 }
1945 
1946 //-----------------------------is_scaled_iv_plus_offset------------------------------
1947 // Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
1948 bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) {
1949   if (is_scaled_iv(exp, iv, p_scale)) {
1950     if (p_offset != NULL) {
1951       Node *zero = _igvn.intcon(0);
1952       set_ctrl(zero, C->root());
1953       *p_offset = zero;
1954     }
1955     return true;
1956   }
1957   Opcodes opc = exp->Opcode();
1958   if (opc == Opcodes::Op_AddI) {
1959     if (is_scaled_iv(exp->in(1), iv, p_scale)) {
1960       if (p_offset != NULL) {
1961         *p_offset = exp->in(2);
1962       }
1963       return true;
1964     }
1965     if (is_scaled_iv(exp->in(2), iv, p_scale)) {
1966       if (p_offset != NULL) {
1967         *p_offset = exp->in(1);
1968       }
1969       return true;
1970     }
1971     if (exp->in(2)->is_Con()) {
1972       Node* offset2 = NULL;
1973       if (depth < 2 &&
1974           is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
1975                                    p_offset != NULL ? &offset2 : NULL, depth+1)) {
1976         if (p_offset != NULL) {
1977           Node *ctrl_off2 = get_ctrl(offset2);
1978           Node* offset = new AddINode(offset2, exp->in(2));
1979           register_new_node(offset, ctrl_off2);
1980           *p_offset = offset;
1981         }
1982         return true;
1983       }
1984     }
1985   } else if (opc == Opcodes::Op_SubI) {
1986     if (is_scaled_iv(exp->in(1), iv, p_scale)) {
1987       if (p_offset != NULL) {
1988         Node *zero = _igvn.intcon(0);
1989         set_ctrl(zero, C->root());
1990         Node *ctrl_off = get_ctrl(exp->in(2));
1991         Node* offset = new SubINode(zero, exp->in(2));
1992         register_new_node(offset, ctrl_off);
1993         *p_offset = offset;
1994       }
1995       return true;
1996     }
1997     if (is_scaled_iv(exp->in(2), iv, p_scale)) {
1998       if (p_offset != NULL) {
1999         *p_scale *= -1;
2000         *p_offset = exp->in(1);
2001       }
2002       return true;
2003     }
2004   }
2005   return false;


2032   // to not ever trip end tests
2033   Node *main_limit = cl->limit();
2034 
2035   // Check graph shape. Cannot optimize a loop if zero-trip
2036   // Opaque1 node is optimized away and then another round
2037   // of loop opts attempted.
2038   if (!is_canonical_loop_entry(cl)) {
2039     return closed_range_checks;
2040   }
2041 
2042   // Need to find the main-loop zero-trip guard
2043   Node *ctrl  = cl->in(LoopNode::EntryControl);
2044   Node *iffm = ctrl->in(0);
2045   Node *opqzm = iffm->in(1)->in(1)->in(2);
2046   assert(opqzm->in(1) == main_limit, "do not understand situation");
2047 
2048   // Find the pre-loop limit; we will expand its iterations to
2049   // not ever trip low tests.
2050   Node *p_f = iffm->in(0);
2051   // pre loop may have been optimized out
2052   if (p_f->Opcode() != Opcodes::Op_IfFalse) {
2053     return closed_range_checks;
2054   }
2055   CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2056   assert(pre_end->loopnode()->is_pre_loop(), "");
2057   Node *pre_opaq1 = pre_end->limit();
2058   // Occasionally it's possible for a pre-loop Opaque1 node to be
2059   // optimized away and then another round of loop opts attempted.
2060   // We can not optimize this particular loop in that case.
2061   if (pre_opaq1->Opcode() != Opcodes::Op_Opaque1)
2062     return closed_range_checks;
2063   Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2064   Node *pre_limit = pre_opaq->in(1);
2065 
2066   // Where do we put new limit calculations
2067   Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2068 
2069   // Ensure the original loop limit is available from the
2070   // pre-loop Opaque1 node.
2071   Node *orig_limit = pre_opaq->original_loop_limit();
2072   if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP)
2073     return closed_range_checks;
2074 
2075   // Must know if its a count-up or count-down loop
2076 
2077   int stride_con = cl->stride_con();
2078   Node *zero = _igvn.intcon(0);
2079   Node *one  = _igvn.intcon(1);
2080   // Use symmetrical int range [-max_jint,max_jint]
2081   Node *mini = _igvn.intcon(-max_jint);
2082   set_ctrl(zero, C->root());
2083   set_ctrl(one,  C->root());
2084   set_ctrl(mini, C->root());
2085 
2086   // Range checks that do not dominate the loop backedge (ie.
2087   // conditionally executed) can lengthen the pre loop limit beyond
2088   // the original loop limit. To prevent this, the pre limit is
2089   // (for stride > 0) MINed with the original loop limit (MAXed
2090   // stride < 0) when some range_check (rc) is conditionally
2091   // executed.
2092   bool conditional_rc = false;
2093 
2094   // Count number of range checks and reduce by load range limits, if zero,
2095   // the loop is in canonical form to multiversion.
2096   closed_range_checks = 0;
2097 
2098   // Check loop body for tests of trip-counter plus loop-invariant vs
2099   // loop-invariant.
2100   for( uint i = 0; i < loop->_body.size(); i++ ) {
2101     Node *iff = loop->_body[i];
2102     if (iff->Opcode() == Opcodes::Op_If ||
2103         iff->Opcode() == Opcodes::Op_RangeCheck) { // Test?
2104       // Test is an IfNode, has 2 projections.  If BOTH are in the loop
2105       // we need loop unswitching instead of iteration splitting.
2106       closed_range_checks++;
2107       Node *exit = loop->is_loop_exit(iff);
2108       if( !exit ) continue;
2109       int flip = (exit->Opcode() == Opcodes::Op_IfTrue) ? 1 : 0;
2110 
2111       // Get boolean condition to test
2112       Node *i1 = iff->in(1);
2113       if( !i1->is_Bool() ) continue;
2114       BoolNode *bol = i1->as_Bool();
2115       BoolTest b_test = bol->_test;
2116       // Flip sense of test if exit condition is flipped
2117       if( flip )
2118         b_test = b_test.negate();
2119 
2120       // Get compare
2121       Node *cmp = bol->in(1);
2122 
2123       // Look for trip_counter + offset vs limit
2124       Node *rc_exp = cmp->in(1);
2125       Node *limit  = cmp->in(2);
2126       jint scale_con= 1;        // Assume trip counter not scaled
2127 
2128       Node *limit_c = get_ctrl(limit);
2129       if( loop->is_member(get_loop(limit_c) ) ) {


2158 
2159       // As above for the 'limit', the 'offset' maybe pinned below the
2160       // zero trip test.
2161       if( offset_c == ctrl ) {
2162         continue; // Don't rce this check but continue looking for other candidates.
2163       }
2164 #ifdef ASSERT
2165       if (TraceRangeLimitCheck) {
2166         tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2167         bol->dump(2);
2168       }
2169 #endif
2170       // At this point we have the expression as:
2171       //   scale_con * trip_counter + offset :: limit
2172       // where scale_con, offset and limit are loop invariant.  Trip_counter
2173       // monotonically increases by stride_con, a constant.  Both (or either)
2174       // stride_con and scale_con can be negative which will flip about the
2175       // sense of the test.
2176 
2177       // Adjust pre and main loop limits to guard the correct iteration set
2178       if( cmp->Opcode() == Opcodes::Op_CmpU ) {// Unsigned compare is really 2 tests
2179         if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2180           // The underflow and overflow limits: 0 <= scale*I+offset < limit
2181           add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2182           // (0-offset)/scale could be outside of loop iterations range.
2183           conditional_rc = true;
2184         } else {
2185           if (PrintOpto) {
2186             tty->print_cr("missed RCE opportunity");
2187           }
2188           continue;             // In release mode, ignore it
2189         }
2190       } else {                  // Otherwise work on normal compares
2191         switch( b_test._test ) {
2192         case BoolTest::gt:
2193           // Fall into GE case
2194         case BoolTest::ge:
2195           // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2196           scale_con = -scale_con;
2197           offset = new SubINode( zero, offset );
2198           register_new_node( offset, pre_ctrl );


2226 
2227       // Kill the eliminated test
2228       C->set_major_progress();
2229       Node *kill_con = _igvn.intcon( 1-flip );
2230       set_ctrl(kill_con, C->root());
2231       _igvn.replace_input_of(iff, 1, kill_con);
2232       // Find surviving projection
2233       assert(iff->is_If(), "");
2234       ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2235       // Find loads off the surviving projection; remove their control edge
2236       for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2237         Node* cd = dp->fast_out(i); // Control-dependent node
2238         if (cd->is_Load() && cd->depends_only_on_test()) {   // Loads can now float around in the loop
2239           // Allow the load to float around in the loop, or before it
2240           // but NOT before the pre-loop.
2241           _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
2242           --i;
2243           --imax;
2244         }
2245       }
2246       if (limit->Opcode() == Opcodes::Op_LoadRange) {
2247         closed_range_checks--;
2248       }
2249 
2250     } // End of is IF
2251 
2252   }
2253 
2254   // Update loop limits
2255   if (conditional_rc) {
2256     pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2257                                  : (Node*)new MaxINode(pre_limit, orig_limit);
2258     register_new_node(pre_limit, pre_ctrl);
2259   }
2260   _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2261 
2262   // Note:: we are making the main loop limit no longer precise;
2263   // need to round up based on stride.
2264   cl->set_nonexact_trip_count();
2265   Node *main_cle = cl->loopexit();
2266   Node *main_bol = main_cle->in(1);


2284 
2285   return closed_range_checks;
2286 }
2287 
2288 //------------------------------has_range_checks-------------------------------
2289 // Check to see if RCE cleaned the current loop of range-checks.
2290 void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
2291   assert(RangeCheckElimination, "");
2292 
2293   // skip if not a counted loop
2294   if (!loop->is_counted()) return;
2295 
2296   CountedLoopNode *cl = loop->_head->as_CountedLoop();
2297 
2298   // skip this loop if it is already checked
2299   if (cl->has_been_range_checked()) return;
2300 
2301   // Now check for existance of range checks
2302   for (uint i = 0; i < loop->_body.size(); i++) {
2303     Node *iff = loop->_body[i];
2304     Opcodes iff_opc = iff->Opcode();
2305     if (iff_opc == Opcodes::Op_If || iff_opc == Opcodes::Op_RangeCheck) {
2306       cl->mark_has_range_checks();
2307       break;
2308     }
2309   }
2310   cl->set_has_been_range_checked();
2311 }
2312 
2313 //-------------------------multi_version_post_loops----------------------------
2314 // Check the range checks that remain, if simple, use the bounds to guard
2315 // which version to a post loop we execute, one with range checks or one without
2316 bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
2317   bool multi_version_succeeded = false;
2318   assert(RangeCheckElimination, "");
2319   CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
2320   assert(legacy_cl->is_post_loop(), "");
2321 
2322   // Check for existance of range checks using the unique instance to make a guard with
2323   Unique_Node_List worklist;
2324   for (uint i = 0; i < legacy_loop->_body.size(); i++) {
2325     Node *iff = legacy_loop->_body[i];
2326     Opcodes iff_opc = iff->Opcode();
2327     if (iff_opc == Opcodes::Op_If || iff_opc == Opcodes::Op_RangeCheck) {
2328       worklist.push(iff);
2329     }
2330   }
2331 
2332   // Find RCE'd post loop so that we can stage its guard.
2333   if (!is_canonical_loop_entry(legacy_cl)) return multi_version_succeeded;
2334   Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
2335   Node* iffm = ctrl->in(0);
2336 
2337   // Now we test that both the post loops are connected
2338   Node* post_loop_region = iffm->in(0);
2339   if (post_loop_region == NULL) return multi_version_succeeded;
2340   if (!post_loop_region->is_Region()) return multi_version_succeeded;
2341   Node* covering_region = post_loop_region->in(RegionNode::Control+1);
2342   if (covering_region == NULL) return multi_version_succeeded;
2343   if (!covering_region->is_Region()) return multi_version_succeeded;
2344   Node* p_f = covering_region->in(RegionNode::Control);
2345   if (p_f == NULL) return multi_version_succeeded;
2346   if (!p_f->is_IfFalse()) return multi_version_succeeded;
2347   if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded;


2365 #endif
2366 
2367   // Now fetch the limit we want to compare against
2368   Node *limit = rce_cl->limit();
2369   bool first_time = true;
2370 
2371   // If we got this far, we identified the post loop which has been RCE'd and
2372   // we have a work list.  Now we will try to transform the if guard to cause
2373   // the loop pair to be multi version executed with the determination left to runtime
2374   // or the optimizer if full information is known about the given arrays at compile time.
2375   Node *last_min = NULL;
2376   multi_version_succeeded = true;
2377   while (worklist.size()) {
2378     Node* rc_iffm = worklist.pop();
2379     if (rc_iffm->is_If()) {
2380       Node *rc_bolzm = rc_iffm->in(1);
2381       if (rc_bolzm->is_Bool()) {
2382         Node *rc_cmpzm = rc_bolzm->in(1);
2383         if (rc_cmpzm->is_Cmp()) {
2384           Node *rc_left = rc_cmpzm->in(2);
2385           if (rc_left->Opcode() != Opcodes::Op_LoadRange) {
2386             multi_version_succeeded = false;
2387             break;
2388           }
2389           if (first_time) {
2390             last_min = rc_left;
2391             first_time = false;
2392           } else {
2393             Node *cur_min = new MinINode(last_min, rc_left);
2394             last_min = cur_min;
2395             _igvn.register_new_node_with_optimizer(last_min);
2396           }
2397         }
2398       }
2399     }
2400   }
2401 
2402   // All we have to do is update the limit of the rce loop
2403   // with the min of our expression and the current limit.
2404   // We will use this expression to replace the current limit.
2405   if (last_min && multi_version_succeeded) {


2444       }
2445     }
2446   }
2447 }
2448 
//------------------------------DCE_loop_body----------------------------------
// Remove simplistic dead code from loop body
void IdealLoopTree::DCE_loop_body() {
  // A node with no remaining uses (outcnt() == 0) is dead.  Compact the body
  // list in place: overwrite the dead slot with the popped last element, and
  // decrement i so the swapped-in node is itself examined on the next pass.
  for( uint i = 0; i < _body.size(); i++ )
    if( _body.at(i)->outcnt() == 0 )
      _body.map( i--, _body.pop() );
}
2456 
2457 
2458 //------------------------------adjust_loop_exit_prob--------------------------
2459 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
2460 // Replace with a 1-in-10 exit guess.
2461 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
2462   Node *test = tail();
2463   while( test != _head ) {
2464     Opcodes top = test->Opcode();
2465     if( top == Opcodes::Op_IfTrue || top == Opcodes::Op_IfFalse ) {
2466       int test_con = ((ProjNode*)test)->_con;
2467       assert(top == (test_con? Opcodes::Op_IfTrue: Opcodes::Op_IfFalse), "sanity");
2468       IfNode *iff = test->in(0)->as_If();
2469       if( iff->outcnt() == 2 ) {        // Ignore dead tests
2470         Node *bol = iff->in(1);
2471         if( bol && bol->req() > 1 && bol->in(1) &&
2472             ((bol->in(1)->Opcode() == Opcodes::Op_StorePConditional ) ||
2473              (bol->in(1)->Opcode() == Opcodes::Op_StoreIConditional ) ||
2474              (bol->in(1)->Opcode() == Opcodes::Op_StoreLConditional ) ||
2475              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeB ) ||
2476              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeS ) ||
2477              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeI ) ||
2478              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeL ) ||
2479              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeP ) ||
2480              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeN ) ||
2481              (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapB ) ||
2482              (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapS ) ||
2483              (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapI ) ||
2484              (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapL ) ||
2485              (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapP ) ||
2486              (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapN ) ||
2487              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapB ) ||
2488              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapS ) ||
2489              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapI ) ||
2490              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapL ) ||
2491              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapP ) ||
2492              (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapN )))
2493           return;               // Allocation loops RARELY take backedge
2494         // Find the OTHER exit path from the IF
2495         Node* ex = iff->proj_out(1-test_con);
2496         float p = iff->_prob;
2497         if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) {
2498           if( top == Opcodes::Op_IfTrue ) {
2499             if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) {
2500               iff->_prob = PROB_STATIC_FREQUENT;
2501             }
2502           } else {
2503             if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) {
2504               iff->_prob = PROB_STATIC_INFREQUENT;
2505             }
2506           }
2507         }
2508       }
2509     }
2510     test = phase->idom(test);
2511   }
2512 }
2513 
#ifdef ASSERT
// Walk the expected graph shape above a main loop back to its pre-loop:
// main entry projection <- zero-trip-guard If <- pre-loop exit projection.
static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
  Node *entry = cl->in(LoopNode::EntryControl);
  assert(entry->Opcode() == Opcodes::Op_IfTrue || entry->Opcode() == Opcodes::Op_IfFalse, "");
  Node *guard_iff = entry->in(0);
  assert(guard_iff->Opcode() == Opcodes::Op_If, "");
  Node *pre_exit_proj = guard_iff->in(0);
  assert(pre_exit_proj->Opcode() == Opcodes::Op_IfFalse, "");
  CountedLoopEndNode *pre_end = pre_exit_proj->in(0)->as_CountedLoopEnd();
  assert(pre_end->loopnode()->is_pre_loop(), "");
  return pre_end->loopnode();
}
#endif
2527 
2528 // Remove the main and post loops and make the pre loop execute all
2529 // iterations. Useful when the pre loop is found empty.
2530 void IdealLoopTree::remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase) {
2531   CountedLoopEndNode* pre_end = cl->loopexit();
2532   Node* pre_cmp = pre_end->cmp_node();
2533   if (pre_cmp->in(2)->Opcode() != Opcodes::Op_Opaque1) {
2534     // Only safe to remove the main loop if the compiler optimized it
2535     // out based on an unknown number of iterations
2536     return;
2537   }
2538 
2539   // Can we find the main loop?
2540   if (_next == NULL) {
2541     return;
2542   }
2543 
2544   Node* next_head = _next->_head;
2545   if (!next_head->is_CountedLoop()) {
2546     return;
2547   }
2548 
2549   CountedLoopNode* main_head = next_head->as_CountedLoop();
2550   if (!main_head->is_main_loop()) {
2551     return;
2552   }
2553 
2554   assert(locate_pre_from_main(main_head) == cl, "bad main loop");
2555   Node* main_iff = main_head->in(LoopNode::EntryControl)->in(0);
2556 
2557   // Remove the Opaque1Node of the pre loop and make it execute all iterations
2558   phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
2559   // Remove the Opaque1Node of the main loop so it can be optimized out
2560   Node* main_cmp = main_iff->in(1)->in(1);
2561   assert(main_cmp->in(2)->Opcode() == Opcodes::Op_Opaque1, "main loop has no opaque node?");
2562   phase->_igvn.replace_input_of(main_cmp, 2, main_cmp->in(2)->in(1));
2563 }
2564 
2565 //------------------------------policy_do_remove_empty_loop--------------------
2566 // Micro-benchmark spamming.  Policy is to always remove empty loops.
2567 // The 'DO' part is to replace the trip counter with the value it will
2568 // have on the last iteration.  This will break the loop.
2569 bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
2570   // Minimum size must be empty loop
2571   if (_body.size() > EMPTY_LOOP_SIZE)
2572     return false;
2573 
2574   if (!_head->is_CountedLoop())
2575     return false;     // Dead loop
2576   CountedLoopNode *cl = _head->as_CountedLoop();
2577   if (!cl->is_valid_counted_loop())
2578     return false; // Malformed loop
2579   if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
2580     return false;             // Infinite loop
2581 
2582   if (cl->is_pre_loop()) {
2583     // If the loop we are removing is a pre-loop then the main and
2584     // post loop can be removed as well
2585     remove_main_post_loops(cl, phase);
2586   }
2587 
2588 #ifdef ASSERT
2589   // Ensure only one phi which is the iv.
2590   Node* iv = NULL;
2591   for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) {
2592     Node* n = cl->fast_out(i);
2593     if (n->Opcode() == Opcodes::Op_Phi) {
2594       assert(iv == NULL, "Too many phis" );
2595       iv = n;
2596     }
2597   }
2598   assert(iv == cl->phi(), "Wrong phi" );
2599 #endif
2600 
2601   // main and post loops have explicitly created zero trip guard
2602   bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
2603   if (needs_guard) {
2604     // Skip guard if values not overlap.
2605     const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
2606     const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
2607     int  stride_con = cl->stride_con();
2608     if (stride_con > 0) {
2609       needs_guard = (init_t->_hi >= limit_t->_lo);
2610     } else {
2611       needs_guard = (init_t->_lo <= limit_t->_hi);
2612     }
2613   }
2614   if (needs_guard) {
2615     // Check for an obvious zero trip guard.
2616     Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl));
2617     if (inctrl->Opcode() == Opcodes::Op_IfTrue || inctrl->Opcode() == Opcodes::Op_IfFalse) {
2618       bool maybe_swapped = (inctrl->Opcode() == Opcodes::Op_IfFalse);
2619       // The test should look like just the backedge of a CountedLoop
2620       Node* iff = inctrl->in(0);
2621       if (iff->is_If()) {
2622         Node* bol = iff->in(1);
2623         if (bol->is_Bool()) {
2624           BoolTest test = bol->as_Bool()->_test;
2625           if (maybe_swapped) {
2626             test._test = test.commute();
2627             test._test = test.negate();
2628           }
2629           if (test._test == cl->loopexit()->test_trip()) {
2630             Node* cmp = bol->in(1);
2631             int init_idx = maybe_swapped ? 2 : 1;
2632             int limit_idx = maybe_swapped ? 1 : 2;
2633             if (cmp->is_Cmp() && cmp->in(init_idx) == cl->init_trip() && cmp->in(limit_idx) == cl->limit()) {
2634               needs_guard = false;
2635             }
2636           }
2637         }
2638       }


2911 bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
2912                                      Node*& shift, Node*& con) {
2913   const char* msg = NULL;
2914   Node* msg_node = NULL;
2915 
2916   store_value = NULL;
2917   con = NULL;
2918   shift = NULL;
2919 
2920   // Process the loop looking for stores.  If there are multiple
2921   // stores or extra control flow give at this point.
2922   CountedLoopNode* head = lpt->_head->as_CountedLoop();
2923   for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
2924     Node* n = lpt->_body.at(i);
2925     if (n->outcnt() == 0) continue; // Ignore dead
2926     if (n->is_Store()) {
2927       if (store != NULL) {
2928         msg = "multiple stores";
2929         break;
2930       }
2931       Opcodes opc = n->Opcode();
2932       if (opc == Opcodes::Op_StoreP || opc == Opcodes::Op_StoreN || opc == Opcodes::Op_StoreNKlass || opc == Opcodes::Op_StoreCM) {
2933         msg = "oop fills not handled";
2934         break;
2935       }
2936       Node* value = n->in(MemNode::ValueIn);
2937       if (!lpt->is_invariant(value)) {
2938         msg  = "variant store value";
2939       } else if (!_igvn.type(n->in(MemNode::Address))->isa_aryptr()) {
2940         msg = "not array address";
2941       }
2942       store = n;
2943       store_value = value;
2944     } else if (n->is_If() && n != head->loopexit()) {
2945       msg = "extra control flow";
2946       msg_node = n;
2947     }
2948   }
2949 
2950   if (store == NULL) {
2951     // No store in loop
2952     return false;


2986 #ifndef PRODUCT
2987     if (TraceOptimizeFill) {
2988       tty->print_cr("not fill intrinsic candidate: %s", msg);
2989       if (msg_node != NULL) msg_node->dump();
2990     }
2991 #endif
2992     return false;
2993   }
2994 
2995   // Make sure the address expression can be handled.  It should be
2996   // head->phi * elsize + con.  head->phi might have a ConvI2L(CastII()).
2997   Node* elements[4];
2998   Node* cast = NULL;
2999   Node* conv = NULL;
3000   bool found_index = false;
3001   int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
3002   for (int e = 0; e < count; e++) {
3003     Node* n = elements[e];
3004     if (n->is_Con() && con == NULL) {
3005       con = n;
3006     } else if (n->Opcode() == Opcodes::Op_LShiftX && shift == NULL) {
3007       Node* value = n->in(1);
3008 #ifdef _LP64
3009       if (value->Opcode() == Opcodes::Op_ConvI2L) {
3010         conv = value;
3011         value = value->in(1);
3012       }
3013       if (value->Opcode() == Opcodes::Op_CastII &&
3014           value->as_CastII()->has_range_check()) {
3015         // Skip range check dependent CastII nodes
3016         cast = value;
3017         value = value->in(1);
3018       }
3019 #endif
3020       if (value != head->phi()) {
3021         msg = "unhandled shift in address";
3022       } else {
3023         if (type2aelembytes(store->as_Mem()->memory_type(), true) != (1 << n->in(2)->get_int())) {
3024           msg = "scale doesn't match";
3025         } else {
3026           found_index = true;
3027           shift = n;
3028         }
3029       }
3030     } else if (n->Opcode() == Opcodes::Op_ConvI2L && conv == NULL) {
3031       conv = n;
3032       n = n->in(1);
3033       if (n->Opcode() == Opcodes::Op_CastII &&
3034           n->as_CastII()->has_range_check()) {
3035         // Skip range check dependent CastII nodes
3036         cast = n;
3037         n = n->in(1);
3038       }
3039       if (n == head->phi()) {
3040         found_index = true;
3041       } else {
3042         msg = "unhandled input to ConvI2L";
3043       }
3044     } else if (n == head->phi()) {
3045       // no shift, check below for allowed cases
3046       found_index = true;
3047     } else {
3048       msg = "unhandled node in address";
3049       msg_node = n;
3050     }
3051   }
3052 
3053   if (count == -1) {


< prev index next >