< prev index next >

src/share/vm/opto/loopTransform.cpp

Print this page




 649 // the loop is a CountedLoop and the body is small enough.
 650 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 651 
 652   CountedLoopNode *cl = _head->as_CountedLoop();
 653   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 654 
 655   if (!cl->is_valid_counted_loop())
 656     return false; // Malformed counted loop
 657 
 658   // Protect against over-unrolling.
 659   // After split at least one iteration will be executed in pre-loop.
 660   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 661 
 662   _local_loop_unroll_limit = LoopUnrollLimit;
 663   _local_loop_unroll_factor = 4;
 664   int future_unroll_ct = cl->unrolled_count() * 2;
 665   if (!cl->do_unroll_only()) {
 666     if (future_unroll_ct > LoopMaxUnroll) return false;
 667   } else {
 668     // obey user constraints on vector mapped loops with additional unrolling applied
 669     if ((future_unroll_ct / cl->slp_max_unroll()) > LoopMaxUnroll) return false;

 670   }
 671 
 672   // Check for initial stride being a small enough constant
 673   if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
 674 
 675   // Don't unroll if the next round of unrolling would push us
 676   // over the expected trip count of the loop.  One is subtracted
 677   // from the expected trip count because the pre-loop normally
 678   // executes 1 iteration.
 679   if (UnrollLimitForProfileCheck > 0 &&
 680       cl->profile_trip_cnt() != COUNT_UNKNOWN &&
 681       future_unroll_ct        > UnrollLimitForProfileCheck &&
 682       (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
 683     return false;
 684   }
 685 
 686   // When unroll count is greater than LoopUnrollMin, don't unroll if:
 687   //   the residual iterations are more than 10% of the trip count
 688   //   and rounds of "unroll,optimize" are not making significant progress
 689   //   Progress defined as current size less than 20% larger than previous size.
 690   if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
 691       future_unroll_ct > LoopUnrollMin &&
 692       (future_unroll_ct - 1) * 10.0 > cl->profile_trip_cnt() &&
 693       1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
 694     return false;
 695   }
 696 
 697   Node *init_n = cl->init_trip();
 698   Node *limit_n = cl->limit();
 699   int stride_con = cl->stride_con();
 700   // Non-constant bounds.
 701   // Protect against over-unrolling when init or/and limit are not constant
 702   // (so that trip_count's init value is maxint) but iv range is known.
 703   if (init_n   == NULL || !init_n->is_Con()  ||
 704       limit_n  == NULL || !limit_n->is_Con()) {
 705     Node* phi = cl->phi();
 706     if (phi != NULL) {
 707       assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.");
 708       const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
 709       int next_stride = stride_con * 2; // stride after this unroll
 710       if (next_stride > 0) {
 711         if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow
 712             iv_type->_lo + next_stride >  iv_type->_hi) {


1243 
1244   // Flag main loop
1245   main_head->set_main_loop();
1246   if( peel_only ) main_head->set_main_no_pre_loop();
1247 
1248   // Subtract a trip count for the pre-loop.
1249   main_head->set_trip_count(main_head->trip_count() - 1);
1250 
1251   // It's difficult to be precise about the trip-counts
1252   // for the pre/post loops.  They are usually very short,
1253   // so guess that 4 trips is a reasonable value.
1254   post_head->set_profile_trip_cnt(4.0);
1255   pre_head->set_profile_trip_cnt(4.0);
1256 
1257   // Now force out all loop-invariant dominating tests.  The optimizer
1258   // finds some, but we _know_ they are all useless.
1259   peeled_dom_test_elim(loop,old_new);
1260   loop->record_for_igvn();
1261 }
1262 












































































































































1263 //------------------------------is_invariant-----------------------------
1264 // Return true if n is invariant
1265 bool IdealLoopTree::is_invariant(Node* n) const {
1266   Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1267   if (n_c->is_top()) return false;
1268   return !is_member(_phase->get_loop(n_c));
1269 }
1270 
1271 
1272 //------------------------------do_unroll--------------------------------------
1273 // Unroll the loop body one step - make each trip do 2 iterations.
1274 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
1275   assert(LoopUnrollLimit, "");
1276   CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1277   CountedLoopEndNode *loop_end = loop_head->loopexit();
1278   assert(loop_end, "");
1279 #ifndef PRODUCT
1280   if (PrintOpto && VerifyLoopOptimizations) {
1281     tty->print("Unrolling ");
1282     loop->dump_head();


2581 
2582   // If we have any of these conditions (RCE, alignment, unrolling) met, then
2583   // we switch to the pre-/main-/post-loop model.  This model also covers
2584   // peeling.
2585   if (should_rce || should_align || should_unroll) {
2586     if (cl->is_normal_loop())  // Convert to 'pre/main/post' loops
2587       phase->insert_pre_post_loops(this,old_new, !may_rce_align);
2588 
2589     // Adjust the pre- and main-loop limits to let the pre and post loops run
2590     // with full checks, but the main-loop with no checks.  Remove said
2591     // checks from the main body.
2592     if (should_rce)
2593       phase->do_range_check(this,old_new);
2594 
2595     // Double loop body for unrolling.  Adjust the minimum-trip test (will do
2596     // twice as many iterations as before) and the main body limit (only do
2597     // an even number of trips).  If we are peeling, we might enable some RCE
2598     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
2599     // peeling.
2600     if (should_unroll && !should_peel) {



2601       phase->do_unroll(this, old_new, true);
2602     }
2603 
2604     // Adjust the pre-loop limits to align the main body
2605     // iterations.
2606     if (should_align)
2607       Unimplemented();
2608 
2609   } else {                      // Else we have an unchanged counted loop
2610     if (should_peel)           // Might want to peel but do nothing else
2611       phase->do_peeling(this,old_new);
2612   }
2613   return true;
2614 }
2615 
2616 
2617 //=============================================================================
2618 //------------------------------iteration_split--------------------------------
2619 bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
2620   // Recursively iteration split nested loops




 649 // the loop is a CountedLoop and the body is small enough.
 650 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 651 
 652   CountedLoopNode *cl = _head->as_CountedLoop();
 653   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 654 
 655   if (!cl->is_valid_counted_loop())
 656     return false; // Malformed counted loop
 657 
 658   // Protect against over-unrolling.
 659   // After split at least one iteration will be executed in pre-loop.
 660   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 661 
 662   _local_loop_unroll_limit = LoopUnrollLimit;
 663   _local_loop_unroll_factor = 4;
 664   int future_unroll_ct = cl->unrolled_count() * 2;
 665   if (!cl->do_unroll_only()) {
 666     if (future_unroll_ct > LoopMaxUnroll) return false;
 667   } else {
 668     // obey user constraints on vector mapped loops with additional unrolling applied
 669     int unroll_constraint = (cl->slp_max_unroll()) ? cl->slp_max_unroll() : 1;
 670     if ((future_unroll_ct / unroll_constraint) > LoopMaxUnroll) return false;
 671   }
 672 
 673   // Check for initial stride being a small enough constant
 674   if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
 675 
 676   // Don't unroll if the next round of unrolling would push us
 677   // over the expected trip count of the loop.  One is subtracted
 678   // from the expected trip count because the pre-loop normally
 679   // executes 1 iteration.
 680   if (UnrollLimitForProfileCheck > 0 &&
 681       cl->profile_trip_cnt() != COUNT_UNKNOWN &&
 682       future_unroll_ct        > UnrollLimitForProfileCheck &&
 683       (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
 684     return false;
 685   }
 686 
 687   // When unroll count is greater than LoopUnrollMin, don't unroll if:
 688   //   the residual iterations are more than 10% of the trip count
 689   //   and rounds of "unroll,optimize" are not making significant progress
 690   //   Progress defined as current size less than 20% larger than previous size.
 691   if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
 692       future_unroll_ct > LoopUnrollMin &&
 693       (future_unroll_ct - 1) * (100 / LoopPercentProfileLimit) > cl->profile_trip_cnt() &&
 694       1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
 695     return false;
 696   }
 697 
 698   Node *init_n = cl->init_trip();
 699   Node *limit_n = cl->limit();
 700   int stride_con = cl->stride_con();
 701   // Non-constant bounds.
 702   // Protect against over-unrolling when init or/and limit are not constant
 703   // (so that trip_count's init value is maxint) but iv range is known.
 704   if (init_n   == NULL || !init_n->is_Con()  ||
 705       limit_n  == NULL || !limit_n->is_Con()) {
 706     Node* phi = cl->phi();
 707     if (phi != NULL) {
 708       assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.");
 709       const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
 710       int next_stride = stride_con * 2; // stride after this unroll
 711       if (next_stride > 0) {
 712         if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow
 713             iv_type->_lo + next_stride >  iv_type->_hi) {


1244 
1245   // Flag main loop
1246   main_head->set_main_loop();
1247   if( peel_only ) main_head->set_main_no_pre_loop();
1248 
1249   // Subtract a trip count for the pre-loop.
1250   main_head->set_trip_count(main_head->trip_count() - 1);
1251 
1252   // It's difficult to be precise about the trip-counts
1253   // for the pre/post loops.  They are usually very short,
1254   // so guess that 4 trips is a reasonable value.
1255   post_head->set_profile_trip_cnt(4.0);
1256   pre_head->set_profile_trip_cnt(4.0);
1257 
1258   // Now force out all loop-invariant dominating tests.  The optimizer
1259   // finds some, but we _know_ they are all useless.
1260   peeled_dom_test_elim(loop,old_new);
1261   loop->record_for_igvn();
1262 }
1263 
1264 //------------------------------insert_vector_post_loop------------------------
1265 // Insert a copy of the atomic unrolled vectorized main loop as a post loop,
1266 // unroll_policy has already informed us that more unrolling is about to happen to
1267 // the main loop.  The resultant post loop will serve as a vectorized drain loop.
1268 void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1269   if (!loop->_head->is_CountedLoop()) return;
1270 
1271   CountedLoopNode *cl = loop->_head->as_CountedLoop();
1272 
1273   // only process vectorized main loops
1274   if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
1275 
1276   int slp_max_unroll_factor = cl->slp_max_unroll();
1277   int cur_unroll = cl->unrolled_count();
1278 
1279   if (slp_max_unroll_factor == 0) return;
1280 
1281   // only process atomic unroll vector loops (not super unrolled after vectorization)
1282   if (cur_unroll != slp_max_unroll_factor) return;
1283 
1284   // we only ever process this one time
1285   if (cl->has_atomic_post_loop()) return;
1286 
1287 #ifndef PRODUCT
1288   if (TraceLoopOpts) {
1289     tty->print("PostVector  ");
1290     loop->dump_head();
1291   }
1292 #endif
1293   C->set_major_progress();
1294 
1295   // Find common pieces of the loop being guarded with pre & post loops
1296   CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1297   CountedLoopEndNode *main_end = main_head->loopexit();
1298   guarantee(main_end != NULL, "no loop exit node");
1299   // diagnostic to show loop end is not properly formed
1300   assert(main_end->outcnt() == 2, "1 true, 1 false path only");
1301   uint dd_main_head = dom_depth(main_head);
1302   uint max = main_head->outcnt();
1303 
1304   // mark this loop as processed
1305   main_head->mark_has_atomic_post_loop();
1306 
1307   Node *pre_header = main_head->in(LoopNode::EntryControl);
1308   Node *init = main_head->init_trip();
1309   Node *incr = main_end->incr();
1310   Node *limit = main_end->limit();
1311   Node *stride = main_end->stride();
1312   Node *cmp = main_end->cmp_node();
1313   BoolTest::mask b_test = main_end->test_trip();
1314 
1315   //------------------------------
1316   // Step A: Create a new post-Loop.
1317   Node* main_exit = main_end->proj_out(false);
1318   assert(main_exit->Opcode() == Op_IfFalse, "");
1319   int dd_main_exit = dom_depth(main_exit);
1320 
1321   // Step A1: Clone the loop body of main.  The clone becomes the vector post-loop.
1322   // The main loop pre-header illegally has 2 control users (old & new loops).
1323   clone_loop(loop, old_new, dd_main_exit);
1324   assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
1325   CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
1326   post_head->set_normal_loop();
1327   post_head->set_post_loop(main_head);
1328 
1329   // Reduce the post-loop trip count.
1330   CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
1331   post_end->_prob = PROB_FAIR;
1332 
1333   // Build the main-loop normal exit.
1334   IfFalseNode *new_main_exit = new IfFalseNode(main_end);
1335   _igvn.register_new_node_with_optimizer(new_main_exit);
1336   set_idom(new_main_exit, main_end, dd_main_exit);
1337   set_loop(new_main_exit, loop->_parent);
1338 
1339   // Step A2: Build a zero-trip guard for the vector post-loop.  After leaving the
1340   // main-loop, the vector post-loop may not execute at all.  We 'opaque' the incr
1341   // (the vectorized main-loop trip-counter exit value) because we will be changing
1342   // the exit value (via additional unrolling) so we cannot constant-fold away the zero
1343   // trip guard until all unrolling is done.
1344   Node *zer_opaq = new Opaque1Node(C, incr);
1345   Node *zer_cmp = new CmpINode(zer_opaq, limit);
1346   Node *zer_bol = new BoolNode(zer_cmp, b_test);
1347   register_new_node(zer_opaq, new_main_exit);
1348   register_new_node(zer_cmp, new_main_exit);
1349   register_new_node(zer_bol, new_main_exit);
1350 
1351   // Build the IfNode
1352   IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
1353   _igvn.register_new_node_with_optimizer(zer_iff);
1354   set_idom(zer_iff, new_main_exit, dd_main_exit);
1355   set_loop(zer_iff, loop->_parent);
1356 
1357   // Plug in the false-path, taken if we need to skip vector post-loop
1358   _igvn.replace_input_of(main_exit, 0, zer_iff);
1359   set_idom(main_exit, zer_iff, dd_main_exit);
1360   set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
1361   // Make the true-path, must enter the vector post loop
1362   Node *zer_taken = new IfTrueNode(zer_iff);
1363   _igvn.register_new_node_with_optimizer(zer_taken);
1364   set_idom(zer_taken, zer_iff, dd_main_exit);
1365   set_loop(zer_taken, loop->_parent);
1366   // Plug in the true path
1367   _igvn.hash_delete(post_head);
1368   post_head->set_req(LoopNode::EntryControl, zer_taken);
1369   set_idom(post_head, zer_taken, dd_main_exit);
1370 
1371   Arena *a = Thread::current()->resource_area();
1372   VectorSet visited(a);
1373   Node_Stack clones(a, main_head->back_control()->outcnt());
1374   // Step A3: Make the fall-in values to the vector post-loop come from the
1375   // fall-out values of the main-loop.
1376   for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
1377     Node* main_phi = main_head->fast_out(i);
1378     if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) {
1379       Node *cur_phi = old_new[main_phi->_idx];
1380       Node *fallnew = clone_up_backedge_goo(main_head->back_control(),
1381                                             post_head->init_control(),
1382                                             main_phi->in(LoopNode::LoopBackControl),
1383                                             visited, clones);
1384       _igvn.hash_delete(cur_phi);
1385       cur_phi->set_req(LoopNode::EntryControl, fallnew);
1386     }
1387   }
1388 
1389   // CastII for the new post loop:
1390   bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
1391   assert(inserted, "no castII inserted");
1392 
1393   // It's difficult to be precise about the trip-counts
1394   // for post loops.  They are usually very short,
1395   // so guess that unit vector trips is a reasonable value.
1396   post_head->set_profile_trip_cnt((float)slp_max_unroll_factor);
1397 
1398   // Now force out all loop-invariant dominating tests.  The optimizer
1399   // finds some, but we _know_ they are all useless.
1400   peeled_dom_test_elim(loop, old_new);
1401   loop->record_for_igvn();
1402 }
1403 
1404 //------------------------------is_invariant-----------------------------
1405 // Return true if n is invariant
1406 bool IdealLoopTree::is_invariant(Node* n) const {
1407   Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1408   if (n_c->is_top()) return false;
1409   return !is_member(_phase->get_loop(n_c));
1410 }
1411 
1412 
1413 //------------------------------do_unroll--------------------------------------
1414 // Unroll the loop body one step - make each trip do 2 iterations.
1415 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
1416   assert(LoopUnrollLimit, "");
1417   CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1418   CountedLoopEndNode *loop_end = loop_head->loopexit();
1419   assert(loop_end, "");
1420 #ifndef PRODUCT
1421   if (PrintOpto && VerifyLoopOptimizations) {
1422     tty->print("Unrolling ");
1423     loop->dump_head();


2722 
2723   // If we have any of these conditions (RCE, alignment, unrolling) met, then
2724   // we switch to the pre-/main-/post-loop model.  This model also covers
2725   // peeling.
2726   if (should_rce || should_align || should_unroll) {
2727     if (cl->is_normal_loop())  // Convert to 'pre/main/post' loops
2728       phase->insert_pre_post_loops(this,old_new, !may_rce_align);
2729 
2730     // Adjust the pre- and main-loop limits to let the pre and post loops run
2731     // with full checks, but the main-loop with no checks.  Remove said
2732     // checks from the main body.
2733     if (should_rce)
2734       phase->do_range_check(this,old_new);
2735 
2736     // Double loop body for unrolling.  Adjust the minimum-trip test (will do
2737     // twice as many iterations as before) and the main body limit (only do
2738     // an even number of trips).  If we are peeling, we might enable some RCE
2739     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
2740     // peeling.
2741     if (should_unroll && !should_peel) {
2742       if (SuperWordLoopUnrollAnalysis) {
2743         phase->insert_vector_post_loop(this, old_new);
2744       }
2745       phase->do_unroll(this, old_new, true);
2746     }
2747 
2748     // Adjust the pre-loop limits to align the main body
2749     // iterations.
2750     if (should_align)
2751       Unimplemented();
2752 
2753   } else {                      // Else we have an unchanged counted loop
2754     if (should_peel)           // Might want to peel but do nothing else
2755       phase->do_peeling(this,old_new);
2756   }
2757   return true;
2758 }
2759 
2760 
2761 //=============================================================================
2762 //------------------------------iteration_split--------------------------------
2763 bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
2764   // Recursively iteration split nested loops


< prev index next >