< prev index next >

src/share/vm/opto/loopTransform.cpp

Print this page




 966 }
 967 
 968 bool PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) {
 969   Node* castii = new CastIINode(incr, TypeInt::INT, true);
 970   castii->set_req(0, ctrl);
 971   register_new_node(castii, ctrl);
 972   for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
 973     Node* n = incr->fast_out(i);
 974     if (n->is_Phi() && n->in(0) == loop) {
 975       int nrep = n->replace_edge(incr, castii);
 976       return true;
 977     }
 978   }
 979   return false;
 980 }
 981 
 982 //------------------------------insert_pre_post_loops--------------------------
 983 // Insert pre and post loops.  If peel_only is set, the pre-loop can not have
 984 // more iterations added.  It acts as a 'peel' only, no lower-bound RCE, no
 985 // alignment.  Useful to unroll loops that do no array accesses.
 986 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
 987 
 988 #ifndef PRODUCT
 989   if (TraceLoopOpts) {
 990     if (peel_only)
 991       tty->print("PeelMainPost ");
 992     else
 993       tty->print("PreMainPost  ");
 994     loop->dump_head();
 995   }
 996 #endif
 997   C->set_major_progress();
 998 
 999   // Find common pieces of the loop being guarded with pre & post loops
1000   CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1001   assert( main_head->is_normal_loop(), "" );
1002   CountedLoopEndNode *main_end = main_head->loopexit();
1003   guarantee(main_end != NULL, "no loop exit node");
1004   assert( main_end->outcnt() == 2, "1 true, 1 false path only" );
1005   uint dd_main_head = dom_depth(main_head);
1006   uint max = main_head->outcnt();
1007 
1008   Node *pre_header= main_head->in(LoopNode::EntryControl);
1009   Node *init      = main_head->init_trip();
1010   Node *incr      = main_end ->incr();
1011   Node *limit     = main_end ->limit();
1012   Node *stride    = main_end ->stride();
1013   Node *cmp       = main_end ->cmp_node();
1014   BoolTest::mask b_test = main_end->test_trip();
1015 
1016   // Need only 1 user of 'bol' because I will be hacking the loop bounds.
1017   Node *bol = main_end->in(CountedLoopEndNode::TestValue);
1018   if( bol->outcnt() != 1 ) {
1019     bol = bol->clone();
1020     register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
1021     _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
1022   }
1023   // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
1024   if( cmp->outcnt() != 1 ) {
1025     cmp = cmp->clone();
1026     register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
1027     _igvn.replace_input_of(bol, 1, cmp);
1028   }
1029 
1030   //------------------------------
1031   // Step A: Create Post-Loop.
1032   Node* main_exit = main_end->proj_out(false);
1033   assert( main_exit->Opcode() == Op_IfFalse, "" );
1034   int dd_main_exit = dom_depth(main_exit);
1035 
1036   // Step A1: Clone the loop body.  The clone becomes the post-loop.  The main
1037   // loop pre-header illegally has 2 control users (old & new loops).
1038   clone_loop( loop, old_new, dd_main_exit );
1039   assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" );
1040   CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
1041   post_head->set_post_loop(main_head);
1042 
1043   // Reduce the post-loop trip count.
1044   CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
1045   post_end->_prob = PROB_FAIR;
1046 
1047   // Build the main-loop normal exit.
1048   IfFalseNode *new_main_exit = new IfFalseNode(main_end);
1049   _igvn.register_new_node_with_optimizer( new_main_exit );
1050   set_idom(new_main_exit, main_end, dd_main_exit );
1051   set_loop(new_main_exit, loop->_parent);
1052 
1053   // Step A2: Build a zero-trip guard for the post-loop.  After leaving the
1054   // main-loop, the post-loop may not execute at all.  We 'opaque' the incr
1055   // (the main-loop trip-counter exit value) because we will be changing
1056   // the exit value (via unrolling) so we cannot constant-fold away the zero
1057   // trip guard until all unrolling is done.
1058   Node *zer_opaq = new Opaque1Node(C, incr);
1059   Node *zer_cmp  = new CmpINode( zer_opaq, limit );
1060   Node *zer_bol  = new BoolNode( zer_cmp, b_test );
1061   register_new_node( zer_opaq, new_main_exit );
1062   register_new_node( zer_cmp , new_main_exit );
1063   register_new_node( zer_bol , new_main_exit );
1064 
1065   // Build the IfNode
1066   IfNode *zer_iff = new IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN );
1067   _igvn.register_new_node_with_optimizer( zer_iff );
1068   set_idom(zer_iff, new_main_exit, dd_main_exit);
1069   set_loop(zer_iff, loop->_parent);
1070 
1071   // Plug in the false-path, taken if we need to skip post-loop
1072   _igvn.replace_input_of(main_exit, 0, zer_iff);
1073   set_idom(main_exit, zer_iff, dd_main_exit);
1074   set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
1075   // Make the true-path, must enter the post loop
1076   Node *zer_taken = new IfTrueNode( zer_iff );
1077   _igvn.register_new_node_with_optimizer( zer_taken );
1078   set_idom(zer_taken, zer_iff, dd_main_exit);
1079   set_loop(zer_taken, loop->_parent);
1080   // Plug in the true path
1081   _igvn.hash_delete( post_head );
1082   post_head->set_req(LoopNode::EntryControl, zer_taken);
1083   set_idom(post_head, zer_taken, dd_main_exit);
1084 
1085   Arena *a = Thread::current()->resource_area();
1086   VectorSet visited(a);
1087   Node_Stack clones(a, main_head->back_control()->outcnt());
1088   // Step A3: Make the fall-in values to the post-loop come from the
1089   // fall-out values of the main-loop.
1090   for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
1091     Node* main_phi = main_head->fast_out(i);
1092     if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) {
1093       Node *post_phi = old_new[main_phi->_idx];
1094       Node *fallmain  = clone_up_backedge_goo(main_head->back_control(),
1095                                               post_head->init_control(),
1096                                               main_phi->in(LoopNode::LoopBackControl),
1097                                               visited, clones);
1098       _igvn.hash_delete(post_phi);
1099       post_phi->set_req( LoopNode::EntryControl, fallmain );
1100     }
1101   }
1102 
1103   // Update local caches for next stanza
1104   main_exit = new_main_exit;
1105 
1106 
1107   //------------------------------
1108   // Step B: Create Pre-Loop.
1109 
1110   // Step B1: Clone the loop body.  The clone becomes the pre-loop.  The main
1111   // loop pre-header illegally has 2 control users (old & new loops).
1112   clone_loop( loop, old_new, dd_main_head );
1113   CountedLoopNode*    pre_head = old_new[main_head->_idx]->as_CountedLoop();
1114   CountedLoopEndNode* pre_end  = old_new[main_end ->_idx]->as_CountedLoopEnd();
1115   pre_head->set_pre_loop(main_head);
1116   Node *pre_incr = old_new[incr->_idx];
1117 
1118   // Reduce the pre-loop trip count.
1119   pre_end->_prob = PROB_FAIR;
1120 
1121   // Find the pre-loop normal exit.
1122   Node* pre_exit = pre_end->proj_out(false);
1123   assert( pre_exit->Opcode() == Op_IfFalse, "" );
1124   IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
1125   _igvn.register_new_node_with_optimizer( new_pre_exit );
1126   set_idom(new_pre_exit, pre_end, dd_main_head);
1127   set_loop(new_pre_exit, loop->_parent);
1128 
1129   // Step B2: Build a zero-trip guard for the main-loop.  After leaving the
1130   // pre-loop, the main-loop may not execute at all.  Later in life this
1131   // zero-trip guard will become the minimum-trip guard when we unroll
1132   // the main-loop.
1133   Node *min_opaq = new Opaque1Node(C, limit);
1134   Node *min_cmp  = new CmpINode( pre_incr, min_opaq );
1135   Node *min_bol  = new BoolNode( min_cmp, b_test );
1136   register_new_node( min_opaq, new_pre_exit );
1137   register_new_node( min_cmp , new_pre_exit );
1138   register_new_node( min_bol , new_pre_exit );
1139 
1140   // Build the IfNode (assume the main-loop is executed always).
1141   IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
1142   _igvn.register_new_node_with_optimizer( min_iff );
1143   set_idom(min_iff, new_pre_exit, dd_main_head);
1144   set_loop(min_iff, loop->_parent);
1145 
1146   // Plug in the false-path, taken if we need to skip main-loop
1147   _igvn.hash_delete( pre_exit );
1148   pre_exit->set_req(0, min_iff);
1149   set_idom(pre_exit, min_iff, dd_main_head);
1150   set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
1151   // Make the true-path, must enter the main loop
1152   Node *min_taken = new IfTrueNode( min_iff );
1153   _igvn.register_new_node_with_optimizer( min_taken );
1154   set_idom(min_taken, min_iff, dd_main_head);
1155   set_loop(min_taken, loop->_parent);
1156   // Plug in the true path
1157   _igvn.hash_delete( main_head );
1158   main_head->set_req(LoopNode::EntryControl, min_taken);
1159   set_idom(main_head, min_taken, dd_main_head);
1160 
1161   visited.Clear();
1162   clones.clear();

1163   // Step B3: Make the fall-in values to the main-loop come from the
1164   // fall-out values of the pre-loop.
1165   for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
1166     Node* main_phi = main_head->fast_out(i2);
1167     if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
1168       Node *pre_phi = old_new[main_phi->_idx];
1169       Node *fallpre  = clone_up_backedge_goo(pre_head->back_control(),
1170                                              main_head->init_control(),
1171                                              pre_phi->in(LoopNode::LoopBackControl),
1172                                              visited, clones);
1173       _igvn.hash_delete(main_phi);
1174       main_phi->set_req( LoopNode::EntryControl, fallpre );
1175     }
1176   }
1177 
1178   // Nodes inside the loop may be control dependent on a predicate
1179   // that was moved before the preloop. If the back branch of the main
1180   // or post loops becomes dead, those nodes won't be dependent on the
1181   // test that guards that loop nest anymore which could lead to an
1182   // incorrect array access because it executes independently of the
1183   // test that was guarding the loop nest. We add a special CastII on
1184   // the if branch that enters the loop, between the input induction
1185   // variable value and the induction variable Phi to preserve correct
1186   // dependencies.
1187 
1188   // CastII for the post loop:
1189   bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
1190   assert(inserted, "no castII inserted");
1191 
1192   // CastII for the main loop:
1193   inserted = cast_incr_before_loop(pre_incr, min_taken, main_head);
1194   assert(inserted, "no castII inserted");
1195 
1196   // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
1197   // RCE and alignment may change this later.
1198   Node *cmp_end = pre_end->cmp_node();
1199   assert( cmp_end->in(2) == limit, "" );
1200   Node *pre_limit = new AddINode( init, stride );
1201 
1202   // Save the original loop limit in this Opaque1 node for
1203   // use by range check elimination.
1204   Node *pre_opaq  = new Opaque1Node(C, pre_limit, limit);
1205 
1206   register_new_node( pre_limit, pre_head->in(0) );
1207   register_new_node( pre_opaq , pre_head->in(0) );
1208 
1209   // Since no other users of pre-loop compare, I can hack limit directly
1210   assert( cmp_end->outcnt() == 1, "no other users" );
1211   _igvn.hash_delete(cmp_end);
1212   cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
1213 
1214   // Special case for not-equal loop bounds:
1215   // Change pre loop test, main loop test, and the
1216   // main loop guard test to use lt or gt depending on stride
1217   // direction:
1218   // positive stride use <
1219   // negative stride use >
1220   //
1221   // not-equal test is kept for post loop to handle case
1222   // when init > limit when stride > 0 (and reverse).
1223 
1224   if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
1225 
1226     BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
1227     // Modify pre loop end condition
1228     Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1229     BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
1230     register_new_node( new_bol0, pre_head->in(0) );
1231     _igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
1232     // Modify main loop guard condition
1233     assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
1234     BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
1235     register_new_node( new_bol1, new_pre_exit );
1236     _igvn.hash_delete(min_iff);
1237     min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
1238     // Modify main loop end condition
1239     BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1240     BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
1241     register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
1242     _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
1243   }
1244 
1245   // Flag main loop
1246   main_head->set_main_loop();
1247   if( peel_only ) main_head->set_main_no_pre_loop();
1248 
1249   // Subtract a trip count for the pre-loop.
1250   main_head->set_trip_count(main_head->trip_count() - 1);
1251 
1252   // It's difficult to be precise about the trip-counts
1253   // for the pre/post loops.  They are usually very short,
1254   // so guess that 4 trips is a reasonable value.
1255   post_head->set_profile_trip_cnt(4.0);
1256   pre_head->set_profile_trip_cnt(4.0);
1257 
1258   // Now force out all loop-invariant dominating tests.  The optimizer
1259   // finds some, but we _know_ they are all useless.
1260   peeled_dom_test_elim(loop,old_new);
1261   loop->record_for_igvn();
1262 }
1263 
//------------------------------insert_vector_post_loop------------------------
// Insert a copy of the atomic unrolled vectorized main loop as a post loop,
// unroll_policy has already informed us that more unrolling is about to happen to
// the main loop.  The resultant post loop will serve as a vectorized drain loop.
void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
  // Only counted loops can be split this way.
  if (!loop->_head->is_CountedLoop()) return;

  CountedLoopNode *cl = loop->_head->as_CountedLoop();

  // only process vectorized main loops
  if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;

  // NOTE(review): 'cur_unroll' and 'slp_max_unroll_factor' are computed in
  // code elided from this excerpt -- presumably the loop's current unroll
  // count and the SLP-imposed maximum unroll factor; confirm against the
  // full source.

  // only process atomic unroll vector loops (not super unrolled after vectorization)
  if (cur_unroll != slp_max_unroll_factor) return;

  // we only ever process this one time
  if (cl->has_atomic_post_loop()) return;

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print("PostVector  ");
    loop->dump_head();
  }
#endif
  C->set_major_progress();

  // Find common pieces of the loop being guarded with pre & post loops
  CountedLoopNode *main_head = loop->_head->as_CountedLoop();
  CountedLoopEndNode *main_end = main_head->loopexit();
  guarantee(main_end != NULL, "no loop exit node");
  // diagnostic to show loop end is not properly formed
  assert(main_end->outcnt() == 2, "1 true, 1 false path only");
  uint dd_main_head = dom_depth(main_head);
  uint max = main_head->outcnt();

  // mark this loop as processed
  main_head->mark_has_atomic_post_loop();

  // Cache the pieces of the main loop's trip-counter machinery.
  Node *pre_header = main_head->in(LoopNode::EntryControl);
  Node *init = main_head->init_trip();
  Node *incr = main_end->incr();
  Node *limit = main_end->limit();
  Node *stride = main_end->stride();
  Node *cmp = main_end->cmp_node();
  BoolTest::mask b_test = main_end->test_trip();

  //------------------------------
  // Step A: Create a new post-Loop.
  Node* main_exit = main_end->proj_out(false);
  assert(main_exit->Opcode() == Op_IfFalse, "");
  int dd_main_exit = dom_depth(main_exit);

  // Step A1: Clone the loop body of main.  The clone becomes the vector post-loop.
  // The main loop pre-header illegally has 2 control users (old & new loops).
  clone_loop(loop, old_new, dd_main_exit);
  assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
  CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
  // The clone is a normal loop (not itself a main loop) linked back to main.
  post_head->set_normal_loop();
  post_head->set_post_loop(main_head);

  // Reduce the post-loop trip count.
  CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
  post_end->_prob = PROB_FAIR;

  // Build the main-loop normal exit.
  IfFalseNode *new_main_exit = new IfFalseNode(main_end);
  _igvn.register_new_node_with_optimizer(new_main_exit);
  set_idom(new_main_exit, main_end, dd_main_exit);
  set_loop(new_main_exit, loop->_parent);

  // Step A2: Build a zero-trip guard for the vector post-loop.  After leaving the
  // main-loop, the vector post-loop may not execute at all.  We 'opaque' the incr
  // (the vectorized main-loop trip-counter exit value) because we will be changing
  // the exit value (via additional unrolling) so we cannot constant-fold away the zero
  // trip guard until all unrolling is done.
  Node *zer_opaq = new Opaque1Node(C, incr);
  Node *zer_cmp = new CmpINode(zer_opaq, limit);
  Node *zer_bol = new BoolNode(zer_cmp, b_test);
  register_new_node(zer_opaq, new_main_exit);
  register_new_node(zer_cmp, new_main_exit);
  register_new_node(zer_bol, new_main_exit);

  // Build the IfNode
  IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
  _igvn.register_new_node_with_optimizer(zer_iff);
  set_idom(zer_iff, new_main_exit, dd_main_exit);
  set_loop(zer_iff, loop->_parent);

  // Plug in the false-path, taken if we need to skip vector post-loop
  _igvn.replace_input_of(main_exit, 0, zer_iff);
  set_idom(main_exit, zer_iff, dd_main_exit);
  set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
  // Make the true-path, must enter the vector post loop
  Node *zer_taken = new IfTrueNode(zer_iff);
  _igvn.register_new_node_with_optimizer(zer_taken);
  set_idom(zer_taken, zer_iff, dd_main_exit);
  set_loop(zer_taken, loop->_parent);
  // Plug in the true path
  _igvn.hash_delete(post_head);
  post_head->set_req(LoopNode::EntryControl, zer_taken);
  set_idom(post_head, zer_taken, dd_main_exit);

  Arena *a = Thread::current()->resource_area();
  VectorSet visited(a);
  Node_Stack clones(a, main_head->back_control()->outcnt());
  // Step A3: Make the fall-in values to the vector post-loop come from the
  // fall-out values of the main-loop.
  for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
    Node* main_phi = main_head->fast_out(i);
    if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) {
      Node *cur_phi = old_new[main_phi->_idx];
      Node *fallnew = clone_up_backedge_goo(main_head->back_control(),
                                            post_head->init_control(),
                                            main_phi->in(LoopNode::LoopBackControl),
                                            visited, clones);
      _igvn.hash_delete(cur_phi);
      cur_phi->set_req(LoopNode::EntryControl, fallnew);
    }
  }

  // CastII for the new post loop:
  // pin the post-loop's incoming trip-counter value below the guard so
  // dependent nodes cannot float above it (see cast_incr_before_loop).
  bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
  assert(inserted, "no castII inserted");

  // It's difficult to be precise about the trip-counts
  // for post loops.  They are usually very short,
  // so guess that unit vector trips is a reasonable value.
  post_head->set_profile_trip_cnt((float)slp_max_unroll_factor);

  // Now force out all loop-invariant dominating tests.  The optimizer
  // finds some, but we _know_ they are all useless.
  peeled_dom_test_elim(loop, old_new);
  loop->record_for_igvn();
}
1403 

1404 //------------------------------is_invariant-----------------------------
1405 // Return true if n is invariant
1406 bool IdealLoopTree::is_invariant(Node* n) const {
1407   Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1408   if (n_c->is_top()) return false;
1409   return !is_member(_phase->get_loop(n_c));
1410 }
1411 
1412 
1413 //------------------------------do_unroll--------------------------------------
1414 // Unroll the loop body one step - make each trip do 2 iterations.
1415 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
1416   assert(LoopUnrollLimit, "");
1417   CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1418   CountedLoopEndNode *loop_end = loop_head->loopexit();
1419   assert(loop_end, "");
1420 #ifndef PRODUCT
1421   if (PrintOpto && VerifyLoopOptimizations) {
1422     tty->print("Unrolling ");
1423     loop->dump_head();


2080         Node *ctrl_off = get_ctrl(exp->in(2));
2081         Node* offset = new SubINode(zero, exp->in(2));
2082         register_new_node(offset, ctrl_off);
2083         *p_offset = offset;
2084       }
2085       return true;
2086     }
2087     if (is_scaled_iv(exp->in(2), iv, p_scale)) {
2088       if (p_offset != NULL) {
2089         *p_scale *= -1;
2090         *p_offset = exp->in(1);
2091       }
2092       return true;
2093     }
2094   }
2095   return false;
2096 }
2097 
2098 //------------------------------do_range_check---------------------------------
2099 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
2100 void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
2101 #ifndef PRODUCT
2102   if (PrintOpto && VerifyLoopOptimizations) {
2103     tty->print("Range Check Elimination ");
2104     loop->dump_head();
2105   } else if (TraceLoopOpts) {
2106     tty->print("RangeCheck   ");
2107     loop->dump_head();
2108   }
2109 #endif
2110   assert(RangeCheckElimination, "");
2111   CountedLoopNode *cl = loop->_head->as_CountedLoop();
2112   assert(cl->is_main_loop(), "");
2113 
2114   // protect against stride not being a constant
2115   if (!cl->stride_is_con())
2116     return;
2117 
2118   // Find the trip counter; we are iteration splitting based on it
2119   Node *trip_counter = cl->phi();
2120   // Find the main loop limit; we will trim it's iterations


2124   // Need to find the main-loop zero-trip guard
2125   Node *ctrl  = cl->in(LoopNode::EntryControl);
2126   assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
2127   Node *iffm = ctrl->in(0);
2128   assert(iffm->Opcode() == Op_If, "");
2129   Node *bolzm = iffm->in(1);
2130   assert(bolzm->Opcode() == Op_Bool, "");
2131   Node *cmpzm = bolzm->in(1);
2132   assert(cmpzm->is_Cmp(), "");
2133   Node *opqzm = cmpzm->in(2);
2134   // Can not optimize a loop if zero-trip Opaque1 node is optimized
2135   // away and then another round of loop opts attempted.
2136   if (opqzm->Opcode() != Op_Opaque1)
2137     return;
2138   assert(opqzm->in(1) == main_limit, "do not understand situation");
2139 
2140   // Find the pre-loop limit; we will expand its iterations to
2141   // not ever trip low tests.
2142   Node *p_f = iffm->in(0);
2143   // pre loop may have been optimized out
2144   if (p_f->Opcode() != Op_IfFalse) {
2145     return;
2146   }
2147   CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2148   assert(pre_end->loopnode()->is_pre_loop(), "");
2149   Node *pre_opaq1 = pre_end->limit();
2150   // Occasionally it's possible for a pre-loop Opaque1 node to be
2151   // optimized away and then another round of loop opts attempted.
2152   // We can not optimize this particular loop in that case.
2153   if (pre_opaq1->Opcode() != Op_Opaque1)
2154     return;
2155   Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2156   Node *pre_limit = pre_opaq->in(1);
2157 
2158   // Where do we put new limit calculations
2159   Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2160 
2161   // Ensure the original loop limit is available from the
2162   // pre-loop Opaque1 node.
2163   Node *orig_limit = pre_opaq->original_loop_limit();
2164   if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP)
2165     return;
2166 


2168 
2169   int stride_con = cl->stride_con();
2170   Node *zero = _igvn.intcon(0);
2171   Node *one  = _igvn.intcon(1);
2172   // Use symmetrical int range [-max_jint,max_jint]
2173   Node *mini = _igvn.intcon(-max_jint);
2174   set_ctrl(zero, C->root());
2175   set_ctrl(one,  C->root());
2176   set_ctrl(mini, C->root());
2177 
2178   // Range checks that do not dominate the loop backedge (ie.
2179   // conditionally executed) can lengthen the pre loop limit beyond
2180   // the original loop limit. To prevent this, the pre limit is
2181   // (for stride > 0) MINed with the original loop limit (MAXed
2182   // stride < 0) when some range_check (rc) is conditionally
2183   // executed.
2184   bool conditional_rc = false;
2185 
2186   // Check loop body for tests of trip-counter plus loop-invariant vs
2187   // loop-invariant.
2188   for( uint i = 0; i < loop->_body.size(); i++ ) {
2189     Node *iff = loop->_body[i];
2190     if (iff->Opcode() == Op_If ||
2191         iff->Opcode() == Op_RangeCheck) { // Test?
2192       // Test is an IfNode, has 2 projections.  If BOTH are in the loop
2193       // we need loop unswitching instead of iteration splitting.

2194       Node *exit = loop->is_loop_exit(iff);
2195       if( !exit ) continue;
2196       int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
2197 
2198       // Get boolean condition to test
2199       Node *i1 = iff->in(1);
2200       if( !i1->is_Bool() ) continue;
2201       BoolNode *bol = i1->as_Bool();
2202       BoolTest b_test = bol->_test;
2203       // Flip sense of test if exit condition is flipped
2204       if( flip )
2205         b_test = b_test.negate();
2206 
2207       // Get compare
2208       Node *cmp = bol->in(1);
2209 
2210       // Look for trip_counter + offset vs limit
2211       Node *rc_exp = cmp->in(1);
2212       Node *limit  = cmp->in(2);
2213       jint scale_con= 1;        // Assume trip counter not scaled
2214 
2215       Node *limit_c = get_ctrl(limit);
2216       if( loop->is_member(get_loop(limit_c) ) ) {
2217         // Compare might have operands swapped; commute them
2218         b_test = b_test.commute();
2219         rc_exp = cmp->in(2);
2220         limit  = cmp->in(1);
2221         limit_c = get_ctrl(limit);
2222         if( loop->is_member(get_loop(limit_c) ) )
2223           continue;             // Both inputs are loop varying; cannot RCE
2224       }
2225       // Here we know 'limit' is loop invariant
2226 
2227       // 'limit' maybe pinned below the zero trip test (probably from a
2228       // previous round of rce), in which case, it can't be used in the
2229       // zero trip test expression which must occur before the zero test's if.
2230       if( limit_c == ctrl ) {
2231         continue;  // Don't rce this check but continue looking for other candidates.
2232       }
2233 
2234       // Check for scaled induction variable plus an offset
2235       Node *offset = NULL;
2236 
2237       if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) {
2238         continue;
2239       }
2240 
2241       Node *offset_c = get_ctrl(offset);
2242       if( loop->is_member( get_loop(offset_c) ) )
2243         continue;               // Offset is not really loop invariant
2244       // Here we know 'offset' is loop invariant.
2245 
2246       // As above for the 'limit', the 'offset' maybe pinned below the
2247       // zero trip test.
2248       if( offset_c == ctrl ) {
2249         continue; // Don't rce this check but continue looking for other candidates.
2250       }
2251 #ifdef ASSERT
2252       if (TraceRangeLimitCheck) {
2253         tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2254         bol->dump(2);
2255       }
2256 #endif
2257       // At this point we have the expression as:
2258       //   scale_con * trip_counter + offset :: limit
2259       // where scale_con, offset and limit are loop invariant.  Trip_counter
2260       // monotonically increases by stride_con, a constant.  Both (or either)
2261       // stride_con and scale_con can be negative which will flip about the
2262       // sense of the test.
2263 
2264       // Adjust pre and main loop limits to guard the correct iteration set
2265       if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2266         if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2267           // The underflow and overflow limits: 0 <= scale*I+offset < limit
2268           add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2269           if (!conditional_rc) {
2270             // (0-offset)/scale could be outside of loop iterations range.
2271             conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
2272           }
2273         } else {
2274           if (PrintOpto) {
2275             tty->print_cr("missed RCE opportunity");
2276           }
2277           continue;             // In release mode, ignore it
2278         }
2279       } else {                  // Otherwise work on normal compares
2280         switch( b_test._test ) {
2281         case BoolTest::gt:
2282           // Fall into GE case
2283         case BoolTest::ge:
2284           // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2285           scale_con = -scale_con;
2286           offset = new SubINode( zero, offset );
2287           register_new_node( offset, pre_ctrl );
2288           limit  = new SubINode( zero, limit  );
2289           register_new_node( limit, pre_ctrl );
2290           // Fall into LE case
2291         case BoolTest::le:
2292           if (b_test._test != BoolTest::gt) {
2293             // Convert X <= Y to X < Y+1
2294             limit = new AddINode( limit, one );
2295             register_new_node( limit, pre_ctrl );
2296           }
2297           // Fall into LT case
2298         case BoolTest::lt:
2299           // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
2300           // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
2301           // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2302           add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
2303           if (!conditional_rc) {
2304             // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
2305             // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
2306             // still be outside of loop range.
2307             conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
2308           }
2309           break;
2310         default:
2311           if (PrintOpto) {
2312             tty->print_cr("missed RCE opportunity");
2313           }
2314           continue;             // Unhandled case
2315         }
2316       }
2317 
2318       // Kill the eliminated test
2319       C->set_major_progress();
2320       Node *kill_con = _igvn.intcon( 1-flip );
2321       set_ctrl(kill_con, C->root());
2322       _igvn.replace_input_of(iff, 1, kill_con);
2323       // Find surviving projection
2324       assert(iff->is_If(), "");
2325       ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2326       // Find loads off the surviving projection; remove their control edge
2327       for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2328         Node* cd = dp->fast_out(i); // Control-dependent node
2329         if (cd->is_Load() && cd->depends_only_on_test()) {   // Loads can now float around in the loop
2330           // Allow the load to float around in the loop, or before it
2331           // but NOT before the pre-loop.
2332           _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
2333           --i;
2334           --imax;
2335         }
2336       }

2337 
2338     } // End of is IF
2339 
2340   }
2341 
2342   // Update loop limits
2343   if (conditional_rc) {
2344     pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2345                                  : (Node*)new MaxINode(pre_limit, orig_limit);
2346     register_new_node(pre_limit, pre_ctrl);
2347   }
2348   _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2349 
2350   // Note:: we are making the main loop limit no longer precise;
2351   // need to round up based on stride.
2352   cl->set_nonexact_trip_count();
2353   if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
2354     // "Standard" round-up logic:  ([main_limit-init+(y-1)]/y)*y+init
2355     // Hopefully, compiler will optimize for powers of 2.
2356     Node *ctrl = get_ctrl(main_limit);
2357     Node *stride = cl->stride();
2358     Node *init = cl->init_trip()->uncast();
2359     Node *span = new SubINode(main_limit,init);
2360     register_new_node(span,ctrl);
2361     Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1));
2362     Node *add = new AddINode(span,rndup);
2363     register_new_node(add,ctrl);
2364     Node *div = new DivINode(0,add,stride);
2365     register_new_node(div,ctrl);
2366     Node *mul = new MulINode(div,stride);
2367     register_new_node(mul,ctrl);
2368     Node *newlim = new AddINode(mul,init);
2369     register_new_node(newlim,ctrl);
2370     main_limit = newlim;
2371   }
2372 
2373   Node *main_cle = cl->loopexit();
2374   Node *main_bol = main_cle->in(1);
2375   // Hacking loop bounds; need private copies of exit test
2376   if( main_bol->outcnt() > 1 ) {// BoolNode shared?
2377     main_bol = main_bol->clone();// Clone a private BoolNode
2378     register_new_node( main_bol, main_cle->in(0) );
2379     _igvn.replace_input_of(main_cle, 1, main_bol);
2380   }
2381   Node *main_cmp = main_bol->in(1);
2382   if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
2383     main_cmp = main_cmp->clone();// Clone a private CmpNode
2384     register_new_node( main_cmp, main_cle->in(0) );
2385     _igvn.replace_input_of(main_bol, 1, main_cmp);
2386   }
2387   // Hack the now-private loop bounds
2388   _igvn.replace_input_of(main_cmp, 2, main_limit);
2389   // The OpaqueNode is unshared by design
2390   assert( opqzm->outcnt() == 1, "cannot hack shared node" );
2391   _igvn.replace_input_of(opqzm, 1, main_limit);
2392 }
2393 
































































































































































2394 //------------------------------DCE_loop_body----------------------------------
2395 // Remove simplistic dead code from loop body
2396 void IdealLoopTree::DCE_loop_body() {
2397   for( uint i = 0; i < _body.size(); i++ )
2398     if( _body.at(i)->outcnt() == 0 )
2399       _body.map( i--, _body.pop() );
2400 }
2401 
2402 
2403 //------------------------------adjust_loop_exit_prob--------------------------
2404 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
2405 // Replace with a 1-in-10 exit guess.
2406 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
2407   Node *test = tail();
2408   while( test != _head ) {
2409     uint top = test->Opcode();
2410     if( top == Op_IfTrue || top == Op_IfFalse ) {
2411       int test_con = ((ProjNode*)test)->_con;
2412       assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity");
2413       IfNode *iff = test->in(0)->as_If();


2731   bool should_align = policy_align(phase);
2732 
2733   // If not RCE'ing (iteration splitting) or Aligning, then we do not
2734   // need a pre-loop.  We may still need to peel an initial iteration but
2735   // we will not be needing an unknown number of pre-iterations.
2736   //
2737   // Basically, if may_rce_align reports FALSE first time through,
2738   // we will not be able to later do RCE or Aligning on this loop.
2739   bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
2740 
2741   // If we have any of these conditions (RCE, alignment, unrolling) met, then
2742   // we switch to the pre-/main-/post-loop model.  This model also covers
2743   // peeling.
2744   if (should_rce || should_align || should_unroll) {
2745     if (cl->is_normal_loop())  // Convert to 'pre/main/post' loops
2746       phase->insert_pre_post_loops(this,old_new, !may_rce_align);
2747 
2748     // Adjust the pre- and main-loop limits to let the pre and post loops run
2749     // with full checks, but the main-loop with no checks.  Remove said
2750     // checks from the main body.
2751     if (should_rce)
2752       phase->do_range_check(this,old_new);











2753 
2754     // Double loop body for unrolling.  Adjust the minimum-trip test (will do
2755     // twice as many iterations as before) and the main body limit (only do
2756     // an even number of trips).  If we are peeling, we might enable some RCE
2757     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
2758     // peeling.
2759     if (should_unroll && !should_peel) {
2760       if (SuperWordLoopUnrollAnalysis) {
2761         phase->insert_vector_post_loop(this, old_new);
2762       }
2763       phase->do_unroll(this, old_new, true);
2764     }
2765 
2766     // Adjust the pre-loop limits to align the main body
2767     // iterations.
2768     if (should_align)
2769       Unimplemented();
2770 
2771   } else {                      // Else we have an unchanged counted loop
2772     if (should_peel)           // Might want to peel but do nothing else




 966 }
 967 
 968 bool PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) {
 969   Node* castii = new CastIINode(incr, TypeInt::INT, true);
 970   castii->set_req(0, ctrl);
 971   register_new_node(castii, ctrl);
 972   for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
 973     Node* n = incr->fast_out(i);
 974     if (n->is_Phi() && n->in(0) == loop) {
 975       int nrep = n->replace_edge(incr, castii);
 976       return true;
 977     }
 978   }
 979   return false;
 980 }
 981 
//------------------------------insert_pre_post_loops--------------------------
// Insert pre and post loops.  If peel_only is set, the pre-loop can not have
// more iterations added.  It acts as a 'peel' only, no lower-bound RCE, no
// alignment.  Useful to unroll loops that do no array accesses.
//
// On exit the original loop is flagged as the main loop, a cloned copy runs
// before it (pre-loop, shortened to one trip for now) and another cloned copy
// runs after it (post-loop, added via insert_post_loop).  Zero-trip guards
// are placed between the stages so each later stage may be skipped entirely.
// old_new maps original node _idx -> its clone for the most recent clone_loop.
void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_new, bool peel_only) {

#ifndef PRODUCT
  if (TraceLoopOpts) {
    if (peel_only)
      tty->print("PeelMainPost ");
    else
      tty->print("PreMainPost  ");
    loop->dump_head();
  }
#endif
  C->set_major_progress();

  // Find common pieces of the loop being guarded with pre & post loops
  CountedLoopNode *main_head = loop->_head->as_CountedLoop();
  assert(main_head->is_normal_loop(), "");
  CountedLoopEndNode *main_end = main_head->loopexit();
  guarantee(main_end != NULL, "no loop exit node");
  assert(main_end->outcnt() == 2, "1 true, 1 false path only");
  uint dd_main_head = dom_depth(main_head);
  // NOTE(review): 'max' and 'pre_header' are not referenced in the visible
  // body of this method; they appear to be vestigial locals.
  uint max = main_head->outcnt();

  Node *pre_header= main_head->in(LoopNode::EntryControl);
  Node *init      = main_head->init_trip();
  Node *incr      = main_end ->incr();
  Node *limit     = main_end ->limit();
  Node *stride    = main_end ->stride();
  Node *cmp       = main_end ->cmp_node();
  BoolTest::mask b_test = main_end->test_trip();

  // Need only 1 user of 'bol' because I will be hacking the loop bounds.
  // If the Bool is shared, clone a private copy so mutating the main-loop
  // exit test cannot affect unrelated users.
  Node *bol = main_end->in(CountedLoopEndNode::TestValue);
  if (bol->outcnt() != 1) {
    bol = bol->clone();
    register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
    _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
  }
  // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
  if (cmp->outcnt() != 1) {
    cmp = cmp->clone();
    register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
    _igvn.replace_input_of(bol, 1, cmp);
  }

  // Add the post loop
  PostLoopInfo post_loop_info;
  insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);

  // Update local caches for next stanza
  // NOTE(review): 'main_exit' is not used again in the visible body.
  Node *main_exit = post_loop_info.new_main_exit;


  //------------------------------
  // Step B: Create Pre-Loop.

  // Step B1: Clone the loop body.  The clone becomes the pre-loop.  The main
  // loop pre-header illegally has 2 control users (old & new loops).
  clone_loop(loop, old_new, dd_main_head);
  CountedLoopNode*    pre_head = old_new[main_head->_idx]->as_CountedLoop();
  CountedLoopEndNode* pre_end  = old_new[main_end ->_idx]->as_CountedLoopEnd();
  pre_head->set_pre_loop(main_head);
  Node *pre_incr = old_new[incr->_idx];

  // Reduce the pre-loop trip count.
  pre_end->_prob = PROB_FAIR;

  // Find the pre-loop normal exit.
  Node* pre_exit = pre_end->proj_out(false);
  assert(pre_exit->Opcode() == Op_IfFalse, "");
  // Insert a fresh IfFalse projection; the original pre_exit will be
  // re-wired below to hang off the new zero-trip guard instead.
  IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
  _igvn.register_new_node_with_optimizer(new_pre_exit);
  set_idom(new_pre_exit, pre_end, dd_main_head);
  set_loop(new_pre_exit, loop->_parent);

  // Step B2: Build a zero-trip guard for the main-loop.  After leaving the
  // pre-loop, the main-loop may not execute at all.  Later in life this
  // zero-trip guard will become the minimum-trip guard when we unroll
  // the main-loop.
  // The Opaque1 hides 'limit' from igvn so the guard is not constant-folded
  // away before unrolling has finished adjusting it.
  Node *min_opaq = new Opaque1Node(C, limit);
  Node *min_cmp  = new CmpINode(pre_incr, min_opaq);
  Node *min_bol  = new BoolNode(min_cmp, b_test);
  register_new_node(min_opaq, new_pre_exit);
  register_new_node(min_cmp , new_pre_exit);
  register_new_node(min_bol , new_pre_exit);

  // Build the IfNode (assume the main-loop is executed always).
  IfNode *min_iff = new IfNode(new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN);
  _igvn.register_new_node_with_optimizer(min_iff);
  set_idom(min_iff, new_pre_exit, dd_main_head);
  set_loop(min_iff, loop->_parent);

  // Plug in the false-path, taken if we need to skip main-loop
  _igvn.hash_delete(pre_exit);
  pre_exit->set_req(0, min_iff);
  set_idom(pre_exit, min_iff, dd_main_head);
  set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
  // Make the true-path, must enter the main loop
  Node *min_taken = new IfTrueNode(min_iff);
  _igvn.register_new_node_with_optimizer(min_taken);
  set_idom(min_taken, min_iff, dd_main_head);
  set_loop(min_taken, loop->_parent);
  // Plug in the true path
  _igvn.hash_delete(main_head);
  main_head->set_req(LoopNode::EntryControl, min_taken);
  set_idom(main_head, min_taken, dd_main_head);

  Arena *a = Thread::current()->resource_area();
  VectorSet visited(a);
  Node_Stack clones(a, main_head->back_control()->outcnt());
  // Step B3: Make the fall-in values to the main-loop come from the
  // fall-out values of the pre-loop.
  for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
    Node* main_phi = main_head->fast_out(i2);
    if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0) {
      Node *pre_phi = old_new[main_phi->_idx];
      Node *fallpre  = clone_up_backedge_goo(pre_head->back_control(),
                                             main_head->init_control(),
                                             pre_phi->in(LoopNode::LoopBackControl),
                                             visited, clones);
      _igvn.hash_delete(main_phi);
      main_phi->set_req(LoopNode::EntryControl, fallpre);
    }
  }

  // Nodes inside the loop may be control dependent on a predicate
  // that was moved before the preloop. If the back branch of the main
  // or post loops becomes dead, those nodes won't be dependent on the
  // test that guards that loop nest anymore which could lead to an
  // incorrect array access because it executes independently of the
  // test that was guarding the loop nest. We add a special CastII on
  // the if branch that enters the loop, between the input induction
  // variable value and the induction variable Phi to preserve correct
  // dependencies.

  // CastII for the main loop:
  bool inserted = cast_incr_before_loop(pre_incr, min_taken, main_head);
  assert(inserted, "no castII inserted");

  // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
  // RCE and alignment may change this later.
  Node *cmp_end = pre_end->cmp_node();
  assert(cmp_end->in(2) == limit, "");
  Node *pre_limit = new AddINode(init, stride);

  // Save the original loop limit in this Opaque1 node for
  // use by range check elimination.
  Node *pre_opaq  = new Opaque1Node(C, pre_limit, limit);

  register_new_node(pre_limit, pre_head->in(0));
  register_new_node(pre_opaq , pre_head->in(0));

  // Since no other users of pre-loop compare, I can hack limit directly
  // With peel_only the Opaque1 is skipped so the pre-loop limit can fold
  // and no extra pre-iterations can later be added by RCE.
  assert(cmp_end->outcnt() == 1, "no other users");
  _igvn.hash_delete(cmp_end);
  cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);

  // Special case for not-equal loop bounds:
  // Change pre loop test, main loop test, and the
  // main loop guard test to use lt or gt depending on stride
  // direction:
  // positive stride use <
  // negative stride use >
  //
  // not-equal test is kept for post loop to handle case
  // when init > limit when stride > 0 (and reverse).

  if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {

    BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
    // Modify pre loop end condition
    Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
    BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
    register_new_node(new_bol0, pre_head->in(0));
    _igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
    // Modify main loop guard condition
    assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
    BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
    register_new_node(new_bol1, new_pre_exit);
    _igvn.hash_delete(min_iff);
    min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
    // Modify main loop end condition
    BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
    BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
    register_new_node(new_bol2, main_end->in(CountedLoopEndNode::TestControl));
    _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
  }

  // Flag main loop
  main_head->set_main_loop();
  if (peel_only) main_head->set_main_no_pre_loop();

  // Subtract a trip count for the pre-loop.
  main_head->set_trip_count(main_head->trip_count() - 1);

  // It's difficult to be precise about the trip-counts
  // for the pre/post loops.  They are usually very short,
  // so guess that 4 trips is a reasonable value.
  post_loop_info.post_head->set_profile_trip_cnt(4.0);
  pre_head->set_profile_trip_cnt(4.0);

  // Now force out all loop-invariant dominating tests.  The optimizer
  // finds some, but we _know_ they are all useless.
  peeled_dom_test_elim(loop,old_new);
  loop->record_for_igvn();
}
1191 
1192 //------------------------------insert_vector_post_loop------------------------
1193 // Insert a copy of the atomic unrolled vectorized main loop as a post loop,
1194 // unroll_policy has already informed us that more unrolling is about to happen to
1195 // the main loop.  The resultant post loop will serve as a vectorized drain loop.
1196 void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1197   if (!loop->_head->is_CountedLoop()) return;
1198 
1199   CountedLoopNode *cl = loop->_head->as_CountedLoop();
1200 
1201   // only process vectorized main loops
1202   if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
1203 


1209   // only process atomic unroll vector loops (not super unrolled after vectorization)
1210   if (cur_unroll != slp_max_unroll_factor) return;
1211 
1212   // we only ever process this one time
1213   if (cl->has_atomic_post_loop()) return;
1214 
1215 #ifndef PRODUCT
1216   if (TraceLoopOpts) {
1217     tty->print("PostVector  ");
1218     loop->dump_head();
1219   }
1220 #endif
1221   C->set_major_progress();
1222 
1223   // Find common pieces of the loop being guarded with pre & post loops
1224   CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1225   CountedLoopEndNode *main_end = main_head->loopexit();
1226   guarantee(main_end != NULL, "no loop exit node");
1227   // diagnostic to show loop end is not properly formed
1228   assert(main_end->outcnt() == 2, "1 true, 1 false path only");


1229 
1230   // mark this loop as processed
1231   main_head->mark_has_atomic_post_loop();
1232 


1233   Node *incr = main_end->incr();
1234   Node *limit = main_end->limit();
1235 
1236   // In this case we throw away the result as we are not using it to connect anything else.
1237   PostLoopInfo post_loop_info;
1238   insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);
1239 
1240   // It's difficult to be precise about the trip-counts
1241   // for post loops.  They are usually very short,
1242   // so guess that unit vector trips is a reasonable value.
1243   post_loop_info.post_head->set_profile_trip_cnt(cur_unroll);
1244 
1245   // Now force out all loop-invariant dominating tests.  The optimizer
1246   // finds some, but we _know_ they are all useless.
1247   peeled_dom_test_elim(loop, old_new);
1248   loop->record_for_igvn();
1249 }
1250 
1251 
1252 //-------------------------insert_scalar_rced_post_loop------------------------
1253 // Insert a copy of the rce'd main loop as a post loop,
1254 // We have not unrolled the main loop, so this is the right time to inject this.
1255 // Later we will examine the partner of this post loop pair which still has range checks
1256 // to see inject code which tests at runtime if the range checks are applicable.
1257 void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1258   if (!loop->_head->is_CountedLoop()) return;
1259 
1260   CountedLoopNode *cl = loop->_head->as_CountedLoop();
1261 
1262   // only process RCE'd main loops
1263   if (cl->loop_has_range_checks() || !cl->is_main_loop()) return;
1264 
1265 #ifndef PRODUCT
1266   if (TraceLoopOpts) {
1267     tty->print("PostScalarRce  ");
1268     loop->dump_head();
1269   }
1270 #endif
1271   C->set_major_progress();
1272 
1273   // Find common pieces of the loop being guarded with pre & post loops
1274   CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1275   CountedLoopEndNode *main_end = main_head->loopexit();
1276   guarantee(main_end != NULL, "no loop exit node");
1277   // diagnostic to show loop end is not properly formed
1278   assert(main_end->outcnt() == 2, "1 true, 1 false path only");
1279 
1280   Node *incr = main_end->incr();
1281   Node *limit = main_end->limit();
1282 
1283   // In this case we throw away the result as we are not using it to connect anything else.
1284   PostLoopInfo post_loop_info;
1285   insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);
1286 
1287   // It's difficult to be precise about the trip-counts
1288   // for post loops.  They are usually very short,
1289   // so guess that unit vector trips is a reasonable value.
1290   post_loop_info.post_head->set_profile_trip_cnt(4.0);
1291 
1292   // Now force out all loop-invariant dominating tests.  The optimizer
1293   // finds some, but we _know_ they are all useless.
1294   peeled_dom_test_elim(loop, old_new);
1295   loop->record_for_igvn();
1296 }
1297 
1298 
//------------------------------insert_post_loop-------------------------------
// Insert post loops.  Add a post loop to the given loop passed.
// 'loop' is cloned; the clone becomes the post-loop, entered through a new
// zero-trip guard placed on the main loop's normal exit.  'incr' and 'limit'
// are the main loop's trip counter and limit, used to build that guard.
// Outputs in post_loop_info: post_head (head of the new post-loop) and
// new_main_exit (the rebuilt IfFalse exit of the main loop).
void PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
                                      CountedLoopNode *main_head, CountedLoopEndNode *main_end,
                                      Node *incr, Node *limit, PostLoopInfo &post_loop_info) {

  //------------------------------
  // Step A: Create a new post-Loop.
  Node* main_exit = main_end->proj_out(false);
  assert(main_exit->Opcode() == Op_IfFalse, "");
  int dd_main_exit = dom_depth(main_exit);

  // Step A1: Clone the loop body of main.  The clone becomes the vector post-loop.
  // The main loop pre-header illegally has 2 control users (old & new loops).
  clone_loop(loop, old_new, dd_main_exit);
  assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
  CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
  post_head->set_normal_loop();
  post_head->set_post_loop(main_head);
  post_loop_info.post_head = post_head;

  // Reduce the post-loop trip count.
  CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
  post_end->_prob = PROB_FAIR;

  // Build the main-loop normal exit.
  // A fresh IfFalse projection; the original main_exit is re-wired below to
  // hang off the zero-trip guard instead.
  IfFalseNode *new_main_exit = new IfFalseNode(main_end);
  _igvn.register_new_node_with_optimizer(new_main_exit);
  set_idom(new_main_exit, main_end, dd_main_exit);
  set_loop(new_main_exit, loop->_parent);

  // Step A2: Build a zero-trip guard for the post-loop.  After leaving the
  // main-loop, the post-loop may not execute at all.  We 'opaque' the incr
  // (the previous loop trip-counter exit value) because we will be changing
  // the exit value (via additional unrolling) so we cannot constant-fold away the zero
  // trip guard until all unrolling is done.
  Node *zer_opaq = new Opaque1Node(C, incr);
  Node *zer_cmp = new CmpINode(zer_opaq, limit);
  Node *zer_bol = new BoolNode(zer_cmp, main_end->test_trip());
  register_new_node(zer_opaq, new_main_exit);
  register_new_node(zer_cmp, new_main_exit);
  register_new_node(zer_bol, new_main_exit);

  // Build the IfNode
  IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
  _igvn.register_new_node_with_optimizer(zer_iff);
  set_idom(zer_iff, new_main_exit, dd_main_exit);
  set_loop(zer_iff, loop->_parent);

  // Plug in the false-path, taken if we need to skip this post-loop
  _igvn.replace_input_of(main_exit, 0, zer_iff);
  set_idom(main_exit, zer_iff, dd_main_exit);
  set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
  // Make the true-path, must enter this post loop
  Node *zer_taken = new IfTrueNode(zer_iff);
  _igvn.register_new_node_with_optimizer(zer_taken);
  set_idom(zer_taken, zer_iff, dd_main_exit);
  set_loop(zer_taken, loop->_parent);
  // Plug in the true path
  _igvn.hash_delete(post_head);
  post_head->set_req(LoopNode::EntryControl, zer_taken);
  set_idom(post_head, zer_taken, dd_main_exit);

  Arena *a = Thread::current()->resource_area();
  VectorSet visited(a);
  Node_Stack clones(a, main_head->back_control()->outcnt());
  // Step A3: Make the fall-in values to the post-loop come from the
  // fall-out values of the main-loop.
  for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
    Node* main_phi = main_head->fast_out(i);
    if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) {
      Node *cur_phi = old_new[main_phi->_idx];
      Node *fallnew = clone_up_backedge_goo(main_head->back_control(),
        post_head->init_control(),
        main_phi->in(LoopNode::LoopBackControl),
        visited, clones);
      _igvn.hash_delete(cur_phi);
      cur_phi->set_req(LoopNode::EntryControl, fallnew);
    }
  }

  // CastII for the new post loop:
  // zer_opaq->in(1) is 'incr'; pin it on the guard's true path so post-loop
  // nodes stay dependent on the test guarding the loop nest.
  bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
  assert(inserted, "no castII inserted");

  post_loop_info.new_main_exit = new_main_exit;
}
1386 
1387 
1388 //------------------------------is_invariant-----------------------------
1389 // Return true if n is invariant
1390 bool IdealLoopTree::is_invariant(Node* n) const {
1391   Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1392   if (n_c->is_top()) return false;
1393   return !is_member(_phase->get_loop(n_c));
1394 }
1395 
1396 
1397 //------------------------------do_unroll--------------------------------------
1398 // Unroll the loop body one step - make each trip do 2 iterations.
1399 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
1400   assert(LoopUnrollLimit, "");
1401   CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1402   CountedLoopEndNode *loop_end = loop_head->loopexit();
1403   assert(loop_end, "");
1404 #ifndef PRODUCT
1405   if (PrintOpto && VerifyLoopOptimizations) {
1406     tty->print("Unrolling ");
1407     loop->dump_head();


2064         Node *ctrl_off = get_ctrl(exp->in(2));
2065         Node* offset = new SubINode(zero, exp->in(2));
2066         register_new_node(offset, ctrl_off);
2067         *p_offset = offset;
2068       }
2069       return true;
2070     }
2071     if (is_scaled_iv(exp->in(2), iv, p_scale)) {
2072       if (p_offset != NULL) {
2073         *p_scale *= -1;
2074         *p_offset = exp->in(1);
2075       }
2076       return true;
2077     }
2078   }
2079   return false;
2080 }
2081 
2082 //------------------------------do_range_check---------------------------------
2083 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
2084 void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &range_check_list) {
2085 #ifndef PRODUCT
2086   if (PrintOpto && VerifyLoopOptimizations) {
2087     tty->print("Range Check Elimination ");
2088     loop->dump_head();
2089   } else if (TraceLoopOpts) {
2090     tty->print("RangeCheck   ");
2091     loop->dump_head();
2092   }
2093 #endif
2094   assert(RangeCheckElimination, "");
2095   CountedLoopNode *cl = loop->_head->as_CountedLoop();
2096   assert(cl->is_main_loop(), "");
2097 
2098   // protect against stride not being a constant
2099   if (!cl->stride_is_con())
2100     return;
2101 
2102   // Find the trip counter; we are iteration splitting based on it
2103   Node *trip_counter = cl->phi();
2104   // Find the main loop limit; we will trim it's iterations


2108   // Need to find the main-loop zero-trip guard
2109   Node *ctrl  = cl->in(LoopNode::EntryControl);
2110   assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
2111   Node *iffm = ctrl->in(0);
2112   assert(iffm->Opcode() == Op_If, "");
2113   Node *bolzm = iffm->in(1);
2114   assert(bolzm->Opcode() == Op_Bool, "");
2115   Node *cmpzm = bolzm->in(1);
2116   assert(cmpzm->is_Cmp(), "");
2117   Node *opqzm = cmpzm->in(2);
2118   // Can not optimize a loop if zero-trip Opaque1 node is optimized
2119   // away and then another round of loop opts attempted.
2120   if (opqzm->Opcode() != Op_Opaque1)
2121     return;
2122   assert(opqzm->in(1) == main_limit, "do not understand situation");
2123 
2124   // Find the pre-loop limit; we will expand its iterations to
2125   // not ever trip low tests.
2126   Node *p_f = iffm->in(0);
2127   // pre loop may have been optimized out
2128   if (p_f->Opcode() != Op_IfFalse)
2129     return;
2130 
2131   CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2132   assert(pre_end->loopnode()->is_pre_loop(), "");
2133   Node *pre_opaq1 = pre_end->limit();
2134   // Occasionally it's possible for a pre-loop Opaque1 node to be
2135   // optimized away and then another round of loop opts attempted.
2136   // We can not optimize this particular loop in that case.
2137   if (pre_opaq1->Opcode() != Op_Opaque1)
2138     return;
2139   Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2140   Node *pre_limit = pre_opaq->in(1);
2141 
2142   // Where do we put new limit calculations
2143   Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2144 
2145   // Ensure the original loop limit is available from the
2146   // pre-loop Opaque1 node.
2147   Node *orig_limit = pre_opaq->original_loop_limit();
2148   if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP)
2149     return;
2150 


2152 
2153   int stride_con = cl->stride_con();
2154   Node *zero = _igvn.intcon(0);
2155   Node *one  = _igvn.intcon(1);
2156   // Use symmetrical int range [-max_jint,max_jint]
2157   Node *mini = _igvn.intcon(-max_jint);
2158   set_ctrl(zero, C->root());
2159   set_ctrl(one,  C->root());
2160   set_ctrl(mini, C->root());
2161 
2162   // Range checks that do not dominate the loop backedge (ie.
2163   // conditionally executed) can lengthen the pre loop limit beyond
2164   // the original loop limit. To prevent this, the pre limit is
2165   // (for stride > 0) MINed with the original loop limit (MAXed
2166   // stride < 0) when some range_check (rc) is conditionally
2167   // executed.
2168   bool conditional_rc = false;
2169 
2170   // Check loop body for tests of trip-counter plus loop-invariant vs
2171   // loop-invariant.
2172   for (uint i = 0; i < loop->_body.size(); i++) {
2173     Node *iff = loop->_body[i];
2174     if (iff->Opcode() == Op_If ||
2175         iff->Opcode() == Op_RangeCheck) { // Test?
2176       // Test is an IfNode, has 2 projections.  If BOTH are in the loop
2177       // we need loop unswitching instead of iteration splitting.
2178       range_check_list.push(iff);
2179       Node *exit = loop->is_loop_exit(iff);
2180       if (!exit) continue;
2181       int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
2182 
2183       // Get boolean condition to test
2184       Node *i1 = iff->in(1);
2185       if (!i1->is_Bool()) continue;
2186       BoolNode *bol = i1->as_Bool();
2187       BoolTest b_test = bol->_test;
2188       // Flip sense of test if exit condition is flipped
2189       if (flip)
2190         b_test = b_test.negate();
2191 
2192       // Get compare
2193       Node *cmp = bol->in(1);
2194 
2195       // Look for trip_counter + offset vs limit
2196       Node *rc_exp = cmp->in(1);
2197       Node *limit  = cmp->in(2);
2198       jint scale_con= 1;        // Assume trip counter not scaled
2199 
2200       Node *limit_c = get_ctrl(limit);
2201       if (loop->is_member(get_loop(limit_c))) {
2202         // Compare might have operands swapped; commute them
2203         b_test = b_test.commute();
2204         rc_exp = cmp->in(2);
2205         limit  = cmp->in(1);
2206         limit_c = get_ctrl(limit);
2207         if (loop->is_member(get_loop(limit_c)))
2208           continue;             // Both inputs are loop varying; cannot RCE
2209       }
2210       // Here we know 'limit' is loop invariant
2211 
2212       // 'limit' maybe pinned below the zero trip test (probably from a
2213       // previous round of rce), in which case, it can't be used in the
2214       // zero trip test expression which must occur before the zero test's if.
2215       if (limit_c == ctrl)
2216         continue;  // Don't rce this check but continue looking for other candidates.

2217 
2218       // Check for scaled induction variable plus an offset
2219       Node *offset = NULL;
2220 
2221       if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset))
2222         continue;

2223 
2224       Node *offset_c = get_ctrl(offset);
2225       if (loop->is_member( get_loop(offset_c)))
2226         continue;               // Offset is not really loop invariant
2227       // Here we know 'offset' is loop invariant.
2228 
2229       // As above for the 'limit', the 'offset' maybe pinned below the
2230       // zero trip test.
2231       if (offset_c == ctrl)
2232         continue; // Don't rce this check but continue looking for other candidates.
2233 
2234 #ifdef ASSERT
2235       if (TraceRangeLimitCheck) {
2236         tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2237         bol->dump(2);
2238       }
2239 #endif
2240       // At this point we have the expression as:
2241       //   scale_con * trip_counter + offset :: limit
2242       // where scale_con, offset and limit are loop invariant.  Trip_counter
2243       // monotonically increases by stride_con, a constant.  Both (or either)
2244       // stride_con and scale_con can be negative which will flip about the
2245       // sense of the test.
2246 
2247       // Adjust pre and main loop limits to guard the correct iteration set
2248       if (cmp->Opcode() == Op_CmpU) {// Unsigned compare is really 2 tests
2249         if (b_test._test == BoolTest::lt) { // Range checks always use lt
2250           // The underflow and overflow limits: 0 <= scale*I+offset < limit
2251           add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2252           if (!conditional_rc) {
2253             // (0-offset)/scale could be outside of loop iterations range.
2254             conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
2255           }
2256         } else {
2257           if (PrintOpto) {
2258             tty->print_cr("missed RCE opportunity");
2259           }
2260           continue;             // In release mode, ignore it
2261         }
2262       } else {                  // Otherwise work on normal compares
2263         switch( b_test._test ) {
2264         case BoolTest::gt:
2265           // Fall into GE case
2266         case BoolTest::ge:
2267           // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2268           scale_con = -scale_con;
2269           offset = new SubINode(zero, offset);
2270           register_new_node(offset, pre_ctrl);
2271           limit  = new SubINode(zero, limit);
2272           register_new_node(limit, pre_ctrl);
2273           // Fall into LE case
2274         case BoolTest::le:
2275           if (b_test._test != BoolTest::gt) {
2276             // Convert X <= Y to X < Y+1
2277             limit = new AddINode(limit, one);
2278             register_new_node(limit, pre_ctrl);
2279           }
2280           // Fall into LT case
2281         case BoolTest::lt:
2282           // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
2283           // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
2284           // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2285           add_constraint(stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
2286           if (!conditional_rc) {
2287             // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
2288             // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
2289             // still be outside of loop range.
2290             conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
2291           }
2292           break;
2293         default:
2294           if (PrintOpto) {
2295             tty->print_cr("missed RCE opportunity");
2296           }
2297           continue;             // Unhandled case
2298         }
2299       }
2300 
2301       // Kill the eliminated test
2302       C->set_major_progress();
2303       Node *kill_con = _igvn.intcon(1-flip);
2304       set_ctrl(kill_con, C->root());
2305       _igvn.replace_input_of(iff, 1, kill_con);
2306       // Find surviving projection
2307       assert(iff->is_If(), "");
2308       ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2309       // Find loads off the surviving projection; remove their control edge
2310       for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2311         Node* cd = dp->fast_out(i); // Control-dependent node
2312         if (cd->is_Load() && cd->depends_only_on_test()) {   // Loads can now float around in the loop
2313           // Allow the load to float around in the loop, or before it
2314           // but NOT before the pre-loop.
2315           _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
2316           --i;
2317           --imax;
2318         }
2319       }
2320       range_check_list.pop();
2321 
2322     } // End of is IF
2323 
2324   }
2325 
2326   // Update loop limits
2327   if (conditional_rc) {
2328     pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2329                                  : (Node*)new MaxINode(pre_limit, orig_limit);
2330     register_new_node(pre_limit, pre_ctrl);
2331   }
2332   _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2333 
2334   // Note:: we are making the main loop limit no longer precise;
2335   // need to round up based on stride.
2336   cl->set_nonexact_trip_count();
2337   if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
2338     // "Standard" round-up logic:  ([main_limit-init+(y-1)]/y)*y+init
2339     // Hopefully, compiler will optimize for powers of 2.
2340     Node *ctrl = get_ctrl(main_limit);
2341     Node *stride = cl->stride();
2342     Node *init = cl->init_trip()->uncast();
2343     Node *span = new SubINode(main_limit,init);
2344     register_new_node(span,ctrl);
2345     Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1));
2346     Node *add = new AddINode(span,rndup);
2347     register_new_node(add,ctrl);
2348     Node *div = new DivINode(0,add,stride);
2349     register_new_node(div,ctrl);
2350     Node *mul = new MulINode(div,stride);
2351     register_new_node(mul,ctrl);
2352     Node *newlim = new AddINode(mul,init);
2353     register_new_node(newlim,ctrl);
2354     main_limit = newlim;
2355   }
2356 
2357   Node *main_cle = cl->loopexit();
2358   Node *main_bol = main_cle->in(1);
2359   // Hacking loop bounds; need private copies of exit test
2360   if (main_bol->outcnt() > 1) {// BoolNode shared?
2361     main_bol = main_bol->clone();// Clone a private BoolNode
2362     register_new_node( main_bol, main_cle->in(0) );
2363     _igvn.replace_input_of(main_cle, 1, main_bol);
2364   }
2365   Node *main_cmp = main_bol->in(1);
2366   if (main_cmp->outcnt() > 1) { // CmpNode shared?
2367     main_cmp = main_cmp->clone();// Clone a private CmpNode
2368     register_new_node(main_cmp, main_cle->in(0));
2369     _igvn.replace_input_of(main_bol, 1, main_cmp);
2370   }
2371   // Hack the now-private loop bounds
2372   _igvn.replace_input_of(main_cmp, 2, main_limit);
2373   // The OpaqueNode is unshared by design
2374   assert( opqzm->outcnt() == 1, "cannot hack shared node" );
2375   _igvn.replace_input_of(opqzm, 1, main_limit);
2376 }
2377 
2378 //------------------------------has_range_checks-------------------------------
2379 // Check to see if RCE cleaned the current loop of range-checks.
2380 void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
2381   assert(RangeCheckElimination, "");
2382   CountedLoopNode *cl = loop->_head->as_CountedLoop();
2383 
2384   // skip this loop if it is already marked
2385   if (cl->loop_has_range_checks()) return;
2386 
2387   if (cl->is_main_loop() || cl->is_post_loop()) {
2388     // Now check for existance of range checks
2389     for (uint i = 0; i < loop->_body.size(); i++) {
2390       Node *iff = loop->_body[i];
2391       if (iff->Opcode() == Op_If ||
2392         iff->Opcode() == Op_RangeCheck) {
2393         cl->mark_has_range_checks();
2394         break;
2395       }
2396     }
2397   }
2398 }
2399 
//-------------------------multi_version_post_loops----------------------------
// Check the range checks that remain, if simple, use the bounds to guard
// which version to a post loop we execute, one with range checks or one without.
// 'rce_loop' is the post loop that RCE cleaned; 'legacy_loop' is the post loop
// that still carries its range checks.  Returns true if the guard of the RCE'd
// post loop was successfully rewritten to select between the two versions.
bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
  bool multi_version_succeeded = false;
  assert(RangeCheckElimination, "");
  CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
  assert(legacy_cl->is_post_loop(), "");

  // Check for existence of range checks using the unique instance to make a guard with.
  // Collect every If/RangeCheck in the legacy loop body for later processing.
  Unique_Node_List worklist;
  for (uint i = 0; i < legacy_loop->_body.size(); i++) {
    Node *iff = legacy_loop->_body[i];
    if (iff->Opcode() == Op_If || iff->Opcode() == Op_RangeCheck) {
      worklist.push(iff);
    }
  }

  // Find RCE'd post loop so that we can stage its guard.
  // The entry control of the legacy post loop must be a projection of its
  // zero-trip-guard If; bail out (returning false) if the expected graph
  // shape is not present.
  Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
  if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;
  Node* iffm = ctrl->in(0);
  if (!iffm->is_If()) return multi_version_succeeded;
  Node* cur_bool = iffm->in(1);
  if (!cur_bool->is_Bool()) return multi_version_succeeded;
  Node* cur_cmp = cur_bool->in(1);
  if (!cur_cmp->is_Cmp()) return multi_version_succeeded;
  Node* cur_opq = cur_cmp->in(1);
  // Can not optimize a loop if zero-trip Opaque1 node is optimized away.
  if (cur_opq->Opcode() != Op_Opaque1) return multi_version_succeeded;

  // Now we test that both the post loops are connected: walk the control
  // chain from the legacy loop's guard back to the RCE'd post loop's exit,
  // verifying each expected region/projection along the way.
  Node* post_loop_region = iffm->in(0);
  if (post_loop_region == NULL) return multi_version_succeeded;
  if (!post_loop_region->is_Region()) return multi_version_succeeded;
  Node* covering_region = post_loop_region->in(RegionNode::Control+1);
  if (covering_region == NULL) return multi_version_succeeded;
  if (!covering_region->is_Region()) return multi_version_succeeded;
  Node* p_f = covering_region->in(RegionNode::Control);
  if (p_f == NULL) return multi_version_succeeded;
  if (!p_f->is_IfFalse()) return multi_version_succeeded;
  if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded;
  CountedLoopEndNode* rce_loop_end = p_f->in(0)->as_CountedLoopEnd();
  if (rce_loop_end == NULL) return multi_version_succeeded;
  CountedLoopNode* rce_cl = rce_loop_end->loopnode();
  if (rce_cl == NULL || !rce_cl->is_post_loop()) return multi_version_succeeded;
  // The post loop we reached must be the one the caller passed as rce_loop.
  CountedLoopNode *known_rce_cl = rce_loop->_head->as_CountedLoop();
  if (rce_cl != known_rce_cl) return multi_version_succeeded;

  // Then we fetch the cover entry test
  ctrl = rce_cl->in(LoopNode::EntryControl);
  if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print("PostMultiVersion\n");
    rce_loop->dump_head();
    legacy_loop->dump_head();
  }
#endif

  // Now fetch the limit we want to compare against
  Node *limit = rce_cl->limit();
  bool first_time = true;

  // If we got this far, we identified the post loop which has been RCE'd and
  // we have a work list.  Now we will try to transform the if guard to cause
  // the loop pair to be multi version executed with the determination left to runtime
  // or the optimizer if full information is known about the given arrays at compile time.
  // Fold the second input of every collected range-check compare into a
  // chain of MinI nodes, yielding the tightest bound across all checks.
  Node *last_min = NULL;
  while (worklist.size()) {
    Node* rc_iffm = worklist.pop();
    if (rc_iffm->is_If()) {
      Node *rc_bolzm = rc_iffm->in(1);
      if (rc_bolzm->is_Bool()) {
        Node *rc_cmpzm = rc_bolzm->in(1);
        if (rc_cmpzm->is_Cmp()) {
          Node *rc_left = rc_cmpzm->in(2);
          if (first_time) {
            last_min = rc_left;
            first_time = false;
          } else {
            Node *cur_min = new MinINode(last_min, rc_left);
            last_min = cur_min;
            _igvn.register_new_node_with_optimizer(last_min);
          }
        }
      }
    }
  }

  // All we have to do is update the limit of the rce loop
  // with the min of our expression and the current limit.
  // We will use this expression to replace the current limit.
  if (last_min) {
    Node *cur_min = new MinINode(last_min, limit);
    _igvn.register_new_node_with_optimizer(cur_min);
    Node *cmp_node = rce_loop_end->cmp_node();
    _igvn.replace_input_of(cmp_node, 2, cur_min);
    set_idom(cmp_node, cur_min, dom_depth(ctrl));
    set_ctrl(cur_min, ctrl);
    set_loop(cur_min, rce_loop->_parent);

    // Record that both loops now participate in multi-versioning.
    legacy_cl->mark_is_multiversioned();
    rce_cl->mark_is_multiversioned();
    multi_version_succeeded = true;

    C->set_major_progress();
  }

  return multi_version_succeeded;
}
2512 
2513 //-------------------------poison_rce_post_loop--------------------------------
2514 // Causes the rce'd post loop to be optimized away if multiverioning fails
2515 void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
2516   CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
2517   Node* ctrl = rce_cl->in(LoopNode::EntryControl);
2518   if (ctrl->is_IfTrue() || ctrl->is_IfFalse()) {
2519     Node* iffm = ctrl->in(0);
2520     if (iffm->is_If()) {
2521       Node* cur_bool = iffm->in(1);
2522       if (cur_bool->is_Bool()) {
2523         Node* cur_cmp = cur_bool->in(1);
2524         if (cur_cmp->is_Cmp()) {
2525           BoolTest::mask new_test = BoolTest::gt;
2526           BoolNode *new_bool = new BoolNode(cur_cmp, new_test);
2527           _igvn.replace_node(cur_bool, new_bool);
2528           _igvn._worklist.push(new_bool);
2529           Node* left_op = cur_cmp->in(1);
2530           _igvn.replace_input_of(cur_cmp, 2, left_op);
2531           C->set_major_progress();
2532         }
2533       }
2534     }
2535   }
2536 }
2537 
2538 //------------------------------DCE_loop_body----------------------------------
2539 // Remove simplistic dead code from loop body
2540 void IdealLoopTree::DCE_loop_body() {
2541   for( uint i = 0; i < _body.size(); i++ )
2542     if( _body.at(i)->outcnt() == 0 )
2543       _body.map( i--, _body.pop() );
2544 }
2545 
2546 
2547 //------------------------------adjust_loop_exit_prob--------------------------
2548 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
2549 // Replace with a 1-in-10 exit guess.
2550 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
2551   Node *test = tail();
2552   while( test != _head ) {
2553     uint top = test->Opcode();
2554     if( top == Op_IfTrue || top == Op_IfFalse ) {
2555       int test_con = ((ProjNode*)test)->_con;
2556       assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity");
2557       IfNode *iff = test->in(0)->as_If();


2875   bool should_align = policy_align(phase);
2876 
2877   // If not RCE'ing (iteration splitting) or Aligning, then we do not
2878   // need a pre-loop.  We may still need to peel an initial iteration but
2879   // we will not be needing an unknown number of pre-iterations.
2880   //
2881   // Basically, if may_rce_align reports FALSE first time through,
2882   // we will not be able to later do RCE or Aligning on this loop.
2883   bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
2884 
2885   // If we have any of these conditions (RCE, alignment, unrolling) met, then
2886   // we switch to the pre-/main-/post-loop model.  This model also covers
2887   // peeling.
2888   if (should_rce || should_align || should_unroll) {
2889     if (cl->is_normal_loop())  // Convert to 'pre/main/post' loops
2890       phase->insert_pre_post_loops(this,old_new, !may_rce_align);
2891 
2892     // Adjust the pre- and main-loop limits to let the pre and post loops run
2893     // with full checks, but the main-loop with no checks.  Remove said
2894     // checks from the main body.
2895     if (should_rce) {
2896       Node_List range_check_list;
2897       phase->do_range_check(this, range_check_list);
2898       if (range_check_list.size() > 0) {
2899         cl->mark_has_range_checks();
2900       }
2901     }
2902 
2903     if (should_rce && should_unroll && !should_peel && PostLoopMultiversioning) {
2904       // Try to setup multiversioning on main loops before they are unrolled
2905       if (cl->is_main_loop() && (cl->unrolled_count() == 1))
2906         phase->insert_scalar_rced_post_loop(this, old_new);
2907     }
2908 
2909     // Double loop body for unrolling.  Adjust the minimum-trip test (will do
2910     // twice as many iterations as before) and the main body limit (only do
2911     // an even number of trips).  If we are peeling, we might enable some RCE
2912     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
2913     // peeling.
2914     if (should_unroll && !should_peel) {
2915       if (SuperWordLoopUnrollAnalysis) {
2916         phase->insert_vector_post_loop(this, old_new);
2917       }
2918       phase->do_unroll(this, old_new, true);
2919     }
2920 
2921     // Adjust the pre-loop limits to align the main body
2922     // iterations.
2923     if (should_align)
2924       Unimplemented();
2925 
2926   } else {                      // Else we have an unchanged counted loop
2927     if (should_peel)           // Might want to peel but do nothing else


< prev index next >