966 }
967
968 bool PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) {
969 Node* castii = new CastIINode(incr, TypeInt::INT, true);
970 castii->set_req(0, ctrl);
971 register_new_node(castii, ctrl);
972 for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
973 Node* n = incr->fast_out(i);
974 if (n->is_Phi() && n->in(0) == loop) {
975 int nrep = n->replace_edge(incr, castii);
976 return true;
977 }
978 }
979 return false;
980 }
981
982 //------------------------------insert_pre_post_loops--------------------------
983 // Insert pre and post loops. If peel_only is set, the pre-loop can not have
984 // more iterations added. It acts as a 'peel' only, no lower-bound RCE, no
985 // alignment. Useful to unroll loops that do no array accesses.
986 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
987
988 #ifndef PRODUCT
989 if (TraceLoopOpts) {
990 if (peel_only)
991 tty->print("PeelMainPost ");
992 else
993 tty->print("PreMainPost ");
994 loop->dump_head();
995 }
996 #endif
997 C->set_major_progress();
998
999 // Find common pieces of the loop being guarded with pre & post loops
1000 CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1001 assert( main_head->is_normal_loop(), "" );
1002 CountedLoopEndNode *main_end = main_head->loopexit();
1003 guarantee(main_end != NULL, "no loop exit node");
1004 assert( main_end->outcnt() == 2, "1 true, 1 false path only" );
1005 uint dd_main_head = dom_depth(main_head);
1006 uint max = main_head->outcnt();
1007
1008 Node *pre_header= main_head->in(LoopNode::EntryControl);
1009 Node *init = main_head->init_trip();
1010 Node *incr = main_end ->incr();
1011 Node *limit = main_end ->limit();
1012 Node *stride = main_end ->stride();
1013 Node *cmp = main_end ->cmp_node();
1014 BoolTest::mask b_test = main_end->test_trip();
1015
1016 // Need only 1 user of 'bol' because I will be hacking the loop bounds.
1017 Node *bol = main_end->in(CountedLoopEndNode::TestValue);
1018 if( bol->outcnt() != 1 ) {
1019 bol = bol->clone();
1020 register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
1021 _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
1022 }
1023 // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
1024 if( cmp->outcnt() != 1 ) {
1025 cmp = cmp->clone();
1026 register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
1027 _igvn.replace_input_of(bol, 1, cmp);
1028 }
1029
1030 //------------------------------
1031 // Step A: Create Post-Loop.
1032 Node* main_exit = main_end->proj_out(false);
1033 assert( main_exit->Opcode() == Op_IfFalse, "" );
1034 int dd_main_exit = dom_depth(main_exit);
1035
1036 // Step A1: Clone the loop body. The clone becomes the post-loop. The main
1037 // loop pre-header illegally has 2 control users (old & new loops).
1038 clone_loop( loop, old_new, dd_main_exit );
1039 assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" );
1040 CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
1041 post_head->set_post_loop(main_head);
1042
1043 // Reduce the post-loop trip count.
1044 CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
1045 post_end->_prob = PROB_FAIR;
1046
1047 // Build the main-loop normal exit.
1048 IfFalseNode *new_main_exit = new IfFalseNode(main_end);
1049 _igvn.register_new_node_with_optimizer( new_main_exit );
1050 set_idom(new_main_exit, main_end, dd_main_exit );
1051 set_loop(new_main_exit, loop->_parent);
1052
1053 // Step A2: Build a zero-trip guard for the post-loop. After leaving the
1054 // main-loop, the post-loop may not execute at all. We 'opaque' the incr
1055 // (the main-loop trip-counter exit value) because we will be changing
1056 // the exit value (via unrolling) so we cannot constant-fold away the zero
1057 // trip guard until all unrolling is done.
1058 Node *zer_opaq = new Opaque1Node(C, incr);
1059 Node *zer_cmp = new CmpINode( zer_opaq, limit );
1060 Node *zer_bol = new BoolNode( zer_cmp, b_test );
1061 register_new_node( zer_opaq, new_main_exit );
1062 register_new_node( zer_cmp , new_main_exit );
1063 register_new_node( zer_bol , new_main_exit );
1064
1065 // Build the IfNode
1066 IfNode *zer_iff = new IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN );
1067 _igvn.register_new_node_with_optimizer( zer_iff );
1068 set_idom(zer_iff, new_main_exit, dd_main_exit);
1069 set_loop(zer_iff, loop->_parent);
1070
1071 // Plug in the false-path, taken if we need to skip post-loop
1072 _igvn.replace_input_of(main_exit, 0, zer_iff);
1073 set_idom(main_exit, zer_iff, dd_main_exit);
1074 set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
1075 // Make the true-path, must enter the post loop
1076 Node *zer_taken = new IfTrueNode( zer_iff );
1077 _igvn.register_new_node_with_optimizer( zer_taken );
1078 set_idom(zer_taken, zer_iff, dd_main_exit);
1079 set_loop(zer_taken, loop->_parent);
1080 // Plug in the true path
1081 _igvn.hash_delete( post_head );
1082 post_head->set_req(LoopNode::EntryControl, zer_taken);
1083 set_idom(post_head, zer_taken, dd_main_exit);
1084
1085 Arena *a = Thread::current()->resource_area();
1086 VectorSet visited(a);
1087 Node_Stack clones(a, main_head->back_control()->outcnt());
1088 // Step A3: Make the fall-in values to the post-loop come from the
1089 // fall-out values of the main-loop.
1090 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
1091 Node* main_phi = main_head->fast_out(i);
1092 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) {
1093 Node *post_phi = old_new[main_phi->_idx];
1094 Node *fallmain = clone_up_backedge_goo(main_head->back_control(),
1095 post_head->init_control(),
1096 main_phi->in(LoopNode::LoopBackControl),
1097 visited, clones);
1098 _igvn.hash_delete(post_phi);
1099 post_phi->set_req( LoopNode::EntryControl, fallmain );
1100 }
1101 }
1102
1103 // Update local caches for next stanza
1104 main_exit = new_main_exit;
1105
1106
1107 //------------------------------
1108 // Step B: Create Pre-Loop.
1109
1110 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
1111 // loop pre-header illegally has 2 control users (old & new loops).
1112 clone_loop( loop, old_new, dd_main_head );
1113 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
1114 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
1115 pre_head->set_pre_loop(main_head);
1116 Node *pre_incr = old_new[incr->_idx];
1117
1118 // Reduce the pre-loop trip count.
1119 pre_end->_prob = PROB_FAIR;
1120
1121 // Find the pre-loop normal exit.
1122 Node* pre_exit = pre_end->proj_out(false);
1123 assert( pre_exit->Opcode() == Op_IfFalse, "" );
1124 IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
1125 _igvn.register_new_node_with_optimizer( new_pre_exit );
1126 set_idom(new_pre_exit, pre_end, dd_main_head);
1127 set_loop(new_pre_exit, loop->_parent);
1128
1129 // Step B2: Build a zero-trip guard for the main-loop. After leaving the
1130 // pre-loop, the main-loop may not execute at all. Later in life this
1131 // zero-trip guard will become the minimum-trip guard when we unroll
1132 // the main-loop.
1133 Node *min_opaq = new Opaque1Node(C, limit);
1134 Node *min_cmp = new CmpINode( pre_incr, min_opaq );
1135 Node *min_bol = new BoolNode( min_cmp, b_test );
1136 register_new_node( min_opaq, new_pre_exit );
1137 register_new_node( min_cmp , new_pre_exit );
1138 register_new_node( min_bol , new_pre_exit );
1139
1140 // Build the IfNode (assume the main-loop is executed always).
1141 IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
1142 _igvn.register_new_node_with_optimizer( min_iff );
1143 set_idom(min_iff, new_pre_exit, dd_main_head);
1144 set_loop(min_iff, loop->_parent);
1145
1146 // Plug in the false-path, taken if we need to skip main-loop
1147 _igvn.hash_delete( pre_exit );
1148 pre_exit->set_req(0, min_iff);
1149 set_idom(pre_exit, min_iff, dd_main_head);
1150 set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
1151 // Make the true-path, must enter the main loop
1152 Node *min_taken = new IfTrueNode( min_iff );
1153 _igvn.register_new_node_with_optimizer( min_taken );
1154 set_idom(min_taken, min_iff, dd_main_head);
1155 set_loop(min_taken, loop->_parent);
1156 // Plug in the true path
1157 _igvn.hash_delete( main_head );
1158 main_head->set_req(LoopNode::EntryControl, min_taken);
1159 set_idom(main_head, min_taken, dd_main_head);
1160
1161 visited.Clear();
1162 clones.clear();
1163 // Step B3: Make the fall-in values to the main-loop come from the
1164 // fall-out values of the pre-loop.
1165 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
1166 Node* main_phi = main_head->fast_out(i2);
1167 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
1168 Node *pre_phi = old_new[main_phi->_idx];
1169 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
1170 main_head->init_control(),
1171 pre_phi->in(LoopNode::LoopBackControl),
1172 visited, clones);
1173 _igvn.hash_delete(main_phi);
1174 main_phi->set_req( LoopNode::EntryControl, fallpre );
1175 }
1176 }
1177
1178 // Nodes inside the loop may be control dependent on a predicate
1179 // that was moved before the preloop. If the back branch of the main
1180 // or post loops becomes dead, those nodes won't be dependent on the
1181 // test that guards that loop nest anymore which could lead to an
1182 // incorrect array access because it executes independently of the
1183 // test that was guarding the loop nest. We add a special CastII on
1184 // the if branch that enters the loop, between the input induction
1185 // variable value and the induction variable Phi to preserve correct
1186 // dependencies.
1187
1188 // CastII for the post loop:
1189 bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
1190 assert(inserted, "no castII inserted");
1191
1192 // CastII for the main loop:
1193 inserted = cast_incr_before_loop(pre_incr, min_taken, main_head);
1194 assert(inserted, "no castII inserted");
1195
1196 // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
1197 // RCE and alignment may change this later.
1198 Node *cmp_end = pre_end->cmp_node();
1199 assert( cmp_end->in(2) == limit, "" );
1200 Node *pre_limit = new AddINode( init, stride );
1201
1202 // Save the original loop limit in this Opaque1 node for
1203 // use by range check elimination.
1204 Node *pre_opaq = new Opaque1Node(C, pre_limit, limit);
1205
1206 register_new_node( pre_limit, pre_head->in(0) );
1207 register_new_node( pre_opaq , pre_head->in(0) );
1208
1209 // Since no other users of pre-loop compare, I can hack limit directly
1210 assert( cmp_end->outcnt() == 1, "no other users" );
1211 _igvn.hash_delete(cmp_end);
1212 cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
1213
1214 // Special case for not-equal loop bounds:
1215 // Change pre loop test, main loop test, and the
1216 // main loop guard test to use lt or gt depending on stride
1217 // direction:
1218 // positive stride use <
1219 // negative stride use >
1220 //
1221 // not-equal test is kept for post loop to handle case
1222 // when init > limit when stride > 0 (and reverse).
1223
1224 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
1225
1226 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
1227 // Modify pre loop end condition
1228 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1229 BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
1230 register_new_node( new_bol0, pre_head->in(0) );
1231 _igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
1232 // Modify main loop guard condition
1233 assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
1234 BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
1235 register_new_node( new_bol1, new_pre_exit );
1236 _igvn.hash_delete(min_iff);
1237 min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
1238 // Modify main loop end condition
1239 BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1240 BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
1241 register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
1242 _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
1243 }
1244
1245 // Flag main loop
1246 main_head->set_main_loop();
1247 if( peel_only ) main_head->set_main_no_pre_loop();
1248
1249 // Subtract a trip count for the pre-loop.
1250 main_head->set_trip_count(main_head->trip_count() - 1);
1251
1252 // It's difficult to be precise about the trip-counts
1253 // for the pre/post loops. They are usually very short,
1254 // so guess that 4 trips is a reasonable value.
1255 post_head->set_profile_trip_cnt(4.0);
1256 pre_head->set_profile_trip_cnt(4.0);
1257
1258 // Now force out all loop-invariant dominating tests. The optimizer
1259 // finds some, but we _know_ they are all useless.
1260 peeled_dom_test_elim(loop,old_new);
1261 loop->record_for_igvn();
1262 }
1263
//------------------------------insert_vector_post_loop------------------------
// Insert a copy of the atomic unrolled vectorized main loop as a post loop,
// unroll_policy has already informed us that more unrolling is about to happen to
// the main loop. The resultant post loop will serve as a vectorized drain loop.
//
// Mirrors Step A of insert_pre_post_loops: clone the main loop after its exit,
// guard it with a zero-trip test on the (opaqued) main-loop exit trip value,
// and wire the post-loop phis to the main-loop fall-out values.
void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
  // Only counted loops can be split this way.
  if (!loop->_head->is_CountedLoop()) return;

  CountedLoopNode *cl = loop->_head->as_CountedLoop();

  // only process vectorized main loops
  if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;

  // NOTE(review): cur_unroll and slp_max_unroll_factor are computed above
  // this excerpt (their declarations are not visible here) — confirm they
  // reflect the loop's current and SuperWord-recommended unroll factors.
  // only process atomic unroll vector loops (not super unrolled after vectorization)
  if (cur_unroll != slp_max_unroll_factor) return;

  // we only ever process this one time
  if (cl->has_atomic_post_loop()) return;

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print("PostVector  ");
    loop->dump_head();
  }
#endif
  C->set_major_progress();

  // Find common pieces of the loop being guarded with pre & post loops
  CountedLoopNode *main_head = loop->_head->as_CountedLoop();
  CountedLoopEndNode *main_end = main_head->loopexit();
  guarantee(main_end != NULL, "no loop exit node");
  // diagnostic to show loop end is not properly formed
  assert(main_end->outcnt() == 2, "1 true, 1 false path only");
  uint dd_main_head = dom_depth(main_head);
  uint max = main_head->outcnt();

  // mark this loop as processed
  main_head->mark_has_atomic_post_loop();

  Node *pre_header = main_head->in(LoopNode::EntryControl);
  Node *init = main_head->init_trip();
  Node *incr = main_end->incr();
  Node *limit = main_end->limit();
  Node *stride = main_end->stride();
  Node *cmp = main_end->cmp_node();
  BoolTest::mask b_test = main_end->test_trip();

  //------------------------------
  // Step A: Create a new post-Loop.
  Node* main_exit = main_end->proj_out(false);
  assert(main_exit->Opcode() == Op_IfFalse, "");
  int dd_main_exit = dom_depth(main_exit);

  // Step A1: Clone the loop body of main. The clone becomes the vector post-loop.
  // The main loop pre-header illegally has 2 control users (old & new loops).
  clone_loop(loop, old_new, dd_main_exit);
  assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
  CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
  // The clone starts life as a normal (not main) loop that drains leftover
  // iterations of the vectorized main loop.
  post_head->set_normal_loop();
  post_head->set_post_loop(main_head);

  // Reduce the post-loop trip count.
  CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
  post_end->_prob = PROB_FAIR;

  // Build the main-loop normal exit.
  IfFalseNode *new_main_exit = new IfFalseNode(main_end);
  _igvn.register_new_node_with_optimizer(new_main_exit);
  set_idom(new_main_exit, main_end, dd_main_exit);
  set_loop(new_main_exit, loop->_parent);

  // Step A2: Build a zero-trip guard for the vector post-loop. After leaving the
  // main-loop, the vector post-loop may not execute at all. We 'opaque' the incr
  // (the vectorized main-loop trip-counter exit value) because we will be changing
  // the exit value (via additional unrolling) so we cannot constant-fold away the zero
  // trip guard until all unrolling is done.
  Node *zer_opaq = new Opaque1Node(C, incr);
  Node *zer_cmp = new CmpINode(zer_opaq, limit);
  Node *zer_bol = new BoolNode(zer_cmp, b_test);
  register_new_node(zer_opaq, new_main_exit);
  register_new_node(zer_cmp, new_main_exit);
  register_new_node(zer_bol, new_main_exit);

  // Build the IfNode
  IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
  _igvn.register_new_node_with_optimizer(zer_iff);
  set_idom(zer_iff, new_main_exit, dd_main_exit);
  set_loop(zer_iff, loop->_parent);

  // Plug in the false-path, taken if we need to skip vector post-loop
  _igvn.replace_input_of(main_exit, 0, zer_iff);
  set_idom(main_exit, zer_iff, dd_main_exit);
  set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
  // Make the true-path, must enter the vector post loop
  Node *zer_taken = new IfTrueNode(zer_iff);
  _igvn.register_new_node_with_optimizer(zer_taken);
  set_idom(zer_taken, zer_iff, dd_main_exit);
  set_loop(zer_taken, loop->_parent);
  // Plug in the true path
  _igvn.hash_delete(post_head);
  post_head->set_req(LoopNode::EntryControl, zer_taken);
  set_idom(post_head, zer_taken, dd_main_exit);

  Arena *a = Thread::current()->resource_area();
  VectorSet visited(a);
  Node_Stack clones(a, main_head->back_control()->outcnt());
  // Step A3: Make the fall-in values to the vector post-loop come from the
  // fall-out values of the main-loop.
  for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
    Node* main_phi = main_head->fast_out(i);
    if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) {
      Node *cur_phi = old_new[main_phi->_idx];
      Node *fallnew = clone_up_backedge_goo(main_head->back_control(),
                                            post_head->init_control(),
                                            main_phi->in(LoopNode::LoopBackControl),
                                            visited, clones);
      _igvn.hash_delete(cur_phi);
      cur_phi->set_req(LoopNode::EntryControl, fallnew);
    }
  }

  // CastII for the new post loop:
  // Pin the trip-counter value to the zero-trip guard so post-loop body
  // nodes cannot float above it (see cast_incr_before_loop).
  bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
  assert(inserted, "no castII inserted");

  // It's difficult to be precise about the trip-counts
  // for post loops. They are usually very short,
  // so guess that unit vector trips is a reasonable value.
  post_head->set_profile_trip_cnt((float)slp_max_unroll_factor);

  // Now force out all loop-invariant dominating tests. The optimizer
  // finds some, but we _know_ they are all useless.
  peeled_dom_test_elim(loop, old_new);
  loop->record_for_igvn();
}
1403
1404 //------------------------------is_invariant-----------------------------
1405 // Return true if n is invariant
1406 bool IdealLoopTree::is_invariant(Node* n) const {
1407 Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1408 if (n_c->is_top()) return false;
1409 return !is_member(_phase->get_loop(n_c));
1410 }
1411
1412
1413 //------------------------------do_unroll--------------------------------------
1414 // Unroll the loop body one step - make each trip do 2 iterations.
1415 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
1416 assert(LoopUnrollLimit, "");
1417 CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1418 CountedLoopEndNode *loop_end = loop_head->loopexit();
1419 assert(loop_end, "");
1420 #ifndef PRODUCT
1421 if (PrintOpto && VerifyLoopOptimizations) {
1422 tty->print("Unrolling ");
1423 loop->dump_head();
2080 Node *ctrl_off = get_ctrl(exp->in(2));
2081 Node* offset = new SubINode(zero, exp->in(2));
2082 register_new_node(offset, ctrl_off);
2083 *p_offset = offset;
2084 }
2085 return true;
2086 }
2087 if (is_scaled_iv(exp->in(2), iv, p_scale)) {
2088 if (p_offset != NULL) {
2089 *p_scale *= -1;
2090 *p_offset = exp->in(1);
2091 }
2092 return true;
2093 }
2094 }
2095 return false;
2096 }
2097
//------------------------------do_range_check---------------------------------
// Eliminate range-checks and other trip-counter vs loop-invariant tests.
//
// Walks the main-loop body looking for loop-exit tests of the form
// scale*trip_counter + offset <cmp> loop_invariant_limit.  Each such test is
// folded into the pre-loop limit (so early iterations that would fail run in
// the pre-loop) and the main-loop limit (so late iterations run in the
// post-loop), after which the test itself is killed inside the main loop.
void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
#ifndef PRODUCT
  if (PrintOpto && VerifyLoopOptimizations) {
    tty->print("Range Check Elimination ");
    loop->dump_head();
  } else if (TraceLoopOpts) {
    tty->print("RangeCheck  ");
    loop->dump_head();
  }
#endif
  assert(RangeCheckElimination, "");
  CountedLoopNode *cl = loop->_head->as_CountedLoop();
  assert(cl->is_main_loop(), "");

  // protect against stride not being a constant
  if (!cl->stride_is_con())
    return;

  // Find the trip counter; we are iteration splitting based on it
  Node *trip_counter = cl->phi();
  // Find the main loop limit; we will trim it's iterations
  // NOTE(review): main_limit is initialized just above this point (its
  // declaration is not visible in this excerpt) — it holds the main loop's
  // current limit value and is rewritten at the bottom of this function.
  // Need to find the main-loop zero-trip guard
  Node *ctrl = cl->in(LoopNode::EntryControl);
  assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
  Node *iffm = ctrl->in(0);
  assert(iffm->Opcode() == Op_If, "");
  Node *bolzm = iffm->in(1);
  assert(bolzm->Opcode() == Op_Bool, "");
  Node *cmpzm = bolzm->in(1);
  assert(cmpzm->is_Cmp(), "");
  Node *opqzm = cmpzm->in(2);
  // Can not optimize a loop if zero-trip Opaque1 node is optimized
  // away and then another round of loop opts attempted.
  if (opqzm->Opcode() != Op_Opaque1)
    return;
  assert(opqzm->in(1) == main_limit, "do not understand situation");

  // Find the pre-loop limit; we will expand its iterations to
  // not ever trip low tests.
  Node *p_f = iffm->in(0);
  // pre loop may have been optimized out
  if (p_f->Opcode() != Op_IfFalse) {
    return;
  }
  CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
  assert(pre_end->loopnode()->is_pre_loop(), "");
  Node *pre_opaq1 = pre_end->limit();
  // Occasionally it's possible for a pre-loop Opaque1 node to be
  // optimized away and then another round of loop opts attempted.
  // We can not optimize this particular loop in that case.
  if (pre_opaq1->Opcode() != Op_Opaque1)
    return;
  Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
  Node *pre_limit = pre_opaq->in(1);

  // Where do we put new limit calculations
  Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);

  // Ensure the original loop limit is available from the
  // pre-loop Opaque1 node.
  Node *orig_limit = pre_opaq->original_loop_limit();
  if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP)
    return;


  int stride_con = cl->stride_con();
  Node *zero = _igvn.intcon(0);
  Node *one  = _igvn.intcon(1);
  // Use symmetrical int range [-max_jint,max_jint]
  Node *mini = _igvn.intcon(-max_jint);
  set_ctrl(zero, C->root());
  set_ctrl(one, C->root());
  set_ctrl(mini, C->root());

  // Range checks that do not dominate the loop backedge (ie.
  // conditionally executed) can lengthen the pre loop limit beyond
  // the original loop limit. To prevent this, the pre limit is
  // (for stride > 0) MINed with the original loop limit (MAXed
  // stride < 0) when some range_check (rc) is conditionally
  // executed.
  bool conditional_rc = false;

  // Check loop body for tests of trip-counter plus loop-invariant vs
  // loop-invariant.
  for( uint i = 0; i < loop->_body.size(); i++ ) {
    Node *iff = loop->_body[i];
    if (iff->Opcode() == Op_If ||
        iff->Opcode() == Op_RangeCheck) { // Test?
      // Test is an IfNode, has 2 projections. If BOTH are in the loop
      // we need loop unswitching instead of iteration splitting.
      Node *exit = loop->is_loop_exit(iff);
      if( !exit ) continue;
      int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;

      // Get boolean condition to test
      Node *i1 = iff->in(1);
      if( !i1->is_Bool() ) continue;
      BoolNode *bol = i1->as_Bool();
      BoolTest b_test = bol->_test;
      // Flip sense of test if exit condition is flipped
      if( flip )
        b_test = b_test.negate();

      // Get compare
      Node *cmp = bol->in(1);

      // Look for trip_counter + offset vs limit
      Node *rc_exp = cmp->in(1);
      Node *limit  = cmp->in(2);
      jint scale_con= 1;        // Assume trip counter not scaled

      Node *limit_c = get_ctrl(limit);
      if( loop->is_member(get_loop(limit_c) ) ) {
        // Compare might have operands swapped; commute them
        b_test = b_test.commute();
        rc_exp = cmp->in(2);
        limit  = cmp->in(1);
        limit_c = get_ctrl(limit);
        if( loop->is_member(get_loop(limit_c) ) )
          continue;             // Both inputs are loop varying; cannot RCE
      }
      // Here we know 'limit' is loop invariant

      // 'limit' maybe pinned below the zero trip test (probably from a
      // previous round of rce), in which case, it can't be used in the
      // zero trip test expression which must occur before the zero test's if.
      if( limit_c == ctrl ) {
        continue;  // Don't rce this check but continue looking for other candidates.
      }

      // Check for scaled induction variable plus an offset
      Node *offset = NULL;

      if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) {
        continue;
      }

      Node *offset_c = get_ctrl(offset);
      if( loop->is_member( get_loop(offset_c) ) )
        continue;               // Offset is not really loop invariant
      // Here we know 'offset' is loop invariant.

      // As above for the 'limit', the 'offset' maybe pinned below the
      // zero trip test.
      if( offset_c == ctrl ) {
        continue; // Don't rce this check but continue looking for other candidates.
      }
#ifdef ASSERT
      if (TraceRangeLimitCheck) {
        tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
        bol->dump(2);
      }
#endif
      // At this point we have the expression as:
      //   scale_con * trip_counter + offset :: limit
      // where scale_con, offset and limit are loop invariant.  Trip_counter
      // monotonically increases by stride_con, a constant.  Both (or either)
      // stride_con and scale_con can be negative which will flip about the
      // sense of the test.

      // Adjust pre and main loop limits to guard the correct iteration set
      if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
        if( b_test._test == BoolTest::lt ) { // Range checks always use lt
          // The underflow and overflow limits: 0 <= scale*I+offset < limit
          add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
          if (!conditional_rc) {
            // (0-offset)/scale could be outside of loop iterations range.
            conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
          }
        } else {
          if (PrintOpto) {
            tty->print_cr("missed RCE opportunity");
          }
          continue;             // In release mode, ignore it
        }
      } else {                  // Otherwise work on normal compares
        switch( b_test._test ) {
        case BoolTest::gt:
          // Fall into GE case
        case BoolTest::ge:
          // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
          scale_con = -scale_con;
          offset = new SubINode( zero, offset );
          register_new_node( offset, pre_ctrl );
          limit  = new SubINode( zero, limit );
          register_new_node( limit, pre_ctrl );
          // Fall into LE case
        case BoolTest::le:
          if (b_test._test != BoolTest::gt) {
            // Convert X <= Y to X < Y+1
            limit = new AddINode( limit, one );
            register_new_node( limit, pre_ctrl );
          }
          // Fall into LT case
        case BoolTest::lt:
          // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
          // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
          // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
          add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
          if (!conditional_rc) {
            // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
            // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
            // still be outside of loop range.
            conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
          }
          break;
        default:
          if (PrintOpto) {
            tty->print_cr("missed RCE opportunity");
          }
          continue;             // Unhandled case
        }
      }

      // Kill the eliminated test
      C->set_major_progress();
      Node *kill_con = _igvn.intcon( 1-flip );
      set_ctrl(kill_con, C->root());
      _igvn.replace_input_of(iff, 1, kill_con);
      // Find surviving projection
      assert(iff->is_If(), "");
      ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
      // Find loads off the surviving projection; remove their control edge
      for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
        Node* cd = dp->fast_out(i); // Control-dependent node
        if (cd->is_Load() && cd->depends_only_on_test()) {   // Loads can now float around in the loop
          // Allow the load to float around in the loop, or before it
          // but NOT before the pre-loop.
          _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
          // Back up the iterator: removing the edge compacts the out-array.
          --i;
          --imax;
        }
      }

    } // End of is IF

  }

  // Update loop limits
  if (conditional_rc) {
    // Clamp the pre-loop limit to the original limit so a conditionally
    // executed range check cannot lengthen the pre-loop past it.
    pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
                                 : (Node*)new MaxINode(pre_limit, orig_limit);
    register_new_node(pre_limit, pre_ctrl);
  }
  _igvn.replace_input_of(pre_opaq, 1, pre_limit);

  // Note:: we are making the main loop limit no longer precise;
  // need to round up based on stride.
  cl->set_nonexact_trip_count();
  if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
    // "Standard" round-up logic:  ([main_limit-init+(y-1)]/y)*y+init
    // Hopefully, compiler will optimize for powers of 2.
    Node *ctrl = get_ctrl(main_limit);
    Node *stride = cl->stride();
    Node *init = cl->init_trip()->uncast();
    Node *span = new SubINode(main_limit,init);
    register_new_node(span,ctrl);
    Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1));
    Node *add = new AddINode(span,rndup);
    register_new_node(add,ctrl);
    Node *div = new DivINode(0,add,stride);
    register_new_node(div,ctrl);
    Node *mul = new MulINode(div,stride);
    register_new_node(mul,ctrl);
    Node *newlim = new AddINode(mul,init);
    register_new_node(newlim,ctrl);
    main_limit = newlim;
  }

  Node *main_cle = cl->loopexit();
  Node *main_bol = main_cle->in(1);
  // Hacking loop bounds; need private copies of exit test
  if( main_bol->outcnt() > 1 ) {// BoolNode shared?
    main_bol = main_bol->clone();// Clone a private BoolNode
    register_new_node( main_bol, main_cle->in(0) );
    _igvn.replace_input_of(main_cle, 1, main_bol);
  }
  Node *main_cmp = main_bol->in(1);
  if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
    main_cmp = main_cmp->clone();// Clone a private CmpNode
    register_new_node( main_cmp, main_cle->in(0) );
    _igvn.replace_input_of(main_bol, 1, main_cmp);
  }
  // Hack the now-private loop bounds
  _igvn.replace_input_of(main_cmp, 2, main_limit);
  // The OpaqueNode is unshared by design
  assert( opqzm->outcnt() == 1, "cannot hack shared node" );
  _igvn.replace_input_of(opqzm, 1, main_limit);
}
2393
2394 //------------------------------DCE_loop_body----------------------------------
2395 // Remove simplistic dead code from loop body
2396 void IdealLoopTree::DCE_loop_body() {
2397 for( uint i = 0; i < _body.size(); i++ )
2398 if( _body.at(i)->outcnt() == 0 )
2399 _body.map( i--, _body.pop() );
2400 }
2401
2402
2403 //------------------------------adjust_loop_exit_prob--------------------------
2404 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
2405 // Replace with a 1-in-10 exit guess.
2406 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
2407 Node *test = tail();
2408 while( test != _head ) {
2409 uint top = test->Opcode();
2410 if( top == Op_IfTrue || top == Op_IfFalse ) {
2411 int test_con = ((ProjNode*)test)->_con;
2412 assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity");
2413 IfNode *iff = test->in(0)->as_If();
2731 bool should_align = policy_align(phase);
2732
2733 // If not RCE'ing (iteration splitting) or Aligning, then we do not
2734 // need a pre-loop. We may still need to peel an initial iteration but
2735 // we will not be needing an unknown number of pre-iterations.
2736 //
2737 // Basically, if may_rce_align reports FALSE first time through,
2738 // we will not be able to later do RCE or Aligning on this loop.
2739 bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
2740
2741 // If we have any of these conditions (RCE, alignment, unrolling) met, then
2742 // we switch to the pre-/main-/post-loop model. This model also covers
2743 // peeling.
2744 if (should_rce || should_align || should_unroll) {
2745 if (cl->is_normal_loop()) // Convert to 'pre/main/post' loops
2746 phase->insert_pre_post_loops(this,old_new, !may_rce_align);
2747
2748 // Adjust the pre- and main-loop limits to let the pre and post loops run
2749 // with full checks, but the main-loop with no checks. Remove said
2750 // checks from the main body.
2751 if (should_rce)
2752 phase->do_range_check(this,old_new);
2753
2754 // Double loop body for unrolling. Adjust the minimum-trip test (will do
2755 // twice as many iterations as before) and the main body limit (only do
2756 // an even number of trips). If we are peeling, we might enable some RCE
2757 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
2758 // peeling.
2759 if (should_unroll && !should_peel) {
2760 if (SuperWordLoopUnrollAnalysis) {
2761 phase->insert_vector_post_loop(this, old_new);
2762 }
2763 phase->do_unroll(this, old_new, true);
2764 }
2765
2766 // Adjust the pre-loop limits to align the main body
2767 // iterations.
2768 if (should_align)
2769 Unimplemented();
2770
2771 } else { // Else we have an unchanged counted loop
2772 if (should_peel) // Might want to peel but do nothing else
|
966 }
967
968 bool PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) {
969 Node* castii = new CastIINode(incr, TypeInt::INT, true);
970 castii->set_req(0, ctrl);
971 register_new_node(castii, ctrl);
972 for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
973 Node* n = incr->fast_out(i);
974 if (n->is_Phi() && n->in(0) == loop) {
975 int nrep = n->replace_edge(incr, castii);
976 return true;
977 }
978 }
979 return false;
980 }
981
//------------------------------insert_pre_post_loops--------------------------
// Insert pre and post loops. If peel_only is set, the pre-loop can not have
// more iterations added. It acts as a 'peel' only, no lower-bound RCE, no
// alignment. Useful to unroll loops that do no array accesses.
//
// 'loop' is the counted loop being wrapped; 'old_new' receives the old->new
// node mapping produced by clone_loop(); 'peel_only' restricts the pre-loop
// to a single peeled iteration (its limit is not hidden behind an Opaque1
// node, so later RCE cannot grow it -- see Step B4 below).
void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_new, bool peel_only) {

#ifndef PRODUCT
  if (TraceLoopOpts) {
    if (peel_only)
      tty->print("PeelMainPost ");
    else
      tty->print("PreMainPost ");
    loop->dump_head();
  }
#endif
  C->set_major_progress();

  // Find common pieces of the loop being guarded with pre & post loops
  CountedLoopNode *main_head = loop->_head->as_CountedLoop();
  assert(main_head->is_normal_loop(), "");
  CountedLoopEndNode *main_end = main_head->loopexit();
  guarantee(main_end != NULL, "no loop exit node");
  assert(main_end->outcnt() == 2, "1 true, 1 false path only");
  uint dd_main_head = dom_depth(main_head);
  uint max = main_head->outcnt();  // NOTE(review): appears unused in this function -- confirm before removing

  // Cache the loop's key pieces before any graph surgery below mutates it.
  // NOTE(review): 'pre_header' is not referenced again in this view.
  Node *pre_header= main_head->in(LoopNode::EntryControl);
  Node *init = main_head->init_trip();
  Node *incr = main_end ->incr();
  Node *limit = main_end ->limit();
  Node *stride = main_end ->stride();
  Node *cmp = main_end ->cmp_node();
  BoolTest::mask b_test = main_end->test_trip();

  // Need only 1 user of 'bol' because I will be hacking the loop bounds.
  Node *bol = main_end->in(CountedLoopEndNode::TestValue);
  if (bol->outcnt() != 1) {
    bol = bol->clone();
    register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
    _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
  }
  // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
  if (cmp->outcnt() != 1) {
    cmp = cmp->clone();
    register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
    _igvn.replace_input_of(bol, 1, cmp);
  }

  // Add the post loop
  PostLoopInfo post_loop_info;
  insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);

  // Update local caches for next stanza
  // NOTE(review): 'main_exit' is not referenced again in this view.
  Node *main_exit = post_loop_info.new_main_exit;


  //------------------------------
  // Step B: Create Pre-Loop.

  // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
  // loop pre-header illegally has 2 control users (old & new loops).
  clone_loop(loop, old_new, dd_main_head);
  CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
  CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
  pre_head->set_pre_loop(main_head);
  Node *pre_incr = old_new[incr->_idx];

  // Reduce the pre-loop trip count.
  pre_end->_prob = PROB_FAIR;

  // Find the pre-loop normal exit.
  Node* pre_exit = pre_end->proj_out(false);
  assert(pre_exit->Opcode() == Op_IfFalse, "");
  IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
  _igvn.register_new_node_with_optimizer(new_pre_exit);
  set_idom(new_pre_exit, pre_end, dd_main_head);
  set_loop(new_pre_exit, loop->_parent);

  // Step B2: Build a zero-trip guard for the main-loop. After leaving the
  // pre-loop, the main-loop may not execute at all. Later in life this
  // zero-trip guard will become the minimum-trip guard when we unroll
  // the main-loop.
  // The limit is wrapped in an Opaque1 node so IGVN cannot fold the guard
  // away while the loop bounds are still being adjusted.
  Node *min_opaq = new Opaque1Node(C, limit);
  Node *min_cmp = new CmpINode(pre_incr, min_opaq);
  Node *min_bol = new BoolNode(min_cmp, b_test);
  register_new_node(min_opaq, new_pre_exit);
  register_new_node(min_cmp , new_pre_exit);
  register_new_node(min_bol , new_pre_exit);

  // Build the IfNode (assume the main-loop is executed always).
  IfNode *min_iff = new IfNode(new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN);
  _igvn.register_new_node_with_optimizer(min_iff);
  set_idom(min_iff, new_pre_exit, dd_main_head);
  set_loop(min_iff, loop->_parent);

  // Plug in the false-path, taken if we need to skip main-loop
  _igvn.hash_delete(pre_exit);
  pre_exit->set_req(0, min_iff);
  set_idom(pre_exit, min_iff, dd_main_head);
  set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
  // Make the true-path, must enter the main loop
  Node *min_taken = new IfTrueNode(min_iff);
  _igvn.register_new_node_with_optimizer(min_taken);
  set_idom(min_taken, min_iff, dd_main_head);
  set_loop(min_taken, loop->_parent);
  // Plug in the true path
  _igvn.hash_delete(main_head);
  main_head->set_req(LoopNode::EntryControl, min_taken);
  set_idom(main_head, min_taken, dd_main_head);

  Arena *a = Thread::current()->resource_area();
  VectorSet visited(a);
  Node_Stack clones(a, main_head->back_control()->outcnt());
  // Step B3: Make the fall-in values to the main-loop come from the
  // fall-out values of the pre-loop.
  for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
    Node* main_phi = main_head->fast_out(i2);
    if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0) {
      Node *pre_phi = old_new[main_phi->_idx];
      Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
                                            main_head->init_control(),
                                            pre_phi->in(LoopNode::LoopBackControl),
                                            visited, clones);
      _igvn.hash_delete(main_phi);
      main_phi->set_req(LoopNode::EntryControl, fallpre);
    }
  }

  // Nodes inside the loop may be control dependent on a predicate
  // that was moved before the preloop. If the back branch of the main
  // or post loops becomes dead, those nodes won't be dependent on the
  // test that guards that loop nest anymore which could lead to an
  // incorrect array access because it executes independently of the
  // test that was guarding the loop nest. We add a special CastII on
  // the if branch that enters the loop, between the input induction
  // variable value and the induction variable Phi to preserve correct
  // dependencies.

  // CastII for the main loop:
  bool inserted = cast_incr_before_loop(pre_incr, min_taken, main_head);
  assert(inserted, "no castII inserted");

  // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
  // RCE and alignment may change this later.
  Node *cmp_end = pre_end->cmp_node();
  assert(cmp_end->in(2) == limit, "");
  Node *pre_limit = new AddINode(init, stride);

  // Save the original loop limit in this Opaque1 node for
  // use by range check elimination.
  Node *pre_opaq = new Opaque1Node(C, pre_limit, limit);

  register_new_node(pre_limit, pre_head->in(0));
  register_new_node(pre_opaq , pre_head->in(0));

  // Since no other users of pre-loop compare, I can hack limit directly
  assert(cmp_end->outcnt() == 1, "no other users");
  _igvn.hash_delete(cmp_end);
  // For peel_only loops use the bare limit (no Opaque1) so the pre-loop
  // can never be grown past its single iteration by later RCE.
  cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);

  // Special case for not-equal loop bounds:
  // Change pre loop test, main loop test, and the
  // main loop guard test to use lt or gt depending on stride
  // direction:
  // positive stride use <
  // negative stride use >
  //
  // not-equal test is kept for post loop to handle case
  // when init > limit when stride > 0 (and reverse).

  if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {

    BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
    // Modify pre loop end condition
    Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
    BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
    register_new_node(new_bol0, pre_head->in(0));
    _igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
    // Modify main loop guard condition
    assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
    BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
    register_new_node(new_bol1, new_pre_exit);
    _igvn.hash_delete(min_iff);
    min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
    // Modify main loop end condition
    BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
    BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
    register_new_node(new_bol2, main_end->in(CountedLoopEndNode::TestControl));
    _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
  }

  // Flag main loop
  main_head->set_main_loop();
  if (peel_only) main_head->set_main_no_pre_loop();

  // Subtract a trip count for the pre-loop.
  main_head->set_trip_count(main_head->trip_count() - 1);

  // It's difficult to be precise about the trip-counts
  // for the pre/post loops. They are usually very short,
  // so guess that 4 trips is a reasonable value.
  post_loop_info.post_head->set_profile_trip_cnt(4.0);
  pre_head->set_profile_trip_cnt(4.0);

  // Now force out all loop-invariant dominating tests. The optimizer
  // finds some, but we _know_ they are all useless.
  peeled_dom_test_elim(loop,old_new);
  loop->record_for_igvn();
}
1191
//------------------------------insert_vector_post_loop------------------------
// Insert a copy of the atomic unrolled vectorized main loop as a post loop,
// unroll_policy has already informed us that more unrolling is about to happen to
// the main loop. The resultant post loop will serve as a vectorized drain loop.
void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
  // Only counted loops are candidates.
  if (!loop->_head->is_CountedLoop()) return;

  CountedLoopNode *cl = loop->_head->as_CountedLoop();

  // only process vectorized main loops
  if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;

  // only process atomic unroll vector loops (not super unrolled after vectorization)
  // NOTE(review): 'cur_unroll' and 'slp_max_unroll_factor' are presumably
  // computed from 'cl' just above -- confirm against the full file.
  if (cur_unroll != slp_max_unroll_factor) return;

  // we only ever process this one time
  if (cl->has_atomic_post_loop()) return;

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print("PostVector ");
    loop->dump_head();
  }
#endif
  C->set_major_progress();

  // Find common pieces of the loop being guarded with pre & post loops
  CountedLoopNode *main_head = loop->_head->as_CountedLoop();
  CountedLoopEndNode *main_end = main_head->loopexit();
  guarantee(main_end != NULL, "no loop exit node");
  // diagnostic to show loop end is not properly formed
  assert(main_end->outcnt() == 2, "1 true, 1 false path only");

  // mark this loop as processed
  main_head->mark_has_atomic_post_loop();

  Node *incr = main_end->incr();
  Node *limit = main_end->limit();

  // In this case we throw away the result as we are not using it to connect anything else.
  PostLoopInfo post_loop_info;
  insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);

  // It's difficult to be precise about the trip-counts
  // for post loops. They are usually very short,
  // so guess that unit vector trips is a reasonable value.
  post_loop_info.post_head->set_profile_trip_cnt(cur_unroll);

  // Now force out all loop-invariant dominating tests. The optimizer
  // finds some, but we _know_ they are all useless.
  peeled_dom_test_elim(loop, old_new);
  loop->record_for_igvn();
}
1250
1251
1252 //-------------------------insert_scalar_rced_post_loop------------------------
1253 // Insert a copy of the rce'd main loop as a post loop,
1254 // We have not unrolled the main loop, so this is the right time to inject this.
1255 // Later we will examine the partner of this post loop pair which still has range checks
1256 // to see inject code which tests at runtime if the range checks are applicable.
1257 void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1258 if (!loop->_head->is_CountedLoop()) return;
1259
1260 CountedLoopNode *cl = loop->_head->as_CountedLoop();
1261
1262 // only process RCE'd main loops
1263 if (cl->loop_has_range_checks() || !cl->is_main_loop()) return;
1264
1265 #ifndef PRODUCT
1266 if (TraceLoopOpts) {
1267 tty->print("PostScalarRce ");
1268 loop->dump_head();
1269 }
1270 #endif
1271 C->set_major_progress();
1272
1273 // Find common pieces of the loop being guarded with pre & post loops
1274 CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1275 CountedLoopEndNode *main_end = main_head->loopexit();
1276 guarantee(main_end != NULL, "no loop exit node");
1277 // diagnostic to show loop end is not properly formed
1278 assert(main_end->outcnt() == 2, "1 true, 1 false path only");
1279
1280 Node *incr = main_end->incr();
1281 Node *limit = main_end->limit();
1282
1283 // In this case we throw away the result as we are not using it to connect anything else.
1284 PostLoopInfo post_loop_info;
1285 insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);
1286
1287 // It's difficult to be precise about the trip-counts
1288 // for post loops. They are usually very short,
1289 // so guess that unit vector trips is a reasonable value.
1290 post_loop_info.post_head->set_profile_trip_cnt(4.0);
1291
1292 // Now force out all loop-invariant dominating tests. The optimizer
1293 // finds some, but we _know_ they are all useless.
1294 peeled_dom_test_elim(loop, old_new);
1295 loop->record_for_igvn();
1296 }
1297
1298
//------------------------------insert_post_loop-------------------------------
// Insert post loops. Add a post loop to the given loop passed.
// 'incr'/'limit' are the main loop's trip-counter exit value and limit;
// on return 'post_loop_info' carries the cloned post-loop head and the new
// main-loop exit projection for callers that keep building on them.
void PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
                                      CountedLoopNode *main_head, CountedLoopEndNode *main_end,
                                      Node *incr, Node *limit, PostLoopInfo &post_loop_info) {

  //------------------------------
  // Step A: Create a new post-Loop.
  Node* main_exit = main_end->proj_out(false);
  assert(main_exit->Opcode() == Op_IfFalse, "");
  int dd_main_exit = dom_depth(main_exit);

  // Step A1: Clone the loop body of main. The clone becomes the vector post-loop.
  // The main loop pre-header illegally has 2 control users (old & new loops).
  clone_loop(loop, old_new, dd_main_exit);
  assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
  CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
  post_head->set_normal_loop();
  post_head->set_post_loop(main_head);
  post_loop_info.post_head = post_head;

  // Reduce the post-loop trip count.
  CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
  post_end->_prob = PROB_FAIR;

  // Build the main-loop normal exit.
  IfFalseNode *new_main_exit = new IfFalseNode(main_end);
  _igvn.register_new_node_with_optimizer(new_main_exit);
  set_idom(new_main_exit, main_end, dd_main_exit);
  set_loop(new_main_exit, loop->_parent);

  // Step A2: Build a zero-trip guard for the post-loop. After leaving the
  // main-loop, the post-loop may not execute at all. We 'opaque' the incr
  // (the previous loop trip-counter exit value) because we will be changing
  // the exit value (via additional unrolling) so we cannot constant-fold away the zero
  // trip guard until all unrolling is done.
  Node *zer_opaq = new Opaque1Node(C, incr);
  Node *zer_cmp = new CmpINode(zer_opaq, limit);
  Node *zer_bol = new BoolNode(zer_cmp, main_end->test_trip());
  register_new_node(zer_opaq, new_main_exit);
  register_new_node(zer_cmp, new_main_exit);
  register_new_node(zer_bol, new_main_exit);

  // Build the IfNode
  IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
  _igvn.register_new_node_with_optimizer(zer_iff);
  set_idom(zer_iff, new_main_exit, dd_main_exit);
  set_loop(zer_iff, loop->_parent);

  // Plug in the false-path, taken if we need to skip this post-loop
  _igvn.replace_input_of(main_exit, 0, zer_iff);
  set_idom(main_exit, zer_iff, dd_main_exit);
  set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
  // Make the true-path, must enter this post loop
  Node *zer_taken = new IfTrueNode(zer_iff);
  _igvn.register_new_node_with_optimizer(zer_taken);
  set_idom(zer_taken, zer_iff, dd_main_exit);
  set_loop(zer_taken, loop->_parent);
  // Plug in the true path
  _igvn.hash_delete(post_head);
  post_head->set_req(LoopNode::EntryControl, zer_taken);
  set_idom(post_head, zer_taken, dd_main_exit);

  Arena *a = Thread::current()->resource_area();
  VectorSet visited(a);
  Node_Stack clones(a, main_head->back_control()->outcnt());
  // Step A3: Make the fall-in values to the post-loop come from the
  // fall-out values of the main-loop.
  for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
    Node* main_phi = main_head->fast_out(i);
    if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) {
      Node *cur_phi = old_new[main_phi->_idx];
      Node *fallnew = clone_up_backedge_goo(main_head->back_control(),
                                            post_head->init_control(),
                                            main_phi->in(LoopNode::LoopBackControl),
                                            visited, clones);
      _igvn.hash_delete(cur_phi);
      cur_phi->set_req(LoopNode::EntryControl, fallnew);
    }
  }

  // CastII for the new post loop:
  // pin the trip-counter value entering the post-loop on the zero-trip
  // guard's taken branch so dependent nodes cannot float above the guard
  // (see cast_incr_before_loop).
  bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
  assert(inserted, "no castII inserted");

  post_loop_info.new_main_exit = new_main_exit;
}
1386
1387
1388 //------------------------------is_invariant-----------------------------
1389 // Return true if n is invariant
1390 bool IdealLoopTree::is_invariant(Node* n) const {
1391 Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1392 if (n_c->is_top()) return false;
1393 return !is_member(_phase->get_loop(n_c));
1394 }
1395
1396
1397 //------------------------------do_unroll--------------------------------------
1398 // Unroll the loop body one step - make each trip do 2 iterations.
1399 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
1400 assert(LoopUnrollLimit, "");
1401 CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1402 CountedLoopEndNode *loop_end = loop_head->loopexit();
1403 assert(loop_end, "");
1404 #ifndef PRODUCT
1405 if (PrintOpto && VerifyLoopOptimizations) {
1406 tty->print("Unrolling ");
1407 loop->dump_head();
2064 Node *ctrl_off = get_ctrl(exp->in(2));
2065 Node* offset = new SubINode(zero, exp->in(2));
2066 register_new_node(offset, ctrl_off);
2067 *p_offset = offset;
2068 }
2069 return true;
2070 }
2071 if (is_scaled_iv(exp->in(2), iv, p_scale)) {
2072 if (p_offset != NULL) {
2073 *p_scale *= -1;
2074 *p_offset = exp->in(1);
2075 }
2076 return true;
2077 }
2078 }
2079 return false;
2080 }
2081
2082 //------------------------------do_range_check---------------------------------
2083 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
2084 void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &range_check_list) {
2085 #ifndef PRODUCT
2086 if (PrintOpto && VerifyLoopOptimizations) {
2087 tty->print("Range Check Elimination ");
2088 loop->dump_head();
2089 } else if (TraceLoopOpts) {
2090 tty->print("RangeCheck ");
2091 loop->dump_head();
2092 }
2093 #endif
2094 assert(RangeCheckElimination, "");
2095 CountedLoopNode *cl = loop->_head->as_CountedLoop();
2096 assert(cl->is_main_loop(), "");
2097
2098 // protect against stride not being a constant
2099 if (!cl->stride_is_con())
2100 return;
2101
2102 // Find the trip counter; we are iteration splitting based on it
2103 Node *trip_counter = cl->phi();
2104 // Find the main loop limit; we will trim it's iterations
2108 // Need to find the main-loop zero-trip guard
2109 Node *ctrl = cl->in(LoopNode::EntryControl);
2110 assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
2111 Node *iffm = ctrl->in(0);
2112 assert(iffm->Opcode() == Op_If, "");
2113 Node *bolzm = iffm->in(1);
2114 assert(bolzm->Opcode() == Op_Bool, "");
2115 Node *cmpzm = bolzm->in(1);
2116 assert(cmpzm->is_Cmp(), "");
2117 Node *opqzm = cmpzm->in(2);
2118 // Can not optimize a loop if zero-trip Opaque1 node is optimized
2119 // away and then another round of loop opts attempted.
2120 if (opqzm->Opcode() != Op_Opaque1)
2121 return;
2122 assert(opqzm->in(1) == main_limit, "do not understand situation");
2123
2124 // Find the pre-loop limit; we will expand its iterations to
2125 // not ever trip low tests.
2126 Node *p_f = iffm->in(0);
2127 // pre loop may have been optimized out
2128 if (p_f->Opcode() != Op_IfFalse)
2129 return;
2130
2131 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2132 assert(pre_end->loopnode()->is_pre_loop(), "");
2133 Node *pre_opaq1 = pre_end->limit();
2134 // Occasionally it's possible for a pre-loop Opaque1 node to be
2135 // optimized away and then another round of loop opts attempted.
2136 // We can not optimize this particular loop in that case.
2137 if (pre_opaq1->Opcode() != Op_Opaque1)
2138 return;
2139 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2140 Node *pre_limit = pre_opaq->in(1);
2141
2142 // Where do we put new limit calculations
2143 Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2144
2145 // Ensure the original loop limit is available from the
2146 // pre-loop Opaque1 node.
2147 Node *orig_limit = pre_opaq->original_loop_limit();
2148 if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP)
2149 return;
2150
2152
2153 int stride_con = cl->stride_con();
2154 Node *zero = _igvn.intcon(0);
2155 Node *one = _igvn.intcon(1);
2156 // Use symmetrical int range [-max_jint,max_jint]
2157 Node *mini = _igvn.intcon(-max_jint);
2158 set_ctrl(zero, C->root());
2159 set_ctrl(one, C->root());
2160 set_ctrl(mini, C->root());
2161
2162 // Range checks that do not dominate the loop backedge (ie.
2163 // conditionally executed) can lengthen the pre loop limit beyond
2164 // the original loop limit. To prevent this, the pre limit is
2165 // (for stride > 0) MINed with the original loop limit (MAXed
2166 // stride < 0) when some range_check (rc) is conditionally
2167 // executed.
2168 bool conditional_rc = false;
2169
2170 // Check loop body for tests of trip-counter plus loop-invariant vs
2171 // loop-invariant.
2172 for (uint i = 0; i < loop->_body.size(); i++) {
2173 Node *iff = loop->_body[i];
2174 if (iff->Opcode() == Op_If ||
2175 iff->Opcode() == Op_RangeCheck) { // Test?
2176 // Test is an IfNode, has 2 projections. If BOTH are in the loop
2177 // we need loop unswitching instead of iteration splitting.
2178 range_check_list.push(iff);
2179 Node *exit = loop->is_loop_exit(iff);
2180 if (!exit) continue;
2181 int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
2182
2183 // Get boolean condition to test
2184 Node *i1 = iff->in(1);
2185 if (!i1->is_Bool()) continue;
2186 BoolNode *bol = i1->as_Bool();
2187 BoolTest b_test = bol->_test;
2188 // Flip sense of test if exit condition is flipped
2189 if (flip)
2190 b_test = b_test.negate();
2191
2192 // Get compare
2193 Node *cmp = bol->in(1);
2194
2195 // Look for trip_counter + offset vs limit
2196 Node *rc_exp = cmp->in(1);
2197 Node *limit = cmp->in(2);
2198 jint scale_con= 1; // Assume trip counter not scaled
2199
2200 Node *limit_c = get_ctrl(limit);
2201 if (loop->is_member(get_loop(limit_c))) {
2202 // Compare might have operands swapped; commute them
2203 b_test = b_test.commute();
2204 rc_exp = cmp->in(2);
2205 limit = cmp->in(1);
2206 limit_c = get_ctrl(limit);
2207 if (loop->is_member(get_loop(limit_c)))
2208 continue; // Both inputs are loop varying; cannot RCE
2209 }
2210 // Here we know 'limit' is loop invariant
2211
2212 // 'limit' maybe pinned below the zero trip test (probably from a
2213 // previous round of rce), in which case, it can't be used in the
2214 // zero trip test expression which must occur before the zero test's if.
2215 if (limit_c == ctrl)
2216 continue; // Don't rce this check but continue looking for other candidates.
2217
2218 // Check for scaled induction variable plus an offset
2219 Node *offset = NULL;
2220
2221 if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset))
2222 continue;
2223
2224 Node *offset_c = get_ctrl(offset);
2225 if (loop->is_member( get_loop(offset_c)))
2226 continue; // Offset is not really loop invariant
2227 // Here we know 'offset' is loop invariant.
2228
2229 // As above for the 'limit', the 'offset' maybe pinned below the
2230 // zero trip test.
2231 if (offset_c == ctrl)
2232 continue; // Don't rce this check but continue looking for other candidates.
2233
2234 #ifdef ASSERT
2235 if (TraceRangeLimitCheck) {
2236 tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2237 bol->dump(2);
2238 }
2239 #endif
2240 // At this point we have the expression as:
2241 // scale_con * trip_counter + offset :: limit
2242 // where scale_con, offset and limit are loop invariant. Trip_counter
2243 // monotonically increases by stride_con, a constant. Both (or either)
2244 // stride_con and scale_con can be negative which will flip about the
2245 // sense of the test.
2246
2247 // Adjust pre and main loop limits to guard the correct iteration set
2248 if (cmp->Opcode() == Op_CmpU) {// Unsigned compare is really 2 tests
2249 if (b_test._test == BoolTest::lt) { // Range checks always use lt
2250 // The underflow and overflow limits: 0 <= scale*I+offset < limit
2251 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2252 if (!conditional_rc) {
2253 // (0-offset)/scale could be outside of loop iterations range.
2254 conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
2255 }
2256 } else {
2257 if (PrintOpto) {
2258 tty->print_cr("missed RCE opportunity");
2259 }
2260 continue; // In release mode, ignore it
2261 }
2262 } else { // Otherwise work on normal compares
2263 switch( b_test._test ) {
2264 case BoolTest::gt:
2265 // Fall into GE case
2266 case BoolTest::ge:
2267 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2268 scale_con = -scale_con;
2269 offset = new SubINode(zero, offset);
2270 register_new_node(offset, pre_ctrl);
2271 limit = new SubINode(zero, limit);
2272 register_new_node(limit, pre_ctrl);
2273 // Fall into LE case
2274 case BoolTest::le:
2275 if (b_test._test != BoolTest::gt) {
2276 // Convert X <= Y to X < Y+1
2277 limit = new AddINode(limit, one);
2278 register_new_node(limit, pre_ctrl);
2279 }
2280 // Fall into LT case
2281 case BoolTest::lt:
2282 // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
2283 // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
2284 // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2285 add_constraint(stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
2286 if (!conditional_rc) {
2287 // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
2288 // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
2289 // still be outside of loop range.
2290 conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
2291 }
2292 break;
2293 default:
2294 if (PrintOpto) {
2295 tty->print_cr("missed RCE opportunity");
2296 }
2297 continue; // Unhandled case
2298 }
2299 }
2300
2301 // Kill the eliminated test
2302 C->set_major_progress();
2303 Node *kill_con = _igvn.intcon(1-flip);
2304 set_ctrl(kill_con, C->root());
2305 _igvn.replace_input_of(iff, 1, kill_con);
2306 // Find surviving projection
2307 assert(iff->is_If(), "");
2308 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2309 // Find loads off the surviving projection; remove their control edge
2310 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2311 Node* cd = dp->fast_out(i); // Control-dependent node
2312 if (cd->is_Load() && cd->depends_only_on_test()) { // Loads can now float around in the loop
2313 // Allow the load to float around in the loop, or before it
2314 // but NOT before the pre-loop.
2315 _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
2316 --i;
2317 --imax;
2318 }
2319 }
2320 range_check_list.pop();
2321
2322 } // End of is IF
2323
2324 }
2325
2326 // Update loop limits
2327 if (conditional_rc) {
2328 pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2329 : (Node*)new MaxINode(pre_limit, orig_limit);
2330 register_new_node(pre_limit, pre_ctrl);
2331 }
2332 _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2333
2334 // Note:: we are making the main loop limit no longer precise;
2335 // need to round up based on stride.
2336 cl->set_nonexact_trip_count();
2337 if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
2338 // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init
2339 // Hopefully, compiler will optimize for powers of 2.
2340 Node *ctrl = get_ctrl(main_limit);
2341 Node *stride = cl->stride();
2342 Node *init = cl->init_trip()->uncast();
2343 Node *span = new SubINode(main_limit,init);
2344 register_new_node(span,ctrl);
2345 Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1));
2346 Node *add = new AddINode(span,rndup);
2347 register_new_node(add,ctrl);
2348 Node *div = new DivINode(0,add,stride);
2349 register_new_node(div,ctrl);
2350 Node *mul = new MulINode(div,stride);
2351 register_new_node(mul,ctrl);
2352 Node *newlim = new AddINode(mul,init);
2353 register_new_node(newlim,ctrl);
2354 main_limit = newlim;
2355 }
2356
2357 Node *main_cle = cl->loopexit();
2358 Node *main_bol = main_cle->in(1);
2359 // Hacking loop bounds; need private copies of exit test
2360 if (main_bol->outcnt() > 1) {// BoolNode shared?
2361 main_bol = main_bol->clone();// Clone a private BoolNode
2362 register_new_node( main_bol, main_cle->in(0) );
2363 _igvn.replace_input_of(main_cle, 1, main_bol);
2364 }
2365 Node *main_cmp = main_bol->in(1);
2366 if (main_cmp->outcnt() > 1) { // CmpNode shared?
2367 main_cmp = main_cmp->clone();// Clone a private CmpNode
2368 register_new_node(main_cmp, main_cle->in(0));
2369 _igvn.replace_input_of(main_bol, 1, main_cmp);
2370 }
2371 // Hack the now-private loop bounds
2372 _igvn.replace_input_of(main_cmp, 2, main_limit);
2373 // The OpaqueNode is unshared by design
2374 assert( opqzm->outcnt() == 1, "cannot hack shared node" );
2375 _igvn.replace_input_of(opqzm, 1, main_limit);
2376 }
2377
2378 //------------------------------has_range_checks-------------------------------
2379 // Check to see if RCE cleaned the current loop of range-checks.
2380 void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
2381 assert(RangeCheckElimination, "");
2382 CountedLoopNode *cl = loop->_head->as_CountedLoop();
2383
2384 // skip this loop if it is already marked
2385 if (cl->loop_has_range_checks()) return;
2386
2387 if (cl->is_main_loop() || cl->is_post_loop()) {
2388 // Now check for existance of range checks
2389 for (uint i = 0; i < loop->_body.size(); i++) {
2390 Node *iff = loop->_body[i];
2391 if (iff->Opcode() == Op_If ||
2392 iff->Opcode() == Op_RangeCheck) {
2393 cl->mark_has_range_checks();
2394 break;
2395 }
2396 }
2397 }
2398 }
2399
2400 //-------------------------multi_version_post_loops----------------------------
2401 // Check the range checks that remain, if simple, use the bounds to guard
2402 // which version to a post loop we execute, one with range checks or one without
2403 bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
2404 bool multi_version_succeeded = false;
2405 assert(RangeCheckElimination, "");
2406 CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
2407 assert(legacy_cl->is_post_loop(), "");
2408
2409 // Check for existance of range checks using the unique instance to make a guard with
2410 Unique_Node_List worklist;
2411 for (uint i = 0; i < legacy_loop->_body.size(); i++) {
2412 Node *iff = legacy_loop->_body[i];
2413 if (iff->Opcode() == Op_If || iff->Opcode() == Op_RangeCheck) {
2414 worklist.push(iff);
2415 }
2416 }
2417
2418 // Find RCE'd post loop so that we can stage its guard.
2419 Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
2420 if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;
2421 Node* iffm = ctrl->in(0);
2422 if (!iffm->is_If()) return multi_version_succeeded;
2423 Node* cur_bool = iffm->in(1);
2424 if (!cur_bool->is_Bool()) return multi_version_succeeded;
2425 Node* cur_cmp = cur_bool->in(1);
2426 if (!cur_cmp->is_Cmp()) return multi_version_succeeded;
2427 Node* cur_opq = cur_cmp->in(1);
2428 // Can not optimize a loop if zero-trip Opaque1 node is optimized away.
2429 if (cur_opq->Opcode() != Op_Opaque1) return multi_version_succeeded;
2430
2431 // Now we test that both the post loops are connected
2432 Node* post_loop_region = iffm->in(0);
2433 if (post_loop_region == NULL) return multi_version_succeeded;
2434 if (!post_loop_region->is_Region()) return multi_version_succeeded;
2435 Node* covering_region = post_loop_region->in(RegionNode::Control+1);
2436 if (covering_region == NULL) return multi_version_succeeded;
2437 if (!covering_region->is_Region()) return multi_version_succeeded;
2438 Node* p_f = covering_region->in(RegionNode::Control);
2439 if (p_f == NULL) return multi_version_succeeded;
2440 if (!p_f->is_IfFalse()) return multi_version_succeeded;
2441 if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded;
2442 CountedLoopEndNode* rce_loop_end = p_f->in(0)->as_CountedLoopEnd();
2443 if (rce_loop_end == NULL) return multi_version_succeeded;
2444 CountedLoopNode* rce_cl = rce_loop_end->loopnode();
2445 if (rce_cl == NULL || !rce_cl->is_post_loop()) return multi_version_succeeded;
2446 CountedLoopNode *known_rce_cl = rce_loop->_head->as_CountedLoop();
2447 if (rce_cl != known_rce_cl) return multi_version_succeeded;
2448
2449 // Then we fetch the cover entry test
2450 ctrl = rce_cl->in(LoopNode::EntryControl);
2451 if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;
2452
2453 #ifndef PRODUCT
2454 if (TraceLoopOpts) {
2455 tty->print("PostMultiVersion\n");
2456 rce_loop->dump_head();
2457 legacy_loop->dump_head();
2458 }
2459 #endif
2460
2461 // Now fetch the limit we want to compare against
2462 Node *limit = rce_cl->limit();
2463 bool first_time = true;
2464
2465 // If we got this far, we identified the post loop which has been RCE'd and
2466 // we have a work list. Now we will try to transform the if guard to cause
2467 // the loop pair to be multi version executed with the determination left to runtime
2468 // or the optimizer if full information is known about the given arrays at compile time.
2469 Node *last_min = NULL;
2470 while (worklist.size()) {
2471 Node* rc_iffm = worklist.pop();
2472 if (rc_iffm->is_If()) {
2473 Node *rc_bolzm = rc_iffm->in(1);
2474 if (rc_bolzm->is_Bool()) {
2475 Node *rc_cmpzm = rc_bolzm->in(1);
2476 if (rc_cmpzm->is_Cmp()) {
2477 Node *rc_left = rc_cmpzm->in(2);
2478 if (first_time) {
2479 last_min = rc_left;
2480 first_time = false;
2481 } else {
2482 Node *cur_min = new MinINode(last_min, rc_left);
2483 last_min = cur_min;
2484 _igvn.register_new_node_with_optimizer(last_min);
2485 }
2486 }
2487 }
2488 }
2489 }
2490
2491 // All we have to do is update the limit of the rce loop
2492 // with the min of our expression and the current limit.
2493 // We will use this expression to replace the current limit.
2494 if (last_min) {
2495 Node *cur_min = new MinINode(last_min, limit);
2496 _igvn.register_new_node_with_optimizer(cur_min);
2497 Node *cmp_node = rce_loop_end->cmp_node();
2498 _igvn.replace_input_of(cmp_node, 2, cur_min);
2499 set_idom(cmp_node, cur_min, dom_depth(ctrl));
2500 set_ctrl(cur_min, ctrl);
2501 set_loop(cur_min, rce_loop->_parent);
2502
2503 legacy_cl->mark_is_multiversioned();
2504 rce_cl->mark_is_multiversioned();
2505 multi_version_succeeded = true;
2506
2507 C->set_major_progress();
2508 }
2509
2510 return multi_version_succeeded;
2511 }
2512
2513 //-------------------------poison_rce_post_loop--------------------------------
2514 // Causes the rce'd post loop to be optimized away if multiverioning fails
2515 void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
2516 CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
2517 Node* ctrl = rce_cl->in(LoopNode::EntryControl);
2518 if (ctrl->is_IfTrue() || ctrl->is_IfFalse()) {
2519 Node* iffm = ctrl->in(0);
2520 if (iffm->is_If()) {
2521 Node* cur_bool = iffm->in(1);
2522 if (cur_bool->is_Bool()) {
2523 Node* cur_cmp = cur_bool->in(1);
2524 if (cur_cmp->is_Cmp()) {
2525 BoolTest::mask new_test = BoolTest::gt;
2526 BoolNode *new_bool = new BoolNode(cur_cmp, new_test);
2527 _igvn.replace_node(cur_bool, new_bool);
2528 _igvn._worklist.push(new_bool);
2529 Node* left_op = cur_cmp->in(1);
2530 _igvn.replace_input_of(cur_cmp, 2, left_op);
2531 C->set_major_progress();
2532 }
2533 }
2534 }
2535 }
2536 }
2537
2538 //------------------------------DCE_loop_body----------------------------------
2539 // Remove simplistic dead code from loop body
2540 void IdealLoopTree::DCE_loop_body() {
2541 for( uint i = 0; i < _body.size(); i++ )
2542 if( _body.at(i)->outcnt() == 0 )
2543 _body.map( i--, _body.pop() );
2544 }
2545
2546
2547 //------------------------------adjust_loop_exit_prob--------------------------
2548 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
2549 // Replace with a 1-in-10 exit guess.
2550 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
2551 Node *test = tail();
2552 while( test != _head ) {
2553 uint top = test->Opcode();
2554 if( top == Op_IfTrue || top == Op_IfFalse ) {
2555 int test_con = ((ProjNode*)test)->_con;
2556 assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity");
2557 IfNode *iff = test->in(0)->as_If();
2875 bool should_align = policy_align(phase);
2876
2877 // If not RCE'ing (iteration splitting) or Aligning, then we do not
2878 // need a pre-loop. We may still need to peel an initial iteration but
2879 // we will not be needing an unknown number of pre-iterations.
2880 //
2881 // Basically, if may_rce_align reports FALSE first time through,
2882 // we will not be able to later do RCE or Aligning on this loop.
2883 bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
2884
2885 // If we have any of these conditions (RCE, alignment, unrolling) met, then
2886 // we switch to the pre-/main-/post-loop model. This model also covers
2887 // peeling.
2888 if (should_rce || should_align || should_unroll) {
2889 if (cl->is_normal_loop()) // Convert to 'pre/main/post' loops
2890 phase->insert_pre_post_loops(this,old_new, !may_rce_align);
2891
2892 // Adjust the pre- and main-loop limits to let the pre and post loops run
2893 // with full checks, but the main-loop with no checks. Remove said
2894 // checks from the main body.
2895 if (should_rce) {
2896 Node_List range_check_list;
2897 phase->do_range_check(this, range_check_list);
2898 if (range_check_list.size() > 0) {
2899 cl->mark_has_range_checks();
2900 }
2901 }
2902
2903 if (should_rce && should_unroll && !should_peel && PostLoopMultiversioning) {
2904 // Try to setup multiversioning on main loops before they are unrolled
2905 if (cl->is_main_loop() && (cl->unrolled_count() == 1))
2906 phase->insert_scalar_rced_post_loop(this, old_new);
2907 }
2908
2909 // Double loop body for unrolling. Adjust the minimum-trip test (will do
2910 // twice as many iterations as before) and the main body limit (only do
2911 // an even number of trips). If we are peeling, we might enable some RCE
2912 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
2913 // peeling.
2914 if (should_unroll && !should_peel) {
2915 if (SuperWordLoopUnrollAnalysis) {
2916 phase->insert_vector_post_loop(this, old_new);
2917 }
2918 phase->do_unroll(this, old_new, true);
2919 }
2920
2921 // Adjust the pre-loop limits to align the main body
2922 // iterations.
2923 if (should_align)
2924 Unimplemented();
2925
2926 } else { // Else we have an unchanged counted loop
2927 if (should_peel) // Might want to peel but do nothing else
|