210 Node* inv1 = n1->in(inv1_idx);
211 Node* n2 = n1->in(3 - inv1_idx);
212 int inv2_idx = is_invariant_addition(n2, phase);
213 if (!inv2_idx) return NULL;
214 Node* x = n2->in(3 - inv2_idx);
215 Node* inv2 = n2->in(inv2_idx);
216
217 bool neg_x = n2->is_Sub() && inv2_idx == 1;
218 bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
219 bool neg_inv1 = n1->is_Sub() && inv1_idx == 2;
220 if (n1->is_Sub() && inv1_idx == 1) {
221 neg_x = !neg_x;
222 neg_inv2 = !neg_inv2;
223 }
224 Node* inv1_c = phase->get_ctrl(inv1);
225 Node* inv2_c = phase->get_ctrl(inv2);
226 Node* n_inv1;
227 if (neg_inv1) {
228 Node *zero = phase->_igvn.intcon(0);
229 phase->set_ctrl(zero, phase->C->root());
230 n_inv1 = new (phase->C) SubINode(zero, inv1);
231 phase->register_new_node(n_inv1, inv1_c);
232 } else {
233 n_inv1 = inv1;
234 }
235 Node* inv;
236 if (neg_inv2) {
237 inv = new (phase->C) SubINode(n_inv1, inv2);
238 } else {
239 inv = new (phase->C) AddINode(n_inv1, inv2);
240 }
241 phase->register_new_node(inv, phase->get_early_ctrl(inv));
242
243 Node* addx;
244 if (neg_x) {
245 addx = new (phase->C) SubINode(inv, x);
246 } else {
247 addx = new (phase->C) AddINode(x, inv);
248 }
249 phase->register_new_node(addx, phase->get_ctrl(x));
250 phase->_igvn.replace_node(n1, addx);
251 assert(phase->get_loop(phase->get_ctrl(n1)) == this, "");
252 _body.yank(n1);
253 return addx;
254 }
255
256 //---------------------reassociate_invariants-----------------------------
257 // Reassociate invariant expressions:
258 void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
259 for (int i = _body.size() - 1; i >= 0; i--) {
260 Node *n = _body.at(i);
261 for (int j = 0; j < 5; j++) {
262 Node* nn = reassociate_add_sub(n, phase);
263 if (nn == NULL) break;
264 n = nn; // again
265 };
266 }
267 }
936 }
937
938 //------------------------------
939 // Step A: Create Post-Loop.
940 Node* main_exit = main_end->proj_out(false);
941 assert( main_exit->Opcode() == Op_IfFalse, "" );
942 int dd_main_exit = dom_depth(main_exit);
943
944 // Step A1: Clone the loop body. The clone becomes the post-loop. The main
945 // loop pre-header illegally has 2 control users (old & new loops).
946 clone_loop( loop, old_new, dd_main_exit );
947 assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" );
948 CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
949 post_head->set_post_loop(main_head);
950
951 // Reduce the post-loop trip count.
952 CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
953 post_end->_prob = PROB_FAIR;
954
955 // Build the main-loop normal exit.
956 IfFalseNode *new_main_exit = new (C) IfFalseNode(main_end);
957 _igvn.register_new_node_with_optimizer( new_main_exit );
958 set_idom(new_main_exit, main_end, dd_main_exit );
959 set_loop(new_main_exit, loop->_parent);
960
961 // Step A2: Build a zero-trip guard for the post-loop. After leaving the
962 // main-loop, the post-loop may not execute at all. We 'opaque' the incr
963 // (the main-loop trip-counter exit value) because we will be changing
964 // the exit value (via unrolling) so we cannot constant-fold away the zero
965 // trip guard until all unrolling is done.
966 Node *zer_opaq = new (C) Opaque1Node(C, incr);
967 Node *zer_cmp = new (C) CmpINode( zer_opaq, limit );
968 Node *zer_bol = new (C) BoolNode( zer_cmp, b_test );
969 register_new_node( zer_opaq, new_main_exit );
970 register_new_node( zer_cmp , new_main_exit );
971 register_new_node( zer_bol , new_main_exit );
972
973 // Build the IfNode
974 IfNode *zer_iff = new (C) IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN );
975 _igvn.register_new_node_with_optimizer( zer_iff );
976 set_idom(zer_iff, new_main_exit, dd_main_exit);
977 set_loop(zer_iff, loop->_parent);
978
979 // Plug in the false-path, taken if we need to skip post-loop
980 _igvn.replace_input_of(main_exit, 0, zer_iff);
981 set_idom(main_exit, zer_iff, dd_main_exit);
982 set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
983 // Make the true-path, must enter the post loop
984 Node *zer_taken = new (C) IfTrueNode( zer_iff );
985 _igvn.register_new_node_with_optimizer( zer_taken );
986 set_idom(zer_taken, zer_iff, dd_main_exit);
987 set_loop(zer_taken, loop->_parent);
988 // Plug in the true path
989 _igvn.hash_delete( post_head );
990 post_head->set_req(LoopNode::EntryControl, zer_taken);
991 set_idom(post_head, zer_taken, dd_main_exit);
992
993 Arena *a = Thread::current()->resource_area();
994 VectorSet visited(a);
995 Node_Stack clones(a, main_head->back_control()->outcnt());
996 // Step A3: Make the fall-in values to the post-loop come from the
997 // fall-out values of the main-loop.
998 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
999 Node* main_phi = main_head->fast_out(i);
1000 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) {
1001 Node *post_phi = old_new[main_phi->_idx];
1002 Node *fallmain = clone_up_backedge_goo(main_head->back_control(),
1003 post_head->init_control(),
1004 main_phi->in(LoopNode::LoopBackControl),
1012 main_exit = new_main_exit;
1013
1014
1015 //------------------------------
1016 // Step B: Create Pre-Loop.
1017
1018 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
1019 // loop pre-header illegally has 2 control users (old & new loops).
1020 clone_loop( loop, old_new, dd_main_head );
1021 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
1022 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
1023 pre_head->set_pre_loop(main_head);
1024 Node *pre_incr = old_new[incr->_idx];
1025
1026 // Reduce the pre-loop trip count.
1027 pre_end->_prob = PROB_FAIR;
1028
1029 // Find the pre-loop normal exit.
1030 Node* pre_exit = pre_end->proj_out(false);
1031 assert( pre_exit->Opcode() == Op_IfFalse, "" );
1032 IfFalseNode *new_pre_exit = new (C) IfFalseNode(pre_end);
1033 _igvn.register_new_node_with_optimizer( new_pre_exit );
1034 set_idom(new_pre_exit, pre_end, dd_main_head);
1035 set_loop(new_pre_exit, loop->_parent);
1036
1037 // Step B2: Build a zero-trip guard for the main-loop. After leaving the
1038 // pre-loop, the main-loop may not execute at all. Later in life this
1039 // zero-trip guard will become the minimum-trip guard when we unroll
1040 // the main-loop.
1041 Node *min_opaq = new (C) Opaque1Node(C, limit);
1042 Node *min_cmp = new (C) CmpINode( pre_incr, min_opaq );
1043 Node *min_bol = new (C) BoolNode( min_cmp, b_test );
1044 register_new_node( min_opaq, new_pre_exit );
1045 register_new_node( min_cmp , new_pre_exit );
1046 register_new_node( min_bol , new_pre_exit );
1047
1048 // Build the IfNode (assume the main-loop is executed always).
1049 IfNode *min_iff = new (C) IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
1050 _igvn.register_new_node_with_optimizer( min_iff );
1051 set_idom(min_iff, new_pre_exit, dd_main_head);
1052 set_loop(min_iff, loop->_parent);
1053
1054 // Plug in the false-path, taken if we need to skip main-loop
1055 _igvn.hash_delete( pre_exit );
1056 pre_exit->set_req(0, min_iff);
1057 set_idom(pre_exit, min_iff, dd_main_head);
1058 set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
1059 // Make the true-path, must enter the main loop
1060 Node *min_taken = new (C) IfTrueNode( min_iff );
1061 _igvn.register_new_node_with_optimizer( min_taken );
1062 set_idom(min_taken, min_iff, dd_main_head);
1063 set_loop(min_taken, loop->_parent);
1064 // Plug in the true path
1065 _igvn.hash_delete( main_head );
1066 main_head->set_req(LoopNode::EntryControl, min_taken);
1067 set_idom(main_head, min_taken, dd_main_head);
1068
1069 visited.Clear();
1070 clones.clear();
1071 // Step B3: Make the fall-in values to the main-loop come from the
1072 // fall-out values of the pre-loop.
1073 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
1074 Node* main_phi = main_head->fast_out(i2);
1075 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
1076 Node *pre_phi = old_new[main_phi->_idx];
1077 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
1078 main_head->init_control(),
1079 pre_phi->in(LoopNode::LoopBackControl),
1080 visited, clones);
1081 _igvn.hash_delete(main_phi);
1082 main_phi->set_req( LoopNode::EntryControl, fallpre );
1083 }
1084 }
1085
1086 // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
1087 // RCE and alignment may change this later.
1088 Node *cmp_end = pre_end->cmp_node();
1089 assert( cmp_end->in(2) == limit, "" );
1090 Node *pre_limit = new (C) AddINode( init, stride );
1091
1092 // Save the original loop limit in this Opaque1 node for
1093 // use by range check elimination.
1094 Node *pre_opaq = new (C) Opaque1Node(C, pre_limit, limit);
1095
1096 register_new_node( pre_limit, pre_head->in(0) );
1097 register_new_node( pre_opaq , pre_head->in(0) );
1098
1099 // Since no other users of pre-loop compare, I can hack limit directly
1100 assert( cmp_end->outcnt() == 1, "no other users" );
1101 _igvn.hash_delete(cmp_end);
1102 cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
1103
1104 // Special case for not-equal loop bounds:
1105 // Change pre loop test, main loop test, and the
1106 // main loop guard test to use lt or gt depending on stride
1107 // direction:
1108 // positive stride use <
1109 // negative stride use >
1110 //
1111 // not-equal test is kept for post loop to handle case
1112 // when init > limit when stride > 0 (and reverse).
1113
1114 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
1115
1116 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
1117 // Modify pre loop end condition
1118 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1119 BoolNode* new_bol0 = new (C) BoolNode(pre_bol->in(1), new_test);
1120 register_new_node( new_bol0, pre_head->in(0) );
1121 _igvn.hash_delete(pre_end);
1122 pre_end->set_req(CountedLoopEndNode::TestValue, new_bol0);
1123 // Modify main loop guard condition
1124 assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
1125 BoolNode* new_bol1 = new (C) BoolNode(min_bol->in(1), new_test);
1126 register_new_node( new_bol1, new_pre_exit );
1127 _igvn.hash_delete(min_iff);
1128 min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
1129 // Modify main loop end condition
1130 BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1131 BoolNode* new_bol2 = new (C) BoolNode(main_bol->in(1), new_test);
1132 register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
1133 _igvn.hash_delete(main_end);
1134 main_end->set_req(CountedLoopEndNode::TestValue, new_bol2);
1135 }
1136
1137 // Flag main loop
1138 main_head->set_main_loop();
1139 if( peel_only ) main_head->set_main_no_pre_loop();
1140
1141 // Subtract a trip count for the pre-loop.
1142 main_head->set_trip_count(main_head->trip_count() - 1);
1143
1144 // It's difficult to be precise about the trip-counts
1145 // for the pre/post loops. They are usually very short,
1146 // so guess that 4 trips is a reasonable value.
1147 post_head->set_profile_trip_cnt(4.0);
1148 pre_head->set_profile_trip_cnt(4.0);
1149
1150 // Now force out all loop-invariant dominating tests. The optimizer
1151 // finds some, but we _know_ they are all useless.
1262 const TypeInt* limit_type = _igvn.type(limit)->is_int();
1263 assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) ||
1264 stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity");
1265
1266 if (limit->is_Con()) {
1267 // The check in policy_unroll and the assert above guarantee
1268 // no underflow if limit is constant.
1269 new_limit = _igvn.intcon(limit->get_int() - stride_con);
1270 set_ctrl(new_limit, C->root());
1271 } else {
1272 // Limit is not constant.
1273 if (loop_head->unrolled_count() == 1) { // only for first unroll
1274 // Separate limit by Opaque node in case it is an incremented
1275 // variable from previous loop to avoid using pre-incremented
1276 // value which could increase register pressure.
1277 // Otherwise reorg_offsets() optimization will create a separate
1278 // Opaque node for each use of trip-counter and as result
1279 // zero trip guard limit will be different from loop limit.
1280 assert(has_ctrl(opaq), "should have it");
1281 Node* opaq_ctrl = get_ctrl(opaq);
1282 limit = new (C) Opaque2Node( C, limit );
1283 register_new_node( limit, opaq_ctrl );
1284 }
1285 if (stride_con > 0 && ((limit_type->_lo - stride_con) < limit_type->_lo) ||
1286 stride_con < 0 && ((limit_type->_hi - stride_con) > limit_type->_hi)) {
1287 // No underflow.
1288 new_limit = new (C) SubINode(limit, stride);
1289 } else {
1290 // (limit - stride) may underflow.
1291 // Clamp the adjustment value with MININT or MAXINT:
1292 //
1293 // new_limit = limit-stride
1294 // if (stride > 0)
1295 // new_limit = (limit < new_limit) ? MININT : new_limit;
1296 // else
1297 // new_limit = (limit > new_limit) ? MAXINT : new_limit;
1298 //
1299 BoolTest::mask bt = loop_end->test_trip();
1300 assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
1301 Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
1302 set_ctrl(adj_max, C->root());
1303 Node* old_limit = NULL;
1304 Node* adj_limit = NULL;
1305 Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
1306 if (loop_head->unrolled_count() > 1 &&
1307 limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
1308 limit->in(CMoveNode::IfTrue) == adj_max &&
1309 bol->as_Bool()->_test._test == bt &&
1310 bol->in(1)->Opcode() == Op_CmpI &&
1311 bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
1312 // Loop was unrolled before.
1313 // Optimize the limit to avoid nested CMove:
1314 // use original limit as old limit.
1315 old_limit = bol->in(1)->in(1);
1316 // Adjust previous adjusted limit.
1317 adj_limit = limit->in(CMoveNode::IfFalse);
1318 adj_limit = new (C) SubINode(adj_limit, stride);
1319 } else {
1320 old_limit = limit;
1321 adj_limit = new (C) SubINode(limit, stride);
1322 }
1323 assert(old_limit != NULL && adj_limit != NULL, "");
1324 register_new_node( adj_limit, ctrl ); // adjust amount
1325 Node* adj_cmp = new (C) CmpINode(old_limit, adj_limit);
1326 register_new_node( adj_cmp, ctrl );
1327 Node* adj_bool = new (C) BoolNode(adj_cmp, bt);
1328 register_new_node( adj_bool, ctrl );
1329 new_limit = new (C) CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
1330 }
1331 register_new_node(new_limit, ctrl);
1332 }
1333 assert(new_limit != NULL, "");
1334 // Replace in loop test.
1335 assert(loop_end->in(1)->in(1) == cmp, "sanity");
1336 if (cmp->outcnt() == 1 && loop_end->in(1)->outcnt() == 1) {
1337 // Don't need to create new test since only one user.
1338 _igvn.hash_delete(cmp);
1339 cmp->set_req(2, new_limit);
1340 } else {
1341 // Create new test since it is shared.
1342 Node* ctrl2 = loop_end->in(0);
1343 Node* cmp2 = cmp->clone();
1344 cmp2->set_req(2, new_limit);
1345 register_new_node(cmp2, ctrl2);
1346 Node* bol2 = loop_end->in(1)->clone();
1347 bol2->set_req(1, cmp2);
1348 register_new_node(bol2, ctrl2);
1349 _igvn.hash_delete(loop_end);
1371 loop_head->double_unrolled_count();
1372
1373 } else { // LoopLimitCheck
1374
1375 // Adjust max trip count. The trip count is intentionally rounded
1376 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
1377 // the main, unrolled, part of the loop will never execute as it is protected
1378 // by the min-trip test. See bug 4834191 for a case where we over-unrolled
1379 // and later determined that part of the unrolled loop was dead.
1380 loop_head->set_trip_count(loop_head->trip_count() / 2);
1381
1382 // Double the count of original iterations in the unrolled loop body.
1383 loop_head->double_unrolled_count();
1384
1385 // -----------
1386 // Step 2: Cut back the trip counter for an unroll amount of 2.
1387 // Loop will normally trip (limit - init)/stride_con. Since it's a
1388 // CountedLoop this is exact (stride divides limit-init exactly).
1389 // We are going to double the loop body, so we want to knock off any
1390 // odd iteration: (trip_cnt & ~1). Then back compute a new limit.
1391 Node *span = new (C) SubINode( limit, init );
1392 register_new_node( span, ctrl );
1393 Node *trip = new (C) DivINode( 0, span, stride );
1394 register_new_node( trip, ctrl );
1395 Node *mtwo = _igvn.intcon(-2);
1396 set_ctrl(mtwo, C->root());
1397 Node *rond = new (C) AndINode( trip, mtwo );
1398 register_new_node( rond, ctrl );
1399 Node *spn2 = new (C) MulINode( rond, stride );
1400 register_new_node( spn2, ctrl );
1401 new_limit = new (C) AddINode( spn2, init );
1402 register_new_node( new_limit, ctrl );
1403
1404 // Hammer in the new limit
1405 Node *ctrl2 = loop_end->in(0);
1406 Node *cmp2 = new (C) CmpINode( loop_head->incr(), new_limit );
1407 register_new_node( cmp2, ctrl2 );
1408 Node *bol2 = new (C) BoolNode( cmp2, loop_end->test_trip() );
1409 register_new_node( bol2, ctrl2 );
1410 _igvn.hash_delete(loop_end);
1411 loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
1412
1413 // Step 3: Find the min-trip test guaranteed before a 'main' loop.
1414 // Make it a 1-trip test (means at least 2 trips).
1415 if( adjust_min_trip ) {
1416 assert( new_limit != NULL, "" );
1417 // Guard test uses an 'opaque' node which is not shared. Hence I
1418 // can edit it's inputs directly. Hammer in the new limit for the
1419 // minimum-trip guard.
1420 assert( opaq->outcnt() == 1, "" );
1421 _igvn.hash_delete(opaq);
1422 opaq->set_req(1, new_limit);
1423 }
1424 } // LoopLimitCheck
1425
1426 // ---------
1427 // Step 4: Clone the loop body. Move it inside the loop. This loop body
1428 // represents the odd iterations; since the loop trips an even number of
1494 // Now its tripping an even number of times remaining. Double loop body.
1495 // Do not adjust pre-guards; they are not needed and do not exist.
1496 if (cl->trip_count() > 0) {
1497 assert((cl->trip_count() & 1) == 0, "missed peeling");
1498 do_unroll(loop, old_new, false);
1499 }
1500 }
1501
1502 //------------------------------dominates_backedge---------------------------------
1503 // Returns true if ctrl is executed on every complete iteration
1504 bool IdealLoopTree::dominates_backedge(Node* ctrl) {
1505 assert(ctrl->is_CFG(), "must be control");
1506 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
1507 return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
1508 }
1509
1510 //------------------------------adjust_limit-----------------------------------
1511 // Helper function for add_constraint().
1512 Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl) {
1513 // Compute "I :: (limit-offset)/scale"
1514 Node *con = new (C) SubINode(rc_limit, offset);
1515 register_new_node(con, pre_ctrl);
1516 Node *X = new (C) DivINode(0, con, scale);
1517 register_new_node(X, pre_ctrl);
1518
1519 // Adjust loop limit
1520 loop_limit = (stride_con > 0)
1521 ? (Node*)(new (C) MinINode(loop_limit, X))
1522 : (Node*)(new (C) MaxINode(loop_limit, X));
1523 register_new_node(loop_limit, pre_ctrl);
1524 return loop_limit;
1525 }
1526
1527 //------------------------------add_constraint---------------------------------
1528 // Constrain the main loop iterations so the conditions:
1529 // low_limit <= scale_con * I + offset < upper_limit
1530 // always holds true. That is, either increase the number of iterations in
1531 // the pre-loop or the post-loop until the condition holds true in the main
1532 // loop. Stride, scale, offset and limit are all loop invariant. Further,
1533 // stride and scale are constants (offset and limit often are).
1534 void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
1535 // For positive stride, the pre-loop limit always uses a MAX function
1536 // and the main loop a MIN function. For negative stride these are
1537 // reversed.
1538
1539 // Also for positive stride*scale the affine function is increasing, so the
1540 // pre-loop must check for underflow and the post-loop for overflow.
1541 // Negative stride*scale reverses this; pre-loop checks for overflow and
1542 // post-loop for underflow.
1563 // NOT(scale*I+offset >= low_limit)
1564 // scale*I+offset < low_limit
1565 // ( if (scale > 0) /* and stride > 0 */
1566 // I < (low_limit-offset)/scale
1567 // else /* scale < 0 and stride < 0 */
1568 // I > (low_limit-offset)/scale
1569 // )
1570
1571 if (low_limit->get_int() == -max_jint) {
1572 if (!RangeLimitCheck) return;
1573 // We need this guard when scale*pre_limit+offset >= limit
1574 // due to underflow. So we need execute pre-loop until
1575 // scale*I+offset >= min_int. But (min_int-offset) will
1576 // underflow when offset > 0 and X will be > original_limit
1577 // when stride > 0. To avoid it we replace positive offset with 0.
1578 //
1579 // Also (min_int+1 == -max_int) is used instead of min_int here
1580 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1581 Node* shift = _igvn.intcon(31);
1582 set_ctrl(shift, C->root());
1583 Node* sign = new (C) RShiftINode(offset, shift);
1584 register_new_node(sign, pre_ctrl);
1585 offset = new (C) AndINode(offset, sign);
1586 register_new_node(offset, pre_ctrl);
1587 } else {
1588 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1589 // The only problem we have here when offset == min_int
1590 // since (0-min_int) == min_int. It may be fine for stride > 0
1591 // but for stride < 0 X will be < original_limit. To avoid it
1592 // max(pre_limit, original_limit) is used in do_range_check().
1593 }
1594 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1595 *pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl);
1596
1597 } else { // stride_con*scale_con < 0
1598 // For negative stride*scale pre-loop checks for overflow and
1599 // post-loop for underflow.
1600 //
1601 // The overflow limit: scale*I+offset < upper_limit
1602 // For pre-loop compute
1603 // NOT(scale*I+offset < upper_limit)
1604 // scale*I+offset >= upper_limit
1605 // scale*I+offset+1 > upper_limit
1606 // ( if (scale < 0) /* and stride > 0 */
1607 // I < (upper_limit-(offset+1))/scale
1608 // else /* scale > 0 and stride < 0 */
1609 // I > (upper_limit-(offset+1))/scale
1610 // )
1611 //
1612 // (upper_limit-offset-1) may underflow or overflow.
1613 // To avoid it min(pre_limit, original_limit) is used
1614 // in do_range_check() for stride > 0 and max() for < 0.
1615 Node *one = _igvn.intcon(1);
1616 set_ctrl(one, C->root());
1617
1618 Node *plus_one = new (C) AddINode(offset, one);
1619 register_new_node( plus_one, pre_ctrl );
1620 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1621 *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl);
1622
1623 if (low_limit->get_int() == -max_jint) {
1624 if (!RangeLimitCheck) return;
1625 // We need this guard when scale*main_limit+offset >= limit
1626 // due to underflow. So we need execute main-loop while
1627 // scale*I+offset+1 > min_int. But (min_int-offset-1) will
1628 // underflow when (offset+1) > 0 and X will be < main_limit
1629 // when scale < 0 (and stride > 0). To avoid it we replace
1630 // positive (offset+1) with 0.
1631 //
1632 // Also (min_int+1 == -max_int) is used instead of min_int here
1633 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1634 Node* shift = _igvn.intcon(31);
1635 set_ctrl(shift, C->root());
1636 Node* sign = new (C) RShiftINode(plus_one, shift);
1637 register_new_node(sign, pre_ctrl);
1638 plus_one = new (C) AndINode(plus_one, sign);
1639 register_new_node(plus_one, pre_ctrl);
1640 } else {
1641 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1642 // The only problem we have here when offset == max_int
1643 // since (max_int+1) == min_int and (0-min_int) == min_int.
1644 // But it is fine since main loop will either have
1645 // less iterations or will be skipped in such case.
1646 }
1647 // The underflow limit: low_limit <= scale*I+offset.
1648 // For main-loop compute
1649 // scale*I+offset+1 > low_limit
1650 // ( if (scale < 0) /* and stride > 0 */
1651 // I < (low_limit-(offset+1))/scale
1652 // else /* scale > 0 and stride < 0 */
1653 // I > (low_limit-(offset+1))/scale
1654 // )
1655
1656 *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl);
1657 }
1658 }
1701 set_ctrl(zero, C->root());
1702 *p_offset = zero;
1703 }
1704 return true;
1705 }
1706 int opc = exp->Opcode();
1707 if (opc == Op_AddI) {
1708 if (is_scaled_iv(exp->in(1), iv, p_scale)) {
1709 if (p_offset != NULL) {
1710 *p_offset = exp->in(2);
1711 }
1712 return true;
1713 }
1714 if (exp->in(2)->is_Con()) {
1715 Node* offset2 = NULL;
1716 if (depth < 2 &&
1717 is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
1718 p_offset != NULL ? &offset2 : NULL, depth+1)) {
1719 if (p_offset != NULL) {
1720 Node *ctrl_off2 = get_ctrl(offset2);
1721 Node* offset = new (C) AddINode(offset2, exp->in(2));
1722 register_new_node(offset, ctrl_off2);
1723 *p_offset = offset;
1724 }
1725 return true;
1726 }
1727 }
1728 } else if (opc == Op_SubI) {
1729 if (is_scaled_iv(exp->in(1), iv, p_scale)) {
1730 if (p_offset != NULL) {
1731 Node *zero = _igvn.intcon(0);
1732 set_ctrl(zero, C->root());
1733 Node *ctrl_off = get_ctrl(exp->in(2));
1734 Node* offset = new (C) SubINode(zero, exp->in(2));
1735 register_new_node(offset, ctrl_off);
1736 *p_offset = offset;
1737 }
1738 return true;
1739 }
1740 if (is_scaled_iv(exp->in(2), iv, p_scale)) {
1741 if (p_offset != NULL) {
1742 *p_scale *= -1;
1743 *p_offset = exp->in(1);
1744 }
1745 return true;
1746 }
1747 }
1748 return false;
1749 }
1750
1751 //------------------------------do_range_check---------------------------------
1752 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1753 void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
1754 #ifndef PRODUCT
1917 // The underflow and overflow limits: 0 <= scale*I+offset < limit
1918 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
1919 if (!conditional_rc) {
1920 // (0-offset)/scale could be outside of loop iterations range.
1921 conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
1922 }
1923 } else {
1924 #ifndef PRODUCT
1925 if( PrintOpto )
1926 tty->print_cr("missed RCE opportunity");
1927 #endif
1928 continue; // In release mode, ignore it
1929 }
1930 } else { // Otherwise work on normal compares
1931 switch( b_test._test ) {
1932 case BoolTest::gt:
1933 // Fall into GE case
1934 case BoolTest::ge:
1935 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
1936 scale_con = -scale_con;
1937 offset = new (C) SubINode( zero, offset );
1938 register_new_node( offset, pre_ctrl );
1939 limit = new (C) SubINode( zero, limit );
1940 register_new_node( limit, pre_ctrl );
1941 // Fall into LE case
1942 case BoolTest::le:
1943 if (b_test._test != BoolTest::gt) {
1944 // Convert X <= Y to X < Y+1
1945 limit = new (C) AddINode( limit, one );
1946 register_new_node( limit, pre_ctrl );
1947 }
1948 // Fall into LT case
1949 case BoolTest::lt:
1950 // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
1951 // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
1952 // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
1953 add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
1954 if (!conditional_rc) {
1955 // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
1956 // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
1957 // still be outside of loop range.
1958 conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
1959 }
1960 break;
1961 default:
1962 #ifndef PRODUCT
1963 if( PrintOpto )
1964 tty->print_cr("missed RCE opportunity");
1965 #endif
1976 assert(iff->is_If(), "");
1977 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
1978 // Find loads off the surviving projection; remove their control edge
1979 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
1980 Node* cd = dp->fast_out(i); // Control-dependent node
1981 if (cd->is_Load() && cd->depends_only_on_test()) { // Loads can now float around in the loop
1982 // Allow the load to float around in the loop, or before it
1983 // but NOT before the pre-loop.
1984 _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
1985 --i;
1986 --imax;
1987 }
1988 }
1989
1990 } // End of is IF
1991
1992 }
1993
1994 // Update loop limits
1995 if (conditional_rc) {
1996 pre_limit = (stride_con > 0) ? (Node*)new (C) MinINode(pre_limit, orig_limit)
1997 : (Node*)new (C) MaxINode(pre_limit, orig_limit);
1998 register_new_node(pre_limit, pre_ctrl);
1999 }
2000 _igvn.hash_delete(pre_opaq);
2001 pre_opaq->set_req(1, pre_limit);
2002
2003 // Note:: we are making the main loop limit no longer precise;
2004 // need to round up based on stride.
2005 cl->set_nonexact_trip_count();
2006 if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
2007 // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init
2008 // Hopefully, compiler will optimize for powers of 2.
2009 Node *ctrl = get_ctrl(main_limit);
2010 Node *stride = cl->stride();
2011 Node *init = cl->init_trip();
2012 Node *span = new (C) SubINode(main_limit,init);
2013 register_new_node(span,ctrl);
2014 Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1));
2015 Node *add = new (C) AddINode(span,rndup);
2016 register_new_node(add,ctrl);
2017 Node *div = new (C) DivINode(0,add,stride);
2018 register_new_node(div,ctrl);
2019 Node *mul = new (C) MulINode(div,stride);
2020 register_new_node(mul,ctrl);
2021 Node *newlim = new (C) AddINode(mul,init);
2022 register_new_node(newlim,ctrl);
2023 main_limit = newlim;
2024 }
2025
2026 Node *main_cle = cl->loopexit();
2027 Node *main_bol = main_cle->in(1);
2028 // Hacking loop bounds; need private copies of exit test
2029 if( main_bol->outcnt() > 1 ) {// BoolNode shared?
2030 _igvn.hash_delete(main_cle);
2031 main_bol = main_bol->clone();// Clone a private BoolNode
2032 register_new_node( main_bol, main_cle->in(0) );
2033 main_cle->set_req(1,main_bol);
2034 }
2035 Node *main_cmp = main_bol->in(1);
2036 if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
2037 _igvn.hash_delete(main_bol);
2038 main_cmp = main_cmp->clone();// Clone a private CmpNode
2039 register_new_node( main_cmp, main_cle->in(0) );
2040 main_bol->set_req(1,main_cmp);
2041 }
2172 if (needs_guard) {
2173 // Peel the loop to ensure there's a zero trip guard
2174 Node_List old_new;
2175 phase->do_peeling(this, old_new);
2176 }
2177
2178 // Replace the phi at loop head with the final value of the last
2179 // iteration. Then the CountedLoopEnd will collapse (backedge never
2180 // taken) and all loop-invariant uses of the exit values will be correct.
2181 Node *phi = cl->phi();
2182 Node *exact_limit = phase->exact_limit(this);
2183 if (exact_limit != cl->limit()) {
2184 // We also need to replace the original limit to collapse loop exit.
2185 Node* cmp = cl->loopexit()->cmp_node();
2186 assert(cl->limit() == cmp->in(2), "sanity");
2187 phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist
2188 phase->_igvn.replace_input_of(cmp, 2, exact_limit); // put cmp on worklist
2189 }
2190 // Note: the final value after increment should not overflow since
2191 // counted loop has limit check predicate.
2192 Node *final = new (phase->C) SubINode( exact_limit, cl->stride() );
2193 phase->register_new_node(final,cl->in(LoopNode::EntryControl));
2194 phase->_igvn.replace_node(phi,final);
2195 phase->C->set_major_progress();
2196 return true;
2197 }
2198
2199 //------------------------------policy_do_one_iteration_loop-------------------
2200 // Convert one iteration loop into normal code.
2201 bool IdealLoopTree::policy_do_one_iteration_loop( PhaseIdealLoop *phase ) {
2202 if (!_head->as_Loop()->is_valid_counted_loop())
2203 return false; // Only for counted loop
2204
2205 CountedLoopNode *cl = _head->as_CountedLoop();
2206 if (!cl->has_exact_trip_count() || cl->trip_count() != 1) {
2207 return false;
2208 }
2209
2210 #ifndef PRODUCT
2211 if(TraceLoopOpts) {
2212 tty->print("OneIteration ");
2659 Node* shift = NULL;
2660 Node* offset = NULL;
2661 if (!match_fill_loop(lpt, store, store_value, shift, offset)) {
2662 return false;
2663 }
2664
2665 #ifndef PRODUCT
2666 if (TraceLoopOpts) {
2667 tty->print("ArrayFill ");
2668 lpt->dump_head();
2669 }
2670 #endif
2671
2672 // Now replace the whole loop body by a call to a fill routine that
2673 // covers the same region as the loop.
2674 Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base);
2675
2676 // Build an expression for the beginning of the copy region
2677 Node* index = head->init_trip();
2678 #ifdef _LP64
2679 index = new (C) ConvI2LNode(index);
2680 _igvn.register_new_node_with_optimizer(index);
2681 #endif
2682 if (shift != NULL) {
2683 // byte arrays don't require a shift but others do.
2684 index = new (C) LShiftXNode(index, shift->in(2));
2685 _igvn.register_new_node_with_optimizer(index);
2686 }
2687 index = new (C) AddPNode(base, base, index);
2688 _igvn.register_new_node_with_optimizer(index);
2689 Node* from = new (C) AddPNode(base, index, offset);
2690 _igvn.register_new_node_with_optimizer(from);
2691 // Compute the number of elements to copy
2692 Node* len = new (C) SubINode(head->limit(), head->init_trip());
2693 _igvn.register_new_node_with_optimizer(len);
2694
2695 BasicType t = store->as_Mem()->memory_type();
2696 bool aligned = false;
2697 if (offset != NULL && head->init_trip()->is_Con()) {
2698 int element_size = type2aelembytes(t);
2699 aligned = (offset->find_intptr_t_type()->get_con() + head->init_trip()->get_int() * element_size) % HeapWordSize == 0;
2700 }
2701
2702 // Build a call to the fill routine
2703 const char* fill_name;
2704 address fill = StubRoutines::select_fill_function(t, aligned, fill_name);
2705 assert(fill != NULL, "what?");
2706
2707 // Convert float/double to int/long for fill routines
2708 if (t == T_FLOAT) {
2709 store_value = new (C) MoveF2INode(store_value);
2710 _igvn.register_new_node_with_optimizer(store_value);
2711 } else if (t == T_DOUBLE) {
2712 store_value = new (C) MoveD2LNode(store_value);
2713 _igvn.register_new_node_with_optimizer(store_value);
2714 }
2715
2716 if (CCallingConventionRequiresIntsAsLongs &&
2717 // See StubRoutines::select_fill_function for types. FLOAT has been converted to INT.
2718 (t == T_FLOAT || t == T_INT || is_subword_type(t))) {
2719 store_value = new (C) ConvI2LNode(store_value);
2720 _igvn.register_new_node_with_optimizer(store_value);
2721 }
2722
2723 Node* mem_phi = store->in(MemNode::Memory);
2724 Node* result_ctrl;
2725 Node* result_mem;
2726 const TypeFunc* call_type = OptoRuntime::array_fill_Type();
2727 CallLeafNode *call = new (C) CallLeafNoFPNode(call_type, fill,
2728 fill_name, TypeAryPtr::get_array_body_type(t));
2729 uint cnt = 0;
2730 call->init_req(TypeFunc::Parms + cnt++, from);
2731 call->init_req(TypeFunc::Parms + cnt++, store_value);
2732 if (CCallingConventionRequiresIntsAsLongs) {
2733 call->init_req(TypeFunc::Parms + cnt++, C->top());
2734 }
2735 #ifdef _LP64
2736 len = new (C) ConvI2LNode(len);
2737 _igvn.register_new_node_with_optimizer(len);
2738 #endif
2739 call->init_req(TypeFunc::Parms + cnt++, len);
2740 #ifdef _LP64
2741 call->init_req(TypeFunc::Parms + cnt++, C->top());
2742 #endif
2743 call->init_req(TypeFunc::Control, head->init_control());
2744 call->init_req(TypeFunc::I_O, C->top()); // Does no I/O.
2745 call->init_req(TypeFunc::Memory, mem_phi->in(LoopNode::EntryControl));
2746 call->init_req(TypeFunc::ReturnAdr, C->start()->proj_out(TypeFunc::ReturnAdr));
2747 call->init_req(TypeFunc::FramePtr, C->start()->proj_out(TypeFunc::FramePtr));
2748 _igvn.register_new_node_with_optimizer(call);
2749 result_ctrl = new (C) ProjNode(call,TypeFunc::Control);
2750 _igvn.register_new_node_with_optimizer(result_ctrl);
2751 result_mem = new (C) ProjNode(call,TypeFunc::Memory);
2752 _igvn.register_new_node_with_optimizer(result_mem);
2753
2754 /* Disable following optimization until proper fix (add missing checks).
2755
2756 // If this fill is tightly coupled to an allocation and overwrites
2757 // the whole body, allow it to take over the zeroing.
2758 AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this);
2759 if (alloc != NULL && alloc->is_AllocateArray()) {
2760 Node* length = alloc->as_AllocateArray()->Ideal_length();
2761 if (head->limit() == length &&
2762 head->init_trip() == _igvn.intcon(0)) {
2763 if (TraceOptimizeFill) {
2764 tty->print_cr("Eliminated zeroing in allocation");
2765 }
2766 alloc->maybe_set_complete(&_igvn);
2767 } else {
2768 #ifdef ASSERT
2769 if (TraceOptimizeFill) {
2770 tty->print_cr("filling array but bounds don't match");
2771 alloc->dump();
|
210 Node* inv1 = n1->in(inv1_idx);
211 Node* n2 = n1->in(3 - inv1_idx);
212 int inv2_idx = is_invariant_addition(n2, phase);
213 if (!inv2_idx) return NULL;
214 Node* x = n2->in(3 - inv2_idx);
215 Node* inv2 = n2->in(inv2_idx);
216
217 bool neg_x = n2->is_Sub() && inv2_idx == 1;
218 bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
219 bool neg_inv1 = n1->is_Sub() && inv1_idx == 2;
220 if (n1->is_Sub() && inv1_idx == 1) {
221 neg_x = !neg_x;
222 neg_inv2 = !neg_inv2;
223 }
224 Node* inv1_c = phase->get_ctrl(inv1);
225 Node* inv2_c = phase->get_ctrl(inv2);
226 Node* n_inv1;
227 if (neg_inv1) {
228 Node *zero = phase->_igvn.intcon(0);
229 phase->set_ctrl(zero, phase->C->root());
230 n_inv1 = new SubINode(zero, inv1);
231 phase->register_new_node(n_inv1, inv1_c);
232 } else {
233 n_inv1 = inv1;
234 }
235 Node* inv;
236 if (neg_inv2) {
237 inv = new SubINode(n_inv1, inv2);
238 } else {
239 inv = new AddINode(n_inv1, inv2);
240 }
241 phase->register_new_node(inv, phase->get_early_ctrl(inv));
242
243 Node* addx;
244 if (neg_x) {
245 addx = new SubINode(inv, x);
246 } else {
247 addx = new AddINode(x, inv);
248 }
249 phase->register_new_node(addx, phase->get_ctrl(x));
250 phase->_igvn.replace_node(n1, addx);
251 assert(phase->get_loop(phase->get_ctrl(n1)) == this, "");
252 _body.yank(n1);
253 return addx;
254 }
255
256 //---------------------reassociate_invariants-----------------------------
257 // Reassociate invariant expressions:
258 void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
259 for (int i = _body.size() - 1; i >= 0; i--) {
260 Node *n = _body.at(i);
261 for (int j = 0; j < 5; j++) {
262 Node* nn = reassociate_add_sub(n, phase);
263 if (nn == NULL) break;
264 n = nn; // again
265 };
266 }
267 }
936 }
937
938 //------------------------------
939 // Step A: Create Post-Loop.
940 Node* main_exit = main_end->proj_out(false);
941 assert( main_exit->Opcode() == Op_IfFalse, "" );
942 int dd_main_exit = dom_depth(main_exit);
943
944 // Step A1: Clone the loop body. The clone becomes the post-loop. The main
945 // loop pre-header illegally has 2 control users (old & new loops).
946 clone_loop( loop, old_new, dd_main_exit );
947 assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" );
948 CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
949 post_head->set_post_loop(main_head);
950
951 // Reduce the post-loop trip count.
952 CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
953 post_end->_prob = PROB_FAIR;
954
955 // Build the main-loop normal exit.
956 IfFalseNode *new_main_exit = new IfFalseNode(main_end);
957 _igvn.register_new_node_with_optimizer( new_main_exit );
958 set_idom(new_main_exit, main_end, dd_main_exit );
959 set_loop(new_main_exit, loop->_parent);
960
961 // Step A2: Build a zero-trip guard for the post-loop. After leaving the
962 // main-loop, the post-loop may not execute at all. We 'opaque' the incr
963 // (the main-loop trip-counter exit value) because we will be changing
964 // the exit value (via unrolling) so we cannot constant-fold away the zero
965 // trip guard until all unrolling is done.
966 Node *zer_opaq = new Opaque1Node(C, incr);
967 Node *zer_cmp = new CmpINode( zer_opaq, limit );
968 Node *zer_bol = new BoolNode( zer_cmp, b_test );
969 register_new_node( zer_opaq, new_main_exit );
970 register_new_node( zer_cmp , new_main_exit );
971 register_new_node( zer_bol , new_main_exit );
972
973 // Build the IfNode
974 IfNode *zer_iff = new IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN );
975 _igvn.register_new_node_with_optimizer( zer_iff );
976 set_idom(zer_iff, new_main_exit, dd_main_exit);
977 set_loop(zer_iff, loop->_parent);
978
979 // Plug in the false-path, taken if we need to skip post-loop
980 _igvn.replace_input_of(main_exit, 0, zer_iff);
981 set_idom(main_exit, zer_iff, dd_main_exit);
982 set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
983 // Make the true-path, must enter the post loop
984 Node *zer_taken = new IfTrueNode( zer_iff );
985 _igvn.register_new_node_with_optimizer( zer_taken );
986 set_idom(zer_taken, zer_iff, dd_main_exit);
987 set_loop(zer_taken, loop->_parent);
988 // Plug in the true path
989 _igvn.hash_delete( post_head );
990 post_head->set_req(LoopNode::EntryControl, zer_taken);
991 set_idom(post_head, zer_taken, dd_main_exit);
992
993 Arena *a = Thread::current()->resource_area();
994 VectorSet visited(a);
995 Node_Stack clones(a, main_head->back_control()->outcnt());
996 // Step A3: Make the fall-in values to the post-loop come from the
997 // fall-out values of the main-loop.
998 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
999 Node* main_phi = main_head->fast_out(i);
1000 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) {
1001 Node *post_phi = old_new[main_phi->_idx];
1002 Node *fallmain = clone_up_backedge_goo(main_head->back_control(),
1003 post_head->init_control(),
1004 main_phi->in(LoopNode::LoopBackControl),
1012 main_exit = new_main_exit;
1013
1014
1015 //------------------------------
1016 // Step B: Create Pre-Loop.
1017
1018 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
1019 // loop pre-header illegally has 2 control users (old & new loops).
1020 clone_loop( loop, old_new, dd_main_head );
1021 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
1022 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
1023 pre_head->set_pre_loop(main_head);
1024 Node *pre_incr = old_new[incr->_idx];
1025
1026 // Reduce the pre-loop trip count.
1027 pre_end->_prob = PROB_FAIR;
1028
1029 // Find the pre-loop normal exit.
1030 Node* pre_exit = pre_end->proj_out(false);
1031 assert( pre_exit->Opcode() == Op_IfFalse, "" );
1032 IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
1033 _igvn.register_new_node_with_optimizer( new_pre_exit );
1034 set_idom(new_pre_exit, pre_end, dd_main_head);
1035 set_loop(new_pre_exit, loop->_parent);
1036
1037 // Step B2: Build a zero-trip guard for the main-loop. After leaving the
1038 // pre-loop, the main-loop may not execute at all. Later in life this
1039 // zero-trip guard will become the minimum-trip guard when we unroll
1040 // the main-loop.
1041 Node *min_opaq = new Opaque1Node(C, limit);
1042 Node *min_cmp = new CmpINode( pre_incr, min_opaq );
1043 Node *min_bol = new BoolNode( min_cmp, b_test );
1044 register_new_node( min_opaq, new_pre_exit );
1045 register_new_node( min_cmp , new_pre_exit );
1046 register_new_node( min_bol , new_pre_exit );
1047
1048 // Build the IfNode (assume the main-loop is executed always).
1049 IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
1050 _igvn.register_new_node_with_optimizer( min_iff );
1051 set_idom(min_iff, new_pre_exit, dd_main_head);
1052 set_loop(min_iff, loop->_parent);
1053
1054 // Plug in the false-path, taken if we need to skip main-loop
1055 _igvn.hash_delete( pre_exit );
1056 pre_exit->set_req(0, min_iff);
1057 set_idom(pre_exit, min_iff, dd_main_head);
1058 set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
1059 // Make the true-path, must enter the main loop
1060 Node *min_taken = new IfTrueNode( min_iff );
1061 _igvn.register_new_node_with_optimizer( min_taken );
1062 set_idom(min_taken, min_iff, dd_main_head);
1063 set_loop(min_taken, loop->_parent);
1064 // Plug in the true path
1065 _igvn.hash_delete( main_head );
1066 main_head->set_req(LoopNode::EntryControl, min_taken);
1067 set_idom(main_head, min_taken, dd_main_head);
1068
1069 visited.Clear();
1070 clones.clear();
1071 // Step B3: Make the fall-in values to the main-loop come from the
1072 // fall-out values of the pre-loop.
1073 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
1074 Node* main_phi = main_head->fast_out(i2);
1075 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
1076 Node *pre_phi = old_new[main_phi->_idx];
1077 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
1078 main_head->init_control(),
1079 pre_phi->in(LoopNode::LoopBackControl),
1080 visited, clones);
1081 _igvn.hash_delete(main_phi);
1082 main_phi->set_req( LoopNode::EntryControl, fallpre );
1083 }
1084 }
1085
1086 // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
1087 // RCE and alignment may change this later.
1088 Node *cmp_end = pre_end->cmp_node();
1089 assert( cmp_end->in(2) == limit, "" );
1090 Node *pre_limit = new AddINode( init, stride );
1091
1092 // Save the original loop limit in this Opaque1 node for
1093 // use by range check elimination.
1094 Node *pre_opaq = new Opaque1Node(C, pre_limit, limit);
1095
1096 register_new_node( pre_limit, pre_head->in(0) );
1097 register_new_node( pre_opaq , pre_head->in(0) );
1098
1099 // Since no other users of pre-loop compare, I can hack limit directly
1100 assert( cmp_end->outcnt() == 1, "no other users" );
1101 _igvn.hash_delete(cmp_end);
1102 cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
1103
1104 // Special case for not-equal loop bounds:
1105 // Change pre loop test, main loop test, and the
1106 // main loop guard test to use lt or gt depending on stride
1107 // direction:
1108 // positive stride use <
1109 // negative stride use >
1110 //
1111 // not-equal test is kept for post loop to handle case
1112 // when init > limit when stride > 0 (and reverse).
1113
1114 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
1115
1116 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
1117 // Modify pre loop end condition
1118 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1119 BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
1120 register_new_node( new_bol0, pre_head->in(0) );
1121 _igvn.hash_delete(pre_end);
1122 pre_end->set_req(CountedLoopEndNode::TestValue, new_bol0);
1123 // Modify main loop guard condition
1124 assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
1125 BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
1126 register_new_node( new_bol1, new_pre_exit );
1127 _igvn.hash_delete(min_iff);
1128 min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
1129 // Modify main loop end condition
1130 BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1131 BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
1132 register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
1133 _igvn.hash_delete(main_end);
1134 main_end->set_req(CountedLoopEndNode::TestValue, new_bol2);
1135 }
1136
1137 // Flag main loop
1138 main_head->set_main_loop();
1139 if( peel_only ) main_head->set_main_no_pre_loop();
1140
1141 // Subtract a trip count for the pre-loop.
1142 main_head->set_trip_count(main_head->trip_count() - 1);
1143
1144 // It's difficult to be precise about the trip-counts
1145 // for the pre/post loops. They are usually very short,
1146 // so guess that 4 trips is a reasonable value.
1147 post_head->set_profile_trip_cnt(4.0);
1148 pre_head->set_profile_trip_cnt(4.0);
1149
1150 // Now force out all loop-invariant dominating tests. The optimizer
1151 // finds some, but we _know_ they are all useless.
1262 const TypeInt* limit_type = _igvn.type(limit)->is_int();
1263 assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) ||
1264 stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity");
1265
1266 if (limit->is_Con()) {
1267 // The check in policy_unroll and the assert above guarantee
1268 // no underflow if limit is constant.
1269 new_limit = _igvn.intcon(limit->get_int() - stride_con);
1270 set_ctrl(new_limit, C->root());
1271 } else {
1272 // Limit is not constant.
1273 if (loop_head->unrolled_count() == 1) { // only for first unroll
1274 // Separate limit by Opaque node in case it is an incremented
1275 // variable from previous loop to avoid using pre-incremented
1276 // value which could increase register pressure.
1277 // Otherwise reorg_offsets() optimization will create a separate
1278 // Opaque node for each use of trip-counter and as result
1279 // zero trip guard limit will be different from loop limit.
1280 assert(has_ctrl(opaq), "should have it");
1281 Node* opaq_ctrl = get_ctrl(opaq);
1282 limit = new Opaque2Node( C, limit );
1283 register_new_node( limit, opaq_ctrl );
1284 }
1285 if (stride_con > 0 && ((limit_type->_lo - stride_con) < limit_type->_lo) ||
1286 stride_con < 0 && ((limit_type->_hi - stride_con) > limit_type->_hi)) {
1287 // No underflow.
1288 new_limit = new SubINode(limit, stride);
1289 } else {
1290 // (limit - stride) may underflow.
1291 // Clamp the adjustment value with MININT or MAXINT:
1292 //
1293 // new_limit = limit-stride
1294 // if (stride > 0)
1295 // new_limit = (limit < new_limit) ? MININT : new_limit;
1296 // else
1297 // new_limit = (limit > new_limit) ? MAXINT : new_limit;
1298 //
1299 BoolTest::mask bt = loop_end->test_trip();
1300 assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
1301 Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
1302 set_ctrl(adj_max, C->root());
1303 Node* old_limit = NULL;
1304 Node* adj_limit = NULL;
1305 Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
1306 if (loop_head->unrolled_count() > 1 &&
1307 limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
1308 limit->in(CMoveNode::IfTrue) == adj_max &&
1309 bol->as_Bool()->_test._test == bt &&
1310 bol->in(1)->Opcode() == Op_CmpI &&
1311 bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
1312 // Loop was unrolled before.
1313 // Optimize the limit to avoid nested CMove:
1314 // use original limit as old limit.
1315 old_limit = bol->in(1)->in(1);
1316 // Adjust previous adjusted limit.
1317 adj_limit = limit->in(CMoveNode::IfFalse);
1318 adj_limit = new SubINode(adj_limit, stride);
1319 } else {
1320 old_limit = limit;
1321 adj_limit = new SubINode(limit, stride);
1322 }
1323 assert(old_limit != NULL && adj_limit != NULL, "");
1324 register_new_node( adj_limit, ctrl ); // adjust amount
1325 Node* adj_cmp = new CmpINode(old_limit, adj_limit);
1326 register_new_node( adj_cmp, ctrl );
1327 Node* adj_bool = new BoolNode(adj_cmp, bt);
1328 register_new_node( adj_bool, ctrl );
1329 new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
1330 }
1331 register_new_node(new_limit, ctrl);
1332 }
1333 assert(new_limit != NULL, "");
1334 // Replace in loop test.
1335 assert(loop_end->in(1)->in(1) == cmp, "sanity");
1336 if (cmp->outcnt() == 1 && loop_end->in(1)->outcnt() == 1) {
1337 // Don't need to create new test since only one user.
1338 _igvn.hash_delete(cmp);
1339 cmp->set_req(2, new_limit);
1340 } else {
1341 // Create new test since it is shared.
1342 Node* ctrl2 = loop_end->in(0);
1343 Node* cmp2 = cmp->clone();
1344 cmp2->set_req(2, new_limit);
1345 register_new_node(cmp2, ctrl2);
1346 Node* bol2 = loop_end->in(1)->clone();
1347 bol2->set_req(1, cmp2);
1348 register_new_node(bol2, ctrl2);
1349 _igvn.hash_delete(loop_end);
1371 loop_head->double_unrolled_count();
1372
1373 } else { // LoopLimitCheck
1374
1375 // Adjust max trip count. The trip count is intentionally rounded
1376 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
1377 // the main, unrolled, part of the loop will never execute as it is protected
1378 // by the min-trip test. See bug 4834191 for a case where we over-unrolled
1379 // and later determined that part of the unrolled loop was dead.
1380 loop_head->set_trip_count(loop_head->trip_count() / 2);
1381
1382 // Double the count of original iterations in the unrolled loop body.
1383 loop_head->double_unrolled_count();
1384
1385 // -----------
1386 // Step 2: Cut back the trip counter for an unroll amount of 2.
1387 // Loop will normally trip (limit - init)/stride_con. Since it's a
1388 // CountedLoop this is exact (stride divides limit-init exactly).
1389 // We are going to double the loop body, so we want to knock off any
1390 // odd iteration: (trip_cnt & ~1). Then back compute a new limit.
1391 Node *span = new SubINode( limit, init );
1392 register_new_node( span, ctrl );
1393 Node *trip = new DivINode( 0, span, stride );
1394 register_new_node( trip, ctrl );
1395 Node *mtwo = _igvn.intcon(-2);
1396 set_ctrl(mtwo, C->root());
1397 Node *rond = new AndINode( trip, mtwo );
1398 register_new_node( rond, ctrl );
1399 Node *spn2 = new MulINode( rond, stride );
1400 register_new_node( spn2, ctrl );
1401 new_limit = new AddINode( spn2, init );
1402 register_new_node( new_limit, ctrl );
1403
1404 // Hammer in the new limit
1405 Node *ctrl2 = loop_end->in(0);
1406 Node *cmp2 = new CmpINode( loop_head->incr(), new_limit );
1407 register_new_node( cmp2, ctrl2 );
1408 Node *bol2 = new BoolNode( cmp2, loop_end->test_trip() );
1409 register_new_node( bol2, ctrl2 );
1410 _igvn.hash_delete(loop_end);
1411 loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
1412
1413 // Step 3: Find the min-trip test guaranteed before a 'main' loop.
1414 // Make it a 1-trip test (means at least 2 trips).
1415 if( adjust_min_trip ) {
1416 assert( new_limit != NULL, "" );
1417 // Guard test uses an 'opaque' node which is not shared. Hence I
1418 // can edit it's inputs directly. Hammer in the new limit for the
1419 // minimum-trip guard.
1420 assert( opaq->outcnt() == 1, "" );
1421 _igvn.hash_delete(opaq);
1422 opaq->set_req(1, new_limit);
1423 }
1424 } // LoopLimitCheck
1425
1426 // ---------
1427 // Step 4: Clone the loop body. Move it inside the loop. This loop body
1428 // represents the odd iterations; since the loop trips an even number of
1494 // Now its tripping an even number of times remaining. Double loop body.
1495 // Do not adjust pre-guards; they are not needed and do not exist.
1496 if (cl->trip_count() > 0) {
1497 assert((cl->trip_count() & 1) == 0, "missed peeling");
1498 do_unroll(loop, old_new, false);
1499 }
1500 }
1501
1502 //------------------------------dominates_backedge---------------------------------
1503 // Returns true if ctrl is executed on every complete iteration
1504 bool IdealLoopTree::dominates_backedge(Node* ctrl) {
1505 assert(ctrl->is_CFG(), "must be control");
1506 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
1507 return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
1508 }
1509
1510 //------------------------------adjust_limit-----------------------------------
1511 // Helper function for add_constraint().
1512 Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl) {
1513 // Compute "I :: (limit-offset)/scale"
1514 Node *con = new SubINode(rc_limit, offset);
1515 register_new_node(con, pre_ctrl);
1516 Node *X = new DivINode(0, con, scale);
1517 register_new_node(X, pre_ctrl);
1518
1519 // Adjust loop limit
1520 loop_limit = (stride_con > 0)
1521 ? (Node*)(new MinINode(loop_limit, X))
1522 : (Node*)(new MaxINode(loop_limit, X));
1523 register_new_node(loop_limit, pre_ctrl);
1524 return loop_limit;
1525 }
1526
1527 //------------------------------add_constraint---------------------------------
1528 // Constrain the main loop iterations so the conditions:
1529 // low_limit <= scale_con * I + offset < upper_limit
1530 // always holds true. That is, either increase the number of iterations in
1531 // the pre-loop or the post-loop until the condition holds true in the main
1532 // loop. Stride, scale, offset and limit are all loop invariant. Further,
1533 // stride and scale are constants (offset and limit often are).
1534 void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
1535 // For positive stride, the pre-loop limit always uses a MAX function
1536 // and the main loop a MIN function. For negative stride these are
1537 // reversed.
1538
1539 // Also for positive stride*scale the affine function is increasing, so the
1540 // pre-loop must check for underflow and the post-loop for overflow.
1541 // Negative stride*scale reverses this; pre-loop checks for overflow and
1542 // post-loop for underflow.
1563 // NOT(scale*I+offset >= low_limit)
1564 // scale*I+offset < low_limit
1565 // ( if (scale > 0) /* and stride > 0 */
1566 // I < (low_limit-offset)/scale
1567 // else /* scale < 0 and stride < 0 */
1568 // I > (low_limit-offset)/scale
1569 // )
1570
1571 if (low_limit->get_int() == -max_jint) {
1572 if (!RangeLimitCheck) return;
1573 // We need this guard when scale*pre_limit+offset >= limit
1574 // due to underflow. So we need execute pre-loop until
1575 // scale*I+offset >= min_int. But (min_int-offset) will
1576 // underflow when offset > 0 and X will be > original_limit
1577 // when stride > 0. To avoid it we replace positive offset with 0.
1578 //
1579 // Also (min_int+1 == -max_int) is used instead of min_int here
1580 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1581 Node* shift = _igvn.intcon(31);
1582 set_ctrl(shift, C->root());
1583 Node* sign = new RShiftINode(offset, shift);
1584 register_new_node(sign, pre_ctrl);
1585 offset = new AndINode(offset, sign);
1586 register_new_node(offset, pre_ctrl);
1587 } else {
1588 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1589 // The only problem we have here when offset == min_int
1590 // since (0-min_int) == min_int. It may be fine for stride > 0
1591 // but for stride < 0 X will be < original_limit. To avoid it
1592 // max(pre_limit, original_limit) is used in do_range_check().
1593 }
1594 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1595 *pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl);
1596
1597 } else { // stride_con*scale_con < 0
1598 // For negative stride*scale pre-loop checks for overflow and
1599 // post-loop for underflow.
1600 //
1601 // The overflow limit: scale*I+offset < upper_limit
1602 // For pre-loop compute
1603 // NOT(scale*I+offset < upper_limit)
1604 // scale*I+offset >= upper_limit
1605 // scale*I+offset+1 > upper_limit
1606 // ( if (scale < 0) /* and stride > 0 */
1607 // I < (upper_limit-(offset+1))/scale
1608 // else /* scale > 0 and stride < 0 */
1609 // I > (upper_limit-(offset+1))/scale
1610 // )
1611 //
1612 // (upper_limit-offset-1) may underflow or overflow.
1613 // To avoid it min(pre_limit, original_limit) is used
1614 // in do_range_check() for stride > 0 and max() for < 0.
1615 Node *one = _igvn.intcon(1);
1616 set_ctrl(one, C->root());
1617
1618 Node *plus_one = new AddINode(offset, one);
1619 register_new_node( plus_one, pre_ctrl );
1620 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1621 *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl);
1622
1623 if (low_limit->get_int() == -max_jint) {
1624 if (!RangeLimitCheck) return;
1625 // We need this guard when scale*main_limit+offset >= limit
1626 // due to underflow. So we need execute main-loop while
1627 // scale*I+offset+1 > min_int. But (min_int-offset-1) will
1628 // underflow when (offset+1) > 0 and X will be < main_limit
1629 // when scale < 0 (and stride > 0). To avoid it we replace
1630 // positive (offset+1) with 0.
1631 //
1632 // Also (min_int+1 == -max_int) is used instead of min_int here
1633 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1634 Node* shift = _igvn.intcon(31);
1635 set_ctrl(shift, C->root());
1636 Node* sign = new RShiftINode(plus_one, shift);
1637 register_new_node(sign, pre_ctrl);
1638 plus_one = new AndINode(plus_one, sign);
1639 register_new_node(plus_one, pre_ctrl);
1640 } else {
1641 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1642 // The only problem we have here when offset == max_int
1643 // since (max_int+1) == min_int and (0-min_int) == min_int.
1644 // But it is fine since main loop will either have
1645 // less iterations or will be skipped in such case.
1646 }
1647 // The underflow limit: low_limit <= scale*I+offset.
1648 // For main-loop compute
1649 // scale*I+offset+1 > low_limit
1650 // ( if (scale < 0) /* and stride > 0 */
1651 // I < (low_limit-(offset+1))/scale
1652 // else /* scale > 0 and stride < 0 */
1653 // I > (low_limit-(offset+1))/scale
1654 // )
1655
1656 *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl);
1657 }
1658 }
1701 set_ctrl(zero, C->root());
1702 *p_offset = zero;
1703 }
1704 return true;
1705 }
1706 int opc = exp->Opcode();
1707 if (opc == Op_AddI) {
1708 if (is_scaled_iv(exp->in(1), iv, p_scale)) {
1709 if (p_offset != NULL) {
1710 *p_offset = exp->in(2);
1711 }
1712 return true;
1713 }
1714 if (exp->in(2)->is_Con()) {
1715 Node* offset2 = NULL;
1716 if (depth < 2 &&
1717 is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
1718 p_offset != NULL ? &offset2 : NULL, depth+1)) {
1719 if (p_offset != NULL) {
1720 Node *ctrl_off2 = get_ctrl(offset2);
1721 Node* offset = new AddINode(offset2, exp->in(2));
1722 register_new_node(offset, ctrl_off2);
1723 *p_offset = offset;
1724 }
1725 return true;
1726 }
1727 }
1728 } else if (opc == Op_SubI) {
1729 if (is_scaled_iv(exp->in(1), iv, p_scale)) {
1730 if (p_offset != NULL) {
1731 Node *zero = _igvn.intcon(0);
1732 set_ctrl(zero, C->root());
1733 Node *ctrl_off = get_ctrl(exp->in(2));
1734 Node* offset = new SubINode(zero, exp->in(2));
1735 register_new_node(offset, ctrl_off);
1736 *p_offset = offset;
1737 }
1738 return true;
1739 }
1740 if (is_scaled_iv(exp->in(2), iv, p_scale)) {
1741 if (p_offset != NULL) {
1742 *p_scale *= -1;
1743 *p_offset = exp->in(1);
1744 }
1745 return true;
1746 }
1747 }
1748 return false;
1749 }
1750
1751 //------------------------------do_range_check---------------------------------
1752 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1753 void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
1754 #ifndef PRODUCT
1917 // The underflow and overflow limits: 0 <= scale*I+offset < limit
1918 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
1919 if (!conditional_rc) {
1920 // (0-offset)/scale could be outside of loop iterations range.
1921 conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
1922 }
1923 } else {
1924 #ifndef PRODUCT
1925 if( PrintOpto )
1926 tty->print_cr("missed RCE opportunity");
1927 #endif
1928 continue; // In release mode, ignore it
1929 }
1930 } else { // Otherwise work on normal compares
1931 switch( b_test._test ) {
1932 case BoolTest::gt:
1933 // Fall into GE case
1934 case BoolTest::ge:
1935 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
1936 scale_con = -scale_con;
1937 offset = new SubINode( zero, offset );
1938 register_new_node( offset, pre_ctrl );
1939 limit = new SubINode( zero, limit );
1940 register_new_node( limit, pre_ctrl );
1941 // Fall into LE case
1942 case BoolTest::le:
1943 if (b_test._test != BoolTest::gt) {
1944 // Convert X <= Y to X < Y+1
1945 limit = new AddINode( limit, one );
1946 register_new_node( limit, pre_ctrl );
1947 }
1948 // Fall into LT case
1949 case BoolTest::lt:
1950 // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
1951 // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
1952 // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
1953 add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
1954 if (!conditional_rc) {
1955 // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
1956 // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
1957 // still be outside of loop range.
1958 conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
1959 }
1960 break;
1961 default:
1962 #ifndef PRODUCT
1963 if( PrintOpto )
1964 tty->print_cr("missed RCE opportunity");
1965 #endif
1976 assert(iff->is_If(), "");
1977 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
1978 // Find loads off the surviving projection; remove their control edge
1979 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
1980 Node* cd = dp->fast_out(i); // Control-dependent node
1981 if (cd->is_Load() && cd->depends_only_on_test()) { // Loads can now float around in the loop
1982 // Allow the load to float around in the loop, or before it
1983 // but NOT before the pre-loop.
1984 _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
1985 --i;
1986 --imax;
1987 }
1988 }
1989
1990 } // End of is IF
1991
1992 }
1993
1994 // Update loop limits
1995 if (conditional_rc) {
1996 pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
1997 : (Node*)new MaxINode(pre_limit, orig_limit);
1998 register_new_node(pre_limit, pre_ctrl);
1999 }
2000 _igvn.hash_delete(pre_opaq);
2001 pre_opaq->set_req(1, pre_limit);
2002
2003 // Note:: we are making the main loop limit no longer precise;
2004 // need to round up based on stride.
2005 cl->set_nonexact_trip_count();
2006 if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
2007 // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init
2008 // Hopefully, compiler will optimize for powers of 2.
2009 Node *ctrl = get_ctrl(main_limit);
2010 Node *stride = cl->stride();
2011 Node *init = cl->init_trip();
2012 Node *span = new SubINode(main_limit,init);
2013 register_new_node(span,ctrl);
2014 Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1));
2015 Node *add = new AddINode(span,rndup);
2016 register_new_node(add,ctrl);
2017 Node *div = new DivINode(0,add,stride);
2018 register_new_node(div,ctrl);
2019 Node *mul = new MulINode(div,stride);
2020 register_new_node(mul,ctrl);
2021 Node *newlim = new AddINode(mul,init);
2022 register_new_node(newlim,ctrl);
2023 main_limit = newlim;
2024 }
2025
2026 Node *main_cle = cl->loopexit();
2027 Node *main_bol = main_cle->in(1);
2028 // Hacking loop bounds; need private copies of exit test
2029 if( main_bol->outcnt() > 1 ) {// BoolNode shared?
2030 _igvn.hash_delete(main_cle);
2031 main_bol = main_bol->clone();// Clone a private BoolNode
2032 register_new_node( main_bol, main_cle->in(0) );
2033 main_cle->set_req(1,main_bol);
2034 }
2035 Node *main_cmp = main_bol->in(1);
2036 if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
2037 _igvn.hash_delete(main_bol);
2038 main_cmp = main_cmp->clone();// Clone a private CmpNode
2039 register_new_node( main_cmp, main_cle->in(0) );
2040 main_bol->set_req(1,main_cmp);
2041 }
2172 if (needs_guard) {
2173 // Peel the loop to ensure there's a zero trip guard
2174 Node_List old_new;
2175 phase->do_peeling(this, old_new);
2176 }
2177
2178 // Replace the phi at loop head with the final value of the last
2179 // iteration. Then the CountedLoopEnd will collapse (backedge never
2180 // taken) and all loop-invariant uses of the exit values will be correct.
2181 Node *phi = cl->phi();
2182 Node *exact_limit = phase->exact_limit(this);
2183 if (exact_limit != cl->limit()) {
2184 // We also need to replace the original limit to collapse loop exit.
2185 Node* cmp = cl->loopexit()->cmp_node();
2186 assert(cl->limit() == cmp->in(2), "sanity");
2187 phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist
2188 phase->_igvn.replace_input_of(cmp, 2, exact_limit); // put cmp on worklist
2189 }
2190 // Note: the final value after increment should not overflow since
2191 // counted loop has limit check predicate.
2192 Node *final = new SubINode( exact_limit, cl->stride() );
2193 phase->register_new_node(final,cl->in(LoopNode::EntryControl));
2194 phase->_igvn.replace_node(phi,final);
2195 phase->C->set_major_progress();
2196 return true;
2197 }
2198
2199 //------------------------------policy_do_one_iteration_loop-------------------
2200 // Convert one iteration loop into normal code.
2201 bool IdealLoopTree::policy_do_one_iteration_loop( PhaseIdealLoop *phase ) {
2202 if (!_head->as_Loop()->is_valid_counted_loop())
2203 return false; // Only for counted loop
2204
2205 CountedLoopNode *cl = _head->as_CountedLoop();
2206 if (!cl->has_exact_trip_count() || cl->trip_count() != 1) {
2207 return false;
2208 }
2209
2210 #ifndef PRODUCT
2211 if(TraceLoopOpts) {
2212 tty->print("OneIteration ");
2659 Node* shift = NULL;
2660 Node* offset = NULL;
2661 if (!match_fill_loop(lpt, store, store_value, shift, offset)) {
2662 return false;
2663 }
2664
2665 #ifndef PRODUCT
2666 if (TraceLoopOpts) {
2667 tty->print("ArrayFill ");
2668 lpt->dump_head();
2669 }
2670 #endif
2671
2672 // Now replace the whole loop body by a call to a fill routine that
2673 // covers the same region as the loop.
2674 Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base);
2675
2676 // Build an expression for the beginning of the copy region
2677 Node* index = head->init_trip();
2678 #ifdef _LP64
2679 index = new ConvI2LNode(index);
2680 _igvn.register_new_node_with_optimizer(index);
2681 #endif
2682 if (shift != NULL) {
2683 // byte arrays don't require a shift but others do.
2684 index = new LShiftXNode(index, shift->in(2));
2685 _igvn.register_new_node_with_optimizer(index);
2686 }
2687 index = new AddPNode(base, base, index);
2688 _igvn.register_new_node_with_optimizer(index);
2689 Node* from = new AddPNode(base, index, offset);
2690 _igvn.register_new_node_with_optimizer(from);
2691 // Compute the number of elements to copy
2692 Node* len = new SubINode(head->limit(), head->init_trip());
2693 _igvn.register_new_node_with_optimizer(len);
2694
2695 BasicType t = store->as_Mem()->memory_type();
2696 bool aligned = false;
2697 if (offset != NULL && head->init_trip()->is_Con()) {
2698 int element_size = type2aelembytes(t);
2699 aligned = (offset->find_intptr_t_type()->get_con() + head->init_trip()->get_int() * element_size) % HeapWordSize == 0;
2700 }
2701
2702 // Build a call to the fill routine
2703 const char* fill_name;
2704 address fill = StubRoutines::select_fill_function(t, aligned, fill_name);
2705 assert(fill != NULL, "what?");
2706
2707 // Convert float/double to int/long for fill routines
2708 if (t == T_FLOAT) {
2709 store_value = new MoveF2INode(store_value);
2710 _igvn.register_new_node_with_optimizer(store_value);
2711 } else if (t == T_DOUBLE) {
2712 store_value = new MoveD2LNode(store_value);
2713 _igvn.register_new_node_with_optimizer(store_value);
2714 }
2715
2716 if (CCallingConventionRequiresIntsAsLongs &&
2717 // See StubRoutines::select_fill_function for types. FLOAT has been converted to INT.
2718 (t == T_FLOAT || t == T_INT || is_subword_type(t))) {
2719 store_value = new ConvI2LNode(store_value);
2720 _igvn.register_new_node_with_optimizer(store_value);
2721 }
2722
2723 Node* mem_phi = store->in(MemNode::Memory);
2724 Node* result_ctrl;
2725 Node* result_mem;
2726 const TypeFunc* call_type = OptoRuntime::array_fill_Type();
2727 CallLeafNode *call = new CallLeafNoFPNode(call_type, fill,
2728 fill_name, TypeAryPtr::get_array_body_type(t));
2729 uint cnt = 0;
2730 call->init_req(TypeFunc::Parms + cnt++, from);
2731 call->init_req(TypeFunc::Parms + cnt++, store_value);
2732 if (CCallingConventionRequiresIntsAsLongs) {
2733 call->init_req(TypeFunc::Parms + cnt++, C->top());
2734 }
2735 #ifdef _LP64
2736 len = new ConvI2LNode(len);
2737 _igvn.register_new_node_with_optimizer(len);
2738 #endif
2739 call->init_req(TypeFunc::Parms + cnt++, len);
2740 #ifdef _LP64
2741 call->init_req(TypeFunc::Parms + cnt++, C->top());
2742 #endif
2743 call->init_req(TypeFunc::Control, head->init_control());
2744 call->init_req(TypeFunc::I_O, C->top()); // Does no I/O.
2745 call->init_req(TypeFunc::Memory, mem_phi->in(LoopNode::EntryControl));
2746 call->init_req(TypeFunc::ReturnAdr, C->start()->proj_out(TypeFunc::ReturnAdr));
2747 call->init_req(TypeFunc::FramePtr, C->start()->proj_out(TypeFunc::FramePtr));
2748 _igvn.register_new_node_with_optimizer(call);
2749 result_ctrl = new ProjNode(call,TypeFunc::Control);
2750 _igvn.register_new_node_with_optimizer(result_ctrl);
2751 result_mem = new ProjNode(call,TypeFunc::Memory);
2752 _igvn.register_new_node_with_optimizer(result_mem);
2753
2754 /* Disable following optimization until proper fix (add missing checks).
2755
2756 // If this fill is tightly coupled to an allocation and overwrites
2757 // the whole body, allow it to take over the zeroing.
2758 AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this);
2759 if (alloc != NULL && alloc->is_AllocateArray()) {
2760 Node* length = alloc->as_AllocateArray()->Ideal_length();
2761 if (head->limit() == length &&
2762 head->init_trip() == _igvn.intcon(0)) {
2763 if (TraceOptimizeFill) {
2764 tty->print_cr("Eliminated zeroing in allocation");
2765 }
2766 alloc->maybe_set_complete(&_igvn);
2767 } else {
2768 #ifdef ASSERT
2769 if (TraceOptimizeFill) {
2770 tty->print_cr("filling array but bounds don't match");
2771 alloc->dump();
|