50 // Test is an IfNode, has 2 projections. If BOTH are in the loop
51 // we need loop unswitching instead of peeling.
52 if( !is_member(phase->get_loop( iff->raw_out(0) )) )
53 return iff->raw_out(0);
54 if( !is_member(phase->get_loop( iff->raw_out(1) )) )
55 return iff->raw_out(1);
56 return NULL;
57 }
58
59
60 //=============================================================================
61
62
63 //------------------------------record_for_igvn----------------------------
64 // Put loop body on igvn work list
65 void IdealLoopTree::record_for_igvn() {
66 for( uint i = 0; i < _body.size(); i++ ) {
67 Node *n = _body.at(i);
68 _phase->_igvn._worklist.push(n);
69 }
70 }
71
72 //------------------------------compute_exact_trip_count-----------------------
73 // Compute loop trip count if possible. Do not recalculate trip count for
74 // split loops (pre-main-post) which have their limits and inits behind Opaque node.
75 void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
76 if (!_head->as_Loop()->is_valid_counted_loop()) {
77 return;
78 }
79 CountedLoopNode* cl = _head->as_CountedLoop();
80 // Trip count may become nonexact for iteration split loops since
81 // RCE modifies limits. Note, _trip_count value is not reset since
82 // it is used to limit unrolling of main loop.
83 cl->set_nonexact_trip_count();
84
85 // Loop's test should be part of loop.
86 if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
87 return; // Infinite loop
88
89 #ifdef ASSERT
477 // v v --+
478 // region
479 // |
480 // v
481 // exit
482 //
483 void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) {
484
485 C->set_major_progress();
486 // Peeling a 'main' loop in a pre/main/post situation obfuscates the
487 // 'pre' loop from the main and the 'pre' can no longer have its
488 // iterations adjusted. Therefore, we need to declare this loop as
489 // no longer a 'main' loop; it will need new pre and post loops before
490 // we can do further RCE.
491 #ifndef PRODUCT
492 if (TraceLoopOpts) {
493 tty->print("Peel ");
494 loop->dump_head();
495 }
496 #endif
497 Node* head = loop->_head;
498 bool counted_loop = head->is_CountedLoop();
499 if (counted_loop) {
500 CountedLoopNode *cl = head->as_CountedLoop();
501 assert(cl->trip_count() > 0, "peeling a fully unrolled loop");
502 cl->set_trip_count(cl->trip_count() - 1);
503 if (cl->is_main_loop()) {
504 cl->set_normal_loop();
505 #ifndef PRODUCT
506 if (PrintOpto && VerifyLoopOptimizations) {
507 tty->print("Peeling a 'main' loop; resetting to 'normal' ");
508 loop->dump_head();
509 }
510 #endif
511 }
512 }
513 Node* entry = head->in(LoopNode::EntryControl);
514
515 // Step 1: Clone the loop body. The clone becomes the peeled iteration.
516 // The pre-loop illegally has 2 control users (old & new loops).
517 clone_loop( loop, old_new, dom_depth(head) );
518
519 // Step 2: Make the old-loop fall-in edges point to the peeled iteration.
520 // Do this by making the old-loop fall-in edges act as if they came
521 // around the loopback from the prior iteration (follow the old-loop
522 // backedges) and then map to the new peeled iteration. This leaves
523 // the pre-loop with only 1 user (the new peeled iteration), but the
524 // peeled-loop backedge has 2 users.
525 Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx];
526 _igvn.hash_delete(head);
527 head->set_req(LoopNode::EntryControl, new_entry);
528 for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
529 Node* old = head->fast_out(j);
530 if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) {
531 Node* new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx];
532 if (!new_exit_value ) // Backedge value is ALSO loop invariant?
533 // Then loop body backedge value remains the same.
534 new_exit_value = old->in(LoopNode::LoopBackControl);
535 _igvn.hash_delete(old);
536 old->set_req(LoopNode::EntryControl, new_exit_value);
537 }
538 }
539
540
541 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the
542 // extra backedge user.
543 Node* new_head = old_new[head->_idx];
544 _igvn.hash_delete(new_head);
545 new_head->set_req(LoopNode::LoopBackControl, C->top());
546 for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) {
547 Node* use = new_head->fast_out(j2);
992 // alignment. Useful to unroll loops that do no array accesses.
993 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
994
995 #ifndef PRODUCT
996 if (TraceLoopOpts) {
997 if (peel_only)
998 tty->print("PeelMainPost ");
999 else
1000 tty->print("PreMainPost ");
1001 loop->dump_head();
1002 }
1003 #endif
1004 C->set_major_progress();
1005
1006 // Find common pieces of the loop being guarded with pre & post loops
1007 CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1008 assert( main_head->is_normal_loop(), "" );
1009 CountedLoopEndNode *main_end = main_head->loopexit();
1010 guarantee(main_end != NULL, "no loop exit node");
1011 assert( main_end->outcnt() == 2, "1 true, 1 false path only" );
1012 uint dd_main_head = dom_depth(main_head);
1013 uint max = main_head->outcnt();
1014
1015 Node *pre_header= main_head->in(LoopNode::EntryControl);
1016 Node *init = main_head->init_trip();
1017 Node *incr = main_end ->incr();
1018 Node *limit = main_end ->limit();
1019 Node *stride = main_end ->stride();
1020 Node *cmp = main_end ->cmp_node();
1021 BoolTest::mask b_test = main_end->test_trip();
1022
1023 // Need only 1 user of 'bol' because I will be hacking the loop bounds.
1024 Node *bol = main_end->in(CountedLoopEndNode::TestValue);
1025 if( bol->outcnt() != 1 ) {
1026 bol = bol->clone();
1027 register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
1028 _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
1029 }
1030 // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
1031 if( cmp->outcnt() != 1 ) {
1032 cmp = cmp->clone();
1033 register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
1034 _igvn.replace_input_of(bol, 1, cmp);
1035 }
1036
1037 // Add the post loop
1038 CountedLoopNode *post_head = NULL;
1039 Node *main_exit = insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1040
1041 //------------------------------
1042 // Step B: Create Pre-Loop.
1043
1044 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
1045 // loop pre-header illegally has 2 control users (old & new loops).
1046 clone_loop( loop, old_new, dd_main_head );
1047 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
1048 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
1049 pre_head->set_pre_loop(main_head);
1050 Node *pre_incr = old_new[incr->_idx];
1051
1052 // Reduce the pre-loop trip count.
1053 pre_end->_prob = PROB_FAIR;
1054
1055 // Find the pre-loop normal exit.
1056 Node* pre_exit = pre_end->proj_out(false);
1057 assert( pre_exit->Opcode() == Op_IfFalse, "" );
1058 IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
1059 _igvn.register_new_node_with_optimizer( new_pre_exit );
1060 set_idom(new_pre_exit, pre_end, dd_main_head);
1061 set_loop(new_pre_exit, loop->_parent);
1062
1063 // Step B2: Build a zero-trip guard for the main-loop. After leaving the
1064 // pre-loop, the main-loop may not execute at all. Later in life this
1065 // zero-trip guard will become the minimum-trip guard when we unroll
1066 // the main-loop.
1067 Node *min_opaq = new Opaque1Node(C, limit);
1068 Node *min_cmp = new CmpINode( pre_incr, min_opaq );
1069 Node *min_bol = new BoolNode( min_cmp, b_test );
1070 register_new_node( min_opaq, new_pre_exit );
1071 register_new_node( min_cmp , new_pre_exit );
1072 register_new_node( min_bol , new_pre_exit );
1073
1074 // Build the IfNode (assume the main-loop is executed always).
1075 IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
1076 _igvn.register_new_node_with_optimizer( min_iff );
1077 set_idom(min_iff, new_pre_exit, dd_main_head);
1078 set_loop(min_iff, loop->_parent);
1079
1080 // Plug in the false-path, taken if we need to skip main-loop
1081 _igvn.hash_delete( pre_exit );
1082 pre_exit->set_req(0, min_iff);
1083 set_idom(pre_exit, min_iff, dd_main_head);
1084 set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
1085 // Make the true-path, must enter the main loop
1086 Node *min_taken = new IfTrueNode( min_iff );
1087 _igvn.register_new_node_with_optimizer( min_taken );
1088 set_idom(min_taken, min_iff, dd_main_head);
1089 set_loop(min_taken, loop->_parent);
1090 // Plug in the true path
1091 _igvn.hash_delete( main_head );
1092 main_head->set_req(LoopNode::EntryControl, min_taken);
1093 set_idom(main_head, min_taken, dd_main_head);
1094
1095 Arena *a = Thread::current()->resource_area();
1096 VectorSet visited(a);
1097 Node_Stack clones(a, main_head->back_control()->outcnt());
1098 // Step B3: Make the fall-in values to the main-loop come from the
1099 // fall-out values of the pre-loop.
1100 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
1101 Node* main_phi = main_head->fast_out(i2);
1102 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
1103 Node *pre_phi = old_new[main_phi->_idx];
1104 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
1105 main_head->init_control(),
1106 pre_phi->in(LoopNode::LoopBackControl),
1107 visited, clones);
1108 _igvn.hash_delete(main_phi);
1109 main_phi->set_req( LoopNode::EntryControl, fallpre );
1110 }
1111 }
1112
1113 // Nodes inside the loop may be control dependent on a predicate
1114 // that was moved before the preloop. If the back branch of the main
1115 // or post loops becomes dead, those nodes won't be dependent on the
1116 // test that guards that loop nest anymore which could lead to an
1117 // incorrect array access because it executes independently of the
1118 // test that was guarding the loop nest. We add a special CastII on
1119 // the if branch that enters the loop, between the input induction
1120 // variable value and the induction variable Phi to preserve correct
1121 // dependencies.
1122
1123 // CastII for the main loop:
1124 bool inserted = cast_incr_before_loop( pre_incr, min_taken, main_head );
1125 assert(inserted, "no castII inserted");
1154
1155 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
1156
1157 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
1158 // Modify pre loop end condition
1159 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1160 BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
1161 register_new_node( new_bol0, pre_head->in(0) );
1162 _igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
1163 // Modify main loop guard condition
1164 assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
1165 BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
1166 register_new_node( new_bol1, new_pre_exit );
1167 _igvn.hash_delete(min_iff);
1168 min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
1169 // Modify main loop end condition
1170 BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1171 BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
1172 register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
1173 _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
1174 }
1175
1176 // Flag main loop
1177 main_head->set_main_loop();
1178 if( peel_only ) main_head->set_main_no_pre_loop();
1179
1180 // Subtract a trip count for the pre-loop.
1181 main_head->set_trip_count(main_head->trip_count() - 1);
1182
1183 // It's difficult to be precise about the trip-counts
1184 // for the pre/post loops. They are usually very short,
1185 // so guess that 4 trips is a reasonable value.
1186 post_head->set_profile_trip_cnt(4.0);
1187 pre_head->set_profile_trip_cnt(4.0);
1188
1189 // Now force out all loop-invariant dominating tests. The optimizer
1190 // finds some, but we _know_ they are all useless.
1191 peeled_dom_test_elim(loop,old_new);
1192 loop->record_for_igvn();
1193 }
1288 insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1289
1290 // It's difficult to be precise about the trip-counts
1291 // for post loops. They are usually very short,
1292 // so guess that unit vector trips is a reasonable value.
1293 post_head->set_profile_trip_cnt(4.0);
1294 post_head->set_is_rce_post_loop();
1295
1296 // Now force out all loop-invariant dominating tests. The optimizer
1297 // finds some, but we _know_ they are all useless.
1298 peeled_dom_test_elim(loop, old_new);
1299 loop->record_for_igvn();
1300 }
1301
1302
1303 //------------------------------insert_post_loop-------------------------------
1304 // Insert post loops. Add a post loop to the given loop passed.
1305 Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
1306 CountedLoopNode *main_head, CountedLoopEndNode *main_end,
1307 Node *incr, Node *limit, CountedLoopNode *&post_head) {
1308
1309 //------------------------------
1310 // Step A: Create a new post-Loop.
1311 Node* main_exit = main_end->proj_out(false);
1312 assert(main_exit->Opcode() == Op_IfFalse, "");
1313 int dd_main_exit = dom_depth(main_exit);
1314
1315 // Step A1: Clone the loop body of main. The clone becomes the post-loop.
1316 // The main loop pre-header illegally has 2 control users (old & new loops).
1317 clone_loop(loop, old_new, dd_main_exit);
1318 assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
1319 post_head = old_new[main_head->_idx]->as_CountedLoop();
1320 post_head->set_normal_loop();
1321 post_head->set_post_loop(main_head);
1322
1323 // Reduce the post-loop trip count.
1324 CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
1325 post_end->_prob = PROB_FAIR;
1326
1327 // Build the main-loop normal exit.
1328 IfFalseNode *new_main_exit = new IfFalseNode(main_end);
1329 _igvn.register_new_node_with_optimizer(new_main_exit);
1330 set_idom(new_main_exit, main_end, dd_main_exit);
1331 set_loop(new_main_exit, loop->_parent);
1332
1333 // Step A2: Build a zero-trip guard for the post-loop. After leaving the
1334 // main-loop, the post-loop may not execute at all. We 'opaque' the incr
1335 // (the previous loop trip-counter exit value) because we will be changing
1336 // the exit value (via additional unrolling) so we cannot constant-fold away the zero
1337 // trip guard until all unrolling is done.
1338 Node *zer_opaq = new Opaque1Node(C, incr);
1339 Node *zer_cmp = new CmpINode(zer_opaq, limit);
1340 Node *zer_bol = new BoolNode(zer_cmp, main_end->test_trip());
1341 register_new_node(zer_opaq, new_main_exit);
1342 register_new_node(zer_cmp, new_main_exit);
1343 register_new_node(zer_bol, new_main_exit);
1344
1345 // Build the IfNode
1346 IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
1347 _igvn.register_new_node_with_optimizer(zer_iff);
1348 set_idom(zer_iff, new_main_exit, dd_main_exit);
1349 set_loop(zer_iff, loop->_parent);
1350
1351 // Plug in the false-path, taken if we need to skip this post-loop
1352 _igvn.replace_input_of(main_exit, 0, zer_iff);
1353 set_idom(main_exit, zer_iff, dd_main_exit);
1354 set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
1355 // Make the true-path, must enter this post loop
1356 Node *zer_taken = new IfTrueNode(zer_iff);
1357 _igvn.register_new_node_with_optimizer(zer_taken);
1358 set_idom(zer_taken, zer_iff, dd_main_exit);
1359 set_loop(zer_taken, loop->_parent);
1360 // Plug in the true path
1361 _igvn.hash_delete(post_head);
1362 post_head->set_req(LoopNode::EntryControl, zer_taken);
1363 set_idom(post_head, zer_taken, dd_main_exit);
1364
1365 Arena *a = Thread::current()->resource_area();
1366 VectorSet visited(a);
1367 Node_Stack clones(a, main_head->back_control()->outcnt());
1368 // Step A3: Make the fall-in values to the post-loop come from the
1369 // fall-out values of the main-loop.
1370 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
1371 Node* main_phi = main_head->fast_out(i);
1372 if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) {
1373 Node *cur_phi = old_new[main_phi->_idx];
1374 Node *fallnew = clone_up_backedge_goo(main_head->back_control(),
1375 post_head->init_control(),
1376 main_phi->in(LoopNode::LoopBackControl),
1377 visited, clones);
1378 _igvn.hash_delete(cur_phi);
1379 cur_phi->set_req(LoopNode::EntryControl, fallnew);
1414 tty->print("Unroll %d ", loop_head->unrolled_count()*2);
1415 }
1416 loop->dump_head();
1417 }
1418
1419 if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations || TraceLoopOpts))) {
1420 Arena* arena = Thread::current()->resource_area();
1421 Node_Stack stack(arena, C->live_nodes() >> 2);
1422 Node_List rpo_list;
1423 VectorSet visited(arena);
1424 visited.set(loop_head->_idx);
1425 rpo( loop_head, stack, visited, rpo_list );
1426 dump(loop, rpo_list.size(), rpo_list );
1427 }
1428 #endif
1429
1430 // Remember loop node count before unrolling to detect
1431 // if rounds of unroll,optimize are making progress
1432 loop_head->set_node_count_before_unroll(loop->_body.size());
1433
1434 Node *ctrl = loop_head->in(LoopNode::EntryControl);
1435 Node *limit = loop_head->limit();
1436 Node *init = loop_head->init_trip();
1437 Node *stride = loop_head->stride();
1438
1439 Node *opaq = NULL;
1440 if (adjust_min_trip) { // If not maximally unrolling, need adjustment
1441 // Search for zero-trip guard.
1442
1443 // Check the shape of the graph at the loop entry. If an inappropriate
1444 // graph shape is encountered, the compiler bails out loop unrolling;
1445 // compilation of the method will still succeed.
1446 if (!is_canonical_loop_entry(loop_head)) {
1447 return;
1448 }
1449 opaq = ctrl->in(0)->in(1)->in(1)->in(2);
1450 // Zero-trip test uses an 'opaque' node which is not shared.
1451 assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
1452 }
1453
1454 C->set_major_progress();
1593 // minimum-trip guard.
1594 assert(opaq->outcnt() == 1, "");
1595 _igvn.replace_input_of(opaq, 1, new_limit);
1596 }
1597
1598 // Adjust max trip count. The trip count is intentionally rounded
1599 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
1600 // the main, unrolled, part of the loop will never execute as it is protected
1601 // by the min-trip test. See bug 4834191 for a case where we over-unrolled
1602 // and later determined that part of the unrolled loop was dead.
1603 loop_head->set_trip_count(old_trip_count / 2);
1604
1605 // Double the count of original iterations in the unrolled loop body.
1606 loop_head->double_unrolled_count();
1607
1608 // ---------
1609 // Step 4: Clone the loop body. Move it inside the loop. This loop body
1610 // represents the odd iterations; since the loop trips an even number of
1611 // times its backedge is never taken. Kill the backedge.
1612 uint dd = dom_depth(loop_head);
1613 clone_loop( loop, old_new, dd );
1614
1615 // Make backedges of the clone equal to backedges of the original.
1616 // Make the fall-in from the original come from the fall-out of the clone.
1617 for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) {
1618 Node* phi = loop_head->fast_out(j);
1619 if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) {
1620 Node *newphi = old_new[phi->_idx];
1621 _igvn.hash_delete( phi );
1622 _igvn.hash_delete( newphi );
1623
1624 phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl));
1625 newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl));
1626 phi ->set_req(LoopNode::LoopBackControl, C->top());
1627 }
1628 }
1629 Node *clone_head = old_new[loop_head->_idx];
1630 _igvn.hash_delete( clone_head );
1631 loop_head ->set_req(LoopNode:: EntryControl, clone_head->in(LoopNode::LoopBackControl));
1632 clone_head->set_req(LoopNode::LoopBackControl, loop_head ->in(LoopNode::LoopBackControl));
1633 loop_head ->set_req(LoopNode::LoopBackControl, C->top());
1636 set_idom(loop_head, loop_head ->in(LoopNode::EntryControl), dd);
1637 set_idom(clone_head, clone_head->in(LoopNode::EntryControl), dd);
1638
1639 // Kill the clone's backedge
1640 Node *newcle = old_new[loop_end->_idx];
1641 _igvn.hash_delete( newcle );
1642 Node *one = _igvn.intcon(1);
1643 set_ctrl(one, C->root());
1644 newcle->set_req(1, one);
1645 // Force clone into same loop body
1646 uint max = loop->_body.size();
1647 for( uint k = 0; k < max; k++ ) {
1648 Node *old = loop->_body.at(k);
1649 Node *nnn = old_new[old->_idx];
1650 loop->_body.push(nnn);
1651 if (!has_ctrl(old))
1652 set_loop(nnn, loop);
1653 }
1654
1655 loop->record_for_igvn();
1656
1657 #ifndef PRODUCT
1658 if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations || TraceLoopOpts))) {
1659 tty->print("\nnew loop after unroll\n"); loop->dump_head();
1660 for (uint i = 0; i < loop->_body.size(); i++) {
1661 loop->_body.at(i)->dump();
1662 }
1663 if(C->clone_map().is_debug()) {
1664 tty->print("\nCloneMap\n");
1665 Dict* dict = C->clone_map().dict();
1666 DictI i(dict);
1667 tty->print_cr("Dict@%p[%d] = ", dict, dict->Size());
1668 for (int ii = 0; i.test(); ++i, ++ii) {
1669 NodeCloneInfo cl((uint64_t)dict->operator[]((void*)i._key));
1670 tty->print("%d->%d:%d,", (int)(intptr_t)i._key, cl.idx(), cl.gen());
1671 if (ii % 10 == 9) {
1672 tty->print_cr(" ");
1673 }
1674 }
1675 tty->print_cr(" ");
2030 int closed_range_checks = 1;
2031
2032 // protect against stride not being a constant
2033 if (!cl->stride_is_con())
2034 return closed_range_checks;
2035
2036 // Find the trip counter; we are iteration splitting based on it
2037 Node *trip_counter = cl->phi();
2038 // Find the main loop limit; we will trim it's iterations
2039 // to not ever trip end tests
2040 Node *main_limit = cl->limit();
2041
2042 // Check graph shape. Cannot optimize a loop if zero-trip
2043 // Opaque1 node is optimized away and then another round
2044 // of loop opts attempted.
2045 if (!is_canonical_loop_entry(cl)) {
2046 return closed_range_checks;
2047 }
2048
2049 // Need to find the main-loop zero-trip guard
2050 Node *ctrl = cl->in(LoopNode::EntryControl);
2051 Node *iffm = ctrl->in(0);
2052 Node *opqzm = iffm->in(1)->in(1)->in(2);
2053 assert(opqzm->in(1) == main_limit, "do not understand situation");
2054
2055 // Find the pre-loop limit; we will expand its iterations to
2056 // not ever trip low tests.
2057 Node *p_f = iffm->in(0);
2058 // pre loop may have been optimized out
2059 if (p_f->Opcode() != Op_IfFalse) {
2060 return closed_range_checks;
2061 }
2062 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2063 assert(pre_end->loopnode()->is_pre_loop(), "");
2064 Node *pre_opaq1 = pre_end->limit();
2065 // Occasionally it's possible for a pre-loop Opaque1 node to be
2066 // optimized away and then another round of loop opts attempted.
2067 // We can not optimize this particular loop in that case.
2068 if (pre_opaq1->Opcode() != Op_Opaque1)
2069 return closed_range_checks;
2070 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2396 last_min = rc_left;
2397 first_time = false;
2398 } else {
2399 Node *cur_min = new MinINode(last_min, rc_left);
2400 last_min = cur_min;
2401 _igvn.register_new_node_with_optimizer(last_min);
2402 }
2403 }
2404 }
2405 }
2406 }
2407
2408 // All we have to do is update the limit of the rce loop
2409 // with the min of our expression and the current limit.
2410 // We will use this expression to replace the current limit.
2411 if (last_min && multi_version_succeeded) {
2412 Node *cur_min = new MinINode(last_min, limit);
2413 _igvn.register_new_node_with_optimizer(cur_min);
2414 Node *cmp_node = rce_loop_end->cmp_node();
2415 _igvn.replace_input_of(cmp_node, 2, cur_min);
2416 set_idom(cmp_node, cur_min, dom_depth(ctrl));
2417 set_ctrl(cur_min, ctrl);
2418 set_loop(cur_min, rce_loop->_parent);
2419
2420 legacy_cl->mark_is_multiversioned();
2421 rce_cl->mark_is_multiversioned();
2422 multi_version_succeeded = true;
2423
2424 C->set_major_progress();
2425 }
2426
2427 return multi_version_succeeded;
2428 }
2429
2430 //-------------------------poison_rce_post_loop--------------------------------
2431 // Causes the rce'd post loop to be optimized away if multiversioning fails
2432 void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
2433 CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
2434 Node* ctrl = rce_cl->in(LoopNode::EntryControl);
2435 if (ctrl->is_IfTrue() || ctrl->is_IfFalse()) {
2436 Node* iffm = ctrl->in(0);
2502 float p = iff->_prob;
2503 if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) {
2504 if( top == Op_IfTrue ) {
2505 if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) {
2506 iff->_prob = PROB_STATIC_FREQUENT;
2507 }
2508 } else {
2509 if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) {
2510 iff->_prob = PROB_STATIC_INFREQUENT;
2511 }
2512 }
2513 }
2514 }
2515 }
2516 test = phase->idom(test);
2517 }
2518 }
2519
2520 #ifdef ASSERT
2521 static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
2522 Node *ctrl = cl->in(LoopNode::EntryControl);
2523 assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
2524 Node *iffm = ctrl->in(0);
2525 assert(iffm->Opcode() == Op_If, "");
2526 Node *p_f = iffm->in(0);
2527 assert(p_f->Opcode() == Op_IfFalse, "");
2528 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2529 assert(pre_end->loopnode()->is_pre_loop(), "");
2530 return pre_end->loopnode();
2531 }
2532 #endif
2533
2534 // Remove the main and post loops and make the pre loop execute all
2535 // iterations. Useful when the pre loop is found empty.
2536 void IdealLoopTree::remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase) {
2537 CountedLoopEndNode* pre_end = cl->loopexit();
2538 Node* pre_cmp = pre_end->cmp_node();
2539 if (pre_cmp->in(2)->Opcode() != Op_Opaque1) {
2540 // Only safe to remove the main loop if the compiler optimized it
2541 // out based on an unknown number of iterations
2542 return;
2543 }
2544
2545 // Can we find the main loop?
2546 if (_next == NULL) {
2547 return;
2548 }
2549
2550 Node* next_head = _next->_head;
2551 if (!next_head->is_CountedLoop()) {
2552 return;
2553 }
2554
2555 CountedLoopNode* main_head = next_head->as_CountedLoop();
2556 if (!main_head->is_main_loop()) {
2557 return;
2558 }
2559
2560 assert(locate_pre_from_main(main_head) == cl, "bad main loop");
2561 Node* main_iff = main_head->in(LoopNode::EntryControl)->in(0);
2562
2563 // Remove the Opaque1Node of the pre loop and make it execute all iterations
2564 phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
2565 // Remove the Opaque1Node of the main loop so it can be optimized out
2566 Node* main_cmp = main_iff->in(1)->in(1);
2567 assert(main_cmp->in(2)->Opcode() == Op_Opaque1, "main loop has no opaque node?");
2568 phase->_igvn.replace_input_of(main_cmp, 2, main_cmp->in(2)->in(1));
2569 }
2570
2571 //------------------------------policy_do_remove_empty_loop--------------------
2572 // Micro-benchmark spamming. Policy is to always remove empty loops.
2573 // The 'DO' part is to replace the trip counter with the value it will
2574 // have on the last iteration. This will break the loop.
2575 bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
2576 // Minimum size must be empty loop
2577 if (_body.size() > EMPTY_LOOP_SIZE)
2578 return false;
2579
2580 if (!_head->is_CountedLoop())
2581 return false; // Dead loop
2602 }
2603 }
2604 assert(iv == cl->phi(), "Wrong phi" );
2605 #endif
2606
2607 // main and post loops have explicitly created zero trip guard
2608 bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
2609 if (needs_guard) {
2610 // Skip guard if values not overlap.
2611 const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
2612 const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
2613 int stride_con = cl->stride_con();
2614 if (stride_con > 0) {
2615 needs_guard = (init_t->_hi >= limit_t->_lo);
2616 } else {
2617 needs_guard = (init_t->_lo <= limit_t->_hi);
2618 }
2619 }
2620 if (needs_guard) {
2621 // Check for an obvious zero trip guard.
2622 Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl));
2623 if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
2624 bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
2625 // The test should look like just the backedge of a CountedLoop
2626 Node* iff = inctrl->in(0);
2627 if (iff->is_If()) {
2628 Node* bol = iff->in(1);
2629 if (bol->is_Bool()) {
2630 BoolTest test = bol->as_Bool()->_test;
2631 if (maybe_swapped) {
2632 test._test = test.commute();
2633 test._test = test.negate();
2634 }
2635 if (test._test == cl->loopexit()->test_trip()) {
2636 Node* cmp = bol->in(1);
2637 int init_idx = maybe_swapped ? 2 : 1;
2638 int limit_idx = maybe_swapped ? 1 : 2;
2639 if (cmp->is_Cmp() && cmp->in(init_idx) == cl->init_trip() && cmp->in(limit_idx) == cl->limit()) {
2640 needs_guard = false;
2641 }
2642 }
3150 }
3151 #endif
3152
3153 return msg == NULL;
3154 }
3155
3156
3157
3158 bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) {
3159 // Only for counted inner loops
3160 if (!lpt->is_counted() || !lpt->is_inner()) {
3161 return false;
3162 }
3163
3164 // Must have constant stride
3165 CountedLoopNode* head = lpt->_head->as_CountedLoop();
3166 if (!head->is_valid_counted_loop() || !head->is_normal_loop()) {
3167 return false;
3168 }
3169
3170 // Check that the body only contains a store of a loop invariant
3171 // value that is indexed by the loop phi.
3172 Node* store = NULL;
3173 Node* store_value = NULL;
3174 Node* shift = NULL;
3175 Node* offset = NULL;
3176 if (!match_fill_loop(lpt, store, store_value, shift, offset)) {
3177 return false;
3178 }
3179
3180 Node* exit = head->loopexit()->proj_out(0);
3181 if (exit == NULL) {
3182 return false;
3183 }
3184
3185 #ifndef PRODUCT
3186 if (TraceLoopOpts) {
3187 tty->print("ArrayFill ");
3188 lpt->dump_head();
3189 }
3270 Node* length = alloc->as_AllocateArray()->Ideal_length();
3271 if (head->limit() == length &&
3272 head->init_trip() == _igvn.intcon(0)) {
3273 if (TraceOptimizeFill) {
3274 tty->print_cr("Eliminated zeroing in allocation");
3275 }
3276 alloc->maybe_set_complete(&_igvn);
3277 } else {
3278 #ifdef ASSERT
3279 if (TraceOptimizeFill) {
3280 tty->print_cr("filling array but bounds don't match");
3281 alloc->dump();
3282 head->init_trip()->dump();
3283 head->limit()->dump();
3284 length->dump();
3285 }
3286 #endif
3287 }
3288 }
3289 */
3290
3291 // Redirect the old control and memory edges that are outside the loop.
3292 // Sometimes the memory phi of the head is used as the outgoing
3293 // state of the loop. It's safe in this case to replace it with the
3294 // result_mem.
3295 _igvn.replace_node(store->in(MemNode::Memory), result_mem);
3296 lazy_replace(exit, result_ctrl);
3297 _igvn.replace_node(store, result_mem);
3298 // Any uses the increment outside of the loop become the loop limit.
3299 _igvn.replace_node(head->incr(), head->limit());
3300
3301 // Disconnect the head from the loop.
3302 for (uint i = 0; i < lpt->_body.size(); i++) {
3303 Node* n = lpt->_body.at(i);
3304 _igvn.replace_node(n, C->top());
3305 }
3306
3307 return true;
3308 }
|
50 // Test is an IfNode, has 2 projections. If BOTH are in the loop
51 // we need loop unswitching instead of peeling.
52 if( !is_member(phase->get_loop( iff->raw_out(0) )) )
53 return iff->raw_out(0);
54 if( !is_member(phase->get_loop( iff->raw_out(1) )) )
55 return iff->raw_out(1);
56 return NULL;
57 }
58
59
60 //=============================================================================
61
62
63 //------------------------------record_for_igvn----------------------------
64 // Put loop body on igvn work list
65 void IdealLoopTree::record_for_igvn() {
66 for( uint i = 0; i < _body.size(); i++ ) {
67 Node *n = _body.at(i);
68 _phase->_igvn._worklist.push(n);
69 }
70 // put body of outer strip mined loop on igvn work list as well
71 if (_head->is_CountedLoop() && _head->as_Loop()->is_strip_mined()) {
72 CountedLoopNode* l = _head->as_CountedLoop();
73 _phase->_igvn._worklist.push(l->outer_loop());
74 _phase->_igvn._worklist.push(l->outer_loop_tail());
75 _phase->_igvn._worklist.push(l->outer_loop_end());
76 _phase->_igvn._worklist.push(l->outer_safepoint());
77 _phase->_igvn._worklist.push(l->outer_bol());
78 _phase->_igvn._worklist.push(l->outer_cmp());
79 _phase->_igvn._worklist.push(l->outer_opaq());
80 Node* cle_out = _head->as_CountedLoop()->loopexit()->proj_out(false);
81 _phase->_igvn._worklist.push(cle_out);
82 }
83 }
84
85 //------------------------------compute_exact_trip_count-----------------------
86 // Compute loop trip count if possible. Do not recalculate trip count for
87 // split loops (pre-main-post) which have their limits and inits behind Opaque node.
88 void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
89 if (!_head->as_Loop()->is_valid_counted_loop()) {
90 return;
91 }
92 CountedLoopNode* cl = _head->as_CountedLoop();
93 // Trip count may become nonexact for iteration split loops since
94 // RCE modifies limits. Note, _trip_count value is not reset since
95 // it is used to limit unrolling of main loop.
96 cl->set_nonexact_trip_count();
97
98 // Loop's test should be part of loop.
99 if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
100 return; // Infinite loop
101
102 #ifdef ASSERT
490 // v v --+
491 // region
492 // |
493 // v
494 // exit
495 //
496 void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) {
497
498 C->set_major_progress();
499 // Peeling a 'main' loop in a pre/main/post situation obfuscates the
500 // 'pre' loop from the main and the 'pre' can no longer have its
501 // iterations adjusted. Therefore, we need to declare this loop as
502 // no longer a 'main' loop; it will need new pre and post loops before
503 // we can do further RCE.
504 #ifndef PRODUCT
505 if (TraceLoopOpts) {
506 tty->print("Peel ");
507 loop->dump_head();
508 }
509 #endif
510 LoopNode* head = loop->_head->as_Loop();
511 bool counted_loop = head->is_CountedLoop();
512 if (counted_loop) {
513 CountedLoopNode *cl = head->as_CountedLoop();
514 assert(cl->trip_count() > 0, "peeling a fully unrolled loop");
515 cl->set_trip_count(cl->trip_count() - 1);
516 if (cl->is_main_loop()) {
517 cl->set_normal_loop();
518 #ifndef PRODUCT
519 if (PrintOpto && VerifyLoopOptimizations) {
520 tty->print("Peeling a 'main' loop; resetting to 'normal' ");
521 loop->dump_head();
522 }
523 #endif
524 }
525 }
526 Node* entry = head->in(LoopNode::EntryControl);
527
528 // Step 1: Clone the loop body. The clone becomes the peeled iteration.
529 // The pre-loop illegally has 2 control users (old & new loops).
530 clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), ControlAroundStripMined);
531
532 // Step 2: Make the old-loop fall-in edges point to the peeled iteration.
533 // Do this by making the old-loop fall-in edges act as if they came
534 // around the loopback from the prior iteration (follow the old-loop
535 // backedges) and then map to the new peeled iteration. This leaves
536 // the pre-loop with only 1 user (the new peeled iteration), but the
537 // peeled-loop backedge has 2 users.
538 Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx];
539 _igvn.hash_delete(head->skip_strip_mined());
540 head->skip_strip_mined()->set_req(LoopNode::EntryControl, new_entry);
541 for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
542 Node* old = head->fast_out(j);
543 if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) {
544 Node* new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx];
545 if (!new_exit_value ) // Backedge value is ALSO loop invariant?
546 // Then loop body backedge value remains the same.
547 new_exit_value = old->in(LoopNode::LoopBackControl);
548 _igvn.hash_delete(old);
549 old->set_req(LoopNode::EntryControl, new_exit_value);
550 }
551 }
552
553
554 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the
555 // extra backedge user.
556 Node* new_head = old_new[head->_idx];
557 _igvn.hash_delete(new_head);
558 new_head->set_req(LoopNode::LoopBackControl, C->top());
559 for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) {
560 Node* use = new_head->fast_out(j2);
1005 // alignment. Useful to unroll loops that do no array accesses.
1006 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
1007
1008 #ifndef PRODUCT
1009 if (TraceLoopOpts) {
1010 if (peel_only)
1011 tty->print("PeelMainPost ");
1012 else
1013 tty->print("PreMainPost ");
1014 loop->dump_head();
1015 }
1016 #endif
1017 C->set_major_progress();
1018
1019 // Find common pieces of the loop being guarded with pre & post loops
1020 CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1021 assert( main_head->is_normal_loop(), "" );
1022 CountedLoopEndNode *main_end = main_head->loopexit();
1023 guarantee(main_end != NULL, "no loop exit node");
1024 assert( main_end->outcnt() == 2, "1 true, 1 false path only" );
1025
1026 Node *pre_header= main_head->in(LoopNode::EntryControl);
1027 Node *init = main_head->init_trip();
1028 Node *incr = main_end ->incr();
1029 Node *limit = main_end ->limit();
1030 Node *stride = main_end ->stride();
1031 Node *cmp = main_end ->cmp_node();
1032 BoolTest::mask b_test = main_end->test_trip();
1033
1034 // Need only 1 user of 'bol' because I will be hacking the loop bounds.
1035 Node *bol = main_end->in(CountedLoopEndNode::TestValue);
1036 if( bol->outcnt() != 1 ) {
1037 bol = bol->clone();
1038 register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
1039 _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
1040 }
1041 // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
1042 if( cmp->outcnt() != 1 ) {
1043 cmp = cmp->clone();
1044 register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
1045 _igvn.replace_input_of(bol, 1, cmp);
1046 }
1047
1048 // Add the post loop
1049 CountedLoopNode *post_head = NULL;
1050 Node *main_exit = insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1051
1052 //------------------------------
1053 // Step B: Create Pre-Loop.
1054
1055 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
1056 // loop pre-header illegally has 2 control users (old & new loops).
1057 LoopNode* outer_main_head = main_head;
1058 IdealLoopTree* outer_loop = loop;
1059 if (main_head->is_strip_mined()) {
1060 main_head->verify_strip_mined(1);
1061 outer_main_head = main_head->outer_loop();
1062 outer_loop = loop->_parent;
1063 assert(outer_loop->_head == outer_main_head, "broken loop tree");
1064 }
1065 uint dd_main_head = dom_depth(outer_main_head);
1066 clone_loop(loop, old_new, dd_main_head, ControlAroundStripMined);
1067 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
1068 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
1069 pre_head->set_pre_loop(main_head);
1070 Node *pre_incr = old_new[incr->_idx];
1071
1072 // Reduce the pre-loop trip count.
1073 pre_end->_prob = PROB_FAIR;
1074
1075 // Find the pre-loop normal exit.
1076 Node* pre_exit = pre_end->proj_out(false);
1077 assert( pre_exit->Opcode() == Op_IfFalse, "" );
1078 IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
1079 _igvn.register_new_node_with_optimizer( new_pre_exit );
1080 set_idom(new_pre_exit, pre_end, dd_main_head);
1081 set_loop(new_pre_exit, outer_loop->_parent);
1082
1083 // Step B2: Build a zero-trip guard for the main-loop. After leaving the
1084 // pre-loop, the main-loop may not execute at all. Later in life this
1085 // zero-trip guard will become the minimum-trip guard when we unroll
1086 // the main-loop.
1087 Node *min_opaq = new Opaque1Node(C, limit);
1088 Node *min_cmp = new CmpINode( pre_incr, min_opaq );
1089 Node *min_bol = new BoolNode( min_cmp, b_test );
1090 register_new_node( min_opaq, new_pre_exit );
1091 register_new_node( min_cmp , new_pre_exit );
1092 register_new_node( min_bol , new_pre_exit );
1093
1094 // Build the IfNode (assume the main-loop is executed always).
1095 IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
1096 _igvn.register_new_node_with_optimizer( min_iff );
1097 set_idom(min_iff, new_pre_exit, dd_main_head);
1098 set_loop(min_iff, outer_loop->_parent);
1099
1100 // Plug in the false-path, taken if we need to skip main-loop
1101 _igvn.hash_delete( pre_exit );
1102 pre_exit->set_req(0, min_iff);
1103 set_idom(pre_exit, min_iff, dd_main_head);
1104 set_idom(pre_exit->unique_ctrl_out(), min_iff, dd_main_head);
1105 // Make the true-path, must enter the main loop
1106 Node *min_taken = new IfTrueNode( min_iff );
1107 _igvn.register_new_node_with_optimizer( min_taken );
1108 set_idom(min_taken, min_iff, dd_main_head);
1109 set_loop(min_taken, outer_loop->_parent);
1110 // Plug in the true path
1111 _igvn.hash_delete(outer_main_head);
1112 outer_main_head->set_req(LoopNode::EntryControl, min_taken);
1113 set_idom(outer_main_head, min_taken, dd_main_head);
1114
1115 Arena *a = Thread::current()->resource_area();
1116 VectorSet visited(a);
1117 Node_Stack clones(a, main_head->back_control()->outcnt());
1118 // Step B3: Make the fall-in values to the main-loop come from the
1119 // fall-out values of the pre-loop.
1120 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
1121 Node* main_phi = main_head->fast_out(i2);
1122 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
1123 Node *pre_phi = old_new[main_phi->_idx];
1124 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
1125 main_head->skip_strip_mined()->in(LoopNode::EntryControl),
1126 pre_phi->in(LoopNode::LoopBackControl),
1127 visited, clones);
1128 _igvn.hash_delete(main_phi);
1129 main_phi->set_req( LoopNode::EntryControl, fallpre );
1130 }
1131 }
1132
1133 // Nodes inside the loop may be control dependent on a predicate
1134 // that was moved before the preloop. If the back branch of the main
1135 // or post loops becomes dead, those nodes won't be dependent on the
1136 // test that guards that loop nest anymore which could lead to an
1137 // incorrect array access because it executes independently of the
1138 // test that was guarding the loop nest. We add a special CastII on
1139 // the if branch that enters the loop, between the input induction
1140 // variable value and the induction variable Phi to preserve correct
1141 // dependencies.
1142
1143 // CastII for the main loop:
1144 bool inserted = cast_incr_before_loop( pre_incr, min_taken, main_head );
1145 assert(inserted, "no castII inserted");
1174
1175 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
1176
1177 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
1178 // Modify pre loop end condition
1179 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1180 BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
1181 register_new_node( new_bol0, pre_head->in(0) );
1182 _igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
1183 // Modify main loop guard condition
1184 assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
1185 BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
1186 register_new_node( new_bol1, new_pre_exit );
1187 _igvn.hash_delete(min_iff);
1188 min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
1189 // Modify main loop end condition
1190 BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1191 BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
1192 register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
1193 _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
1194 if (main_head->is_strip_mined()) {
1195 Node* le = outer_main_head->outer_loop_end();
1196 Node* bol = outer_main_head->outer_bol();
1197 Node* new_bol3 = new_bol2->clone();
1198 new_bol3->set_req(1, bol->in(1));
1199 register_new_node(new_bol3, le->in(0));
1200 _igvn.replace_input_of(le, 1, new_bol3);
1201 }
1202 }
1203
1204 // Flag main loop
1205 main_head->set_main_loop();
1206 if( peel_only ) main_head->set_main_no_pre_loop();
1207
1208 // Subtract a trip count for the pre-loop.
1209 main_head->set_trip_count(main_head->trip_count() - 1);
1210
1211 // It's difficult to be precise about the trip-counts
1212 // for the pre/post loops. They are usually very short,
1213 // so guess that 4 trips is a reasonable value.
1214 post_head->set_profile_trip_cnt(4.0);
1215 pre_head->set_profile_trip_cnt(4.0);
1216
1217 // Now force out all loop-invariant dominating tests. The optimizer
1218 // finds some, but we _know_ they are all useless.
1219 peeled_dom_test_elim(loop,old_new);
1220 loop->record_for_igvn();
1221 }
1316 insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1317
1318 // It's difficult to be precise about the trip-counts
1319 // for post loops. They are usually very short,
1320 // so guess that unit vector trips is a reasonable value.
1321 post_head->set_profile_trip_cnt(4.0);
1322 post_head->set_is_rce_post_loop();
1323
1324 // Now force out all loop-invariant dominating tests. The optimizer
1325 // finds some, but we _know_ they are all useless.
1326 peeled_dom_test_elim(loop, old_new);
1327 loop->record_for_igvn();
1328 }
1329
1330
1331 //------------------------------insert_post_loop-------------------------------
1332 // Insert post loops. Add a post loop to the given loop passed.
1333 Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
1334 CountedLoopNode *main_head, CountedLoopEndNode *main_end,
1335 Node *incr, Node *limit, CountedLoopNode *&post_head) {
1336 IfNode* outer_main_end = main_end;
1337 IdealLoopTree* outer_loop = loop;
1338 if (main_head->is_strip_mined()) {
1339 main_head->verify_strip_mined(1);
1340 outer_main_end = main_head->outer_loop_end();
1341 outer_loop = loop->_parent;
1342 assert(outer_loop->_head == main_head->in(LoopNode::EntryControl), "broken loop tree");
1343 }
1344
1345 //------------------------------
1346 // Step A: Create a new post-Loop.
1347 Node* main_exit = outer_main_end->proj_out(false);
1348 assert(main_exit->Opcode() == Op_IfFalse, "");
1349 int dd_main_exit = dom_depth(main_exit);
1350
1351 // Step A1: Clone the loop body of main. The clone becomes the post-loop.
1352 // The main loop pre-header illegally has 2 control users (old & new loops).
1353 clone_loop(loop, old_new, dd_main_exit, ControlAroundStripMined);
1354 assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
1355 post_head = old_new[main_head->_idx]->as_CountedLoop();
1356 post_head->set_normal_loop();
1357 post_head->set_post_loop(main_head);
1358
1359 // Reduce the post-loop trip count.
1360 CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
1361 post_end->_prob = PROB_FAIR;
1362
1363 // Build the main-loop normal exit.
1364 IfFalseNode *new_main_exit = new IfFalseNode(outer_main_end);
1365 _igvn.register_new_node_with_optimizer(new_main_exit);
1366 set_idom(new_main_exit, outer_main_end, dd_main_exit);
1367 set_loop(new_main_exit, outer_loop->_parent);
1368
1369 // Step A2: Build a zero-trip guard for the post-loop. After leaving the
1370 // main-loop, the post-loop may not execute at all. We 'opaque' the incr
1371 // (the previous loop trip-counter exit value) because we will be changing
1372 // the exit value (via additional unrolling) so we cannot constant-fold away the zero
1373 // trip guard until all unrolling is done.
1374 Node *zer_opaq = new Opaque1Node(C, incr);
1375 Node *zer_cmp = new CmpINode(zer_opaq, limit);
1376 Node *zer_bol = new BoolNode(zer_cmp, main_end->test_trip());
1377 register_new_node(zer_opaq, new_main_exit);
1378 register_new_node(zer_cmp, new_main_exit);
1379 register_new_node(zer_bol, new_main_exit);
1380
1381 // Build the IfNode
1382 IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
1383 _igvn.register_new_node_with_optimizer(zer_iff);
1384 set_idom(zer_iff, new_main_exit, dd_main_exit);
1385 set_loop(zer_iff, outer_loop->_parent);
1386
1387 // Plug in the false-path, taken if we need to skip this post-loop
1388 _igvn.replace_input_of(main_exit, 0, zer_iff);
1389 set_idom(main_exit, zer_iff, dd_main_exit);
1390 set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
1391 // Make the true-path, must enter this post loop
1392 Node *zer_taken = new IfTrueNode(zer_iff);
1393 _igvn.register_new_node_with_optimizer(zer_taken);
1394 set_idom(zer_taken, zer_iff, dd_main_exit);
1395 set_loop(zer_taken, outer_loop->_parent);
1396 // Plug in the true path
1397 _igvn.hash_delete(post_head);
1398 post_head->set_req(LoopNode::EntryControl, zer_taken);
1399 set_idom(post_head, zer_taken, dd_main_exit);
1400
1401 Arena *a = Thread::current()->resource_area();
1402 VectorSet visited(a);
1403 Node_Stack clones(a, main_head->back_control()->outcnt());
1404 // Step A3: Make the fall-in values to the post-loop come from the
1405 // fall-out values of the main-loop.
1406 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
1407 Node* main_phi = main_head->fast_out(i);
1408 if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) {
1409 Node *cur_phi = old_new[main_phi->_idx];
1410 Node *fallnew = clone_up_backedge_goo(main_head->back_control(),
1411 post_head->init_control(),
1412 main_phi->in(LoopNode::LoopBackControl),
1413 visited, clones);
1414 _igvn.hash_delete(cur_phi);
1415 cur_phi->set_req(LoopNode::EntryControl, fallnew);
1450 tty->print("Unroll %d ", loop_head->unrolled_count()*2);
1451 }
1452 loop->dump_head();
1453 }
1454
1455 if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations || TraceLoopOpts))) {
1456 Arena* arena = Thread::current()->resource_area();
1457 Node_Stack stack(arena, C->live_nodes() >> 2);
1458 Node_List rpo_list;
1459 VectorSet visited(arena);
1460 visited.set(loop_head->_idx);
1461 rpo( loop_head, stack, visited, rpo_list );
1462 dump(loop, rpo_list.size(), rpo_list );
1463 }
1464 #endif
1465
1466 // Remember loop node count before unrolling to detect
1467 // if rounds of unroll,optimize are making progress
1468 loop_head->set_node_count_before_unroll(loop->_body.size());
1469
1470 Node *ctrl = loop_head->skip_strip_mined()->in(LoopNode::EntryControl);
1471 Node *limit = loop_head->limit();
1472 Node *init = loop_head->init_trip();
1473 Node *stride = loop_head->stride();
1474
1475 Node *opaq = NULL;
1476 if (adjust_min_trip) { // If not maximally unrolling, need adjustment
1477 // Search for zero-trip guard.
1478
1479 // Check the shape of the graph at the loop entry. If an inappropriate
1480 // graph shape is encountered, the compiler bails out loop unrolling;
1481 // compilation of the method will still succeed.
1482 if (!is_canonical_loop_entry(loop_head)) {
1483 return;
1484 }
1485 opaq = ctrl->in(0)->in(1)->in(1)->in(2);
1486 // Zero-trip test uses an 'opaque' node which is not shared.
1487 assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
1488 }
1489
1490 C->set_major_progress();
1629 // minimum-trip guard.
1630 assert(opaq->outcnt() == 1, "");
1631 _igvn.replace_input_of(opaq, 1, new_limit);
1632 }
1633
1634 // Adjust max trip count. The trip count is intentionally rounded
1635 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
1636 // the main, unrolled, part of the loop will never execute as it is protected
1637 // by the min-trip test. See bug 4834191 for a case where we over-unrolled
1638 // and later determined that part of the unrolled loop was dead.
1639 loop_head->set_trip_count(old_trip_count / 2);
1640
1641 // Double the count of original iterations in the unrolled loop body.
1642 loop_head->double_unrolled_count();
1643
1644 // ---------
1645 // Step 4: Clone the loop body. Move it inside the loop. This loop body
1646 // represents the odd iterations; since the loop trips an even number of
1647 // times its backedge is never taken. Kill the backedge.
1648 uint dd = dom_depth(loop_head);
1649 clone_loop(loop, old_new, dd, IgnoreStripMined);
1650
1651 // Make backedges of the clone equal to backedges of the original.
1652 // Make the fall-in from the original come from the fall-out of the clone.
1653 for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) {
1654 Node* phi = loop_head->fast_out(j);
1655 if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) {
1656 Node *newphi = old_new[phi->_idx];
1657 _igvn.hash_delete( phi );
1658 _igvn.hash_delete( newphi );
1659
1660 phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl));
1661 newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl));
1662 phi ->set_req(LoopNode::LoopBackControl, C->top());
1663 }
1664 }
1665 Node *clone_head = old_new[loop_head->_idx];
1666 _igvn.hash_delete( clone_head );
1667 loop_head ->set_req(LoopNode:: EntryControl, clone_head->in(LoopNode::LoopBackControl));
1668 clone_head->set_req(LoopNode::LoopBackControl, loop_head ->in(LoopNode::LoopBackControl));
1669 loop_head ->set_req(LoopNode::LoopBackControl, C->top());
1672 set_idom(loop_head, loop_head ->in(LoopNode::EntryControl), dd);
1673 set_idom(clone_head, clone_head->in(LoopNode::EntryControl), dd);
1674
1675 // Kill the clone's backedge
1676 Node *newcle = old_new[loop_end->_idx];
1677 _igvn.hash_delete( newcle );
1678 Node *one = _igvn.intcon(1);
1679 set_ctrl(one, C->root());
1680 newcle->set_req(1, one);
1681 // Force clone into same loop body
1682 uint max = loop->_body.size();
1683 for( uint k = 0; k < max; k++ ) {
1684 Node *old = loop->_body.at(k);
1685 Node *nnn = old_new[old->_idx];
1686 loop->_body.push(nnn);
1687 if (!has_ctrl(old))
1688 set_loop(nnn, loop);
1689 }
1690
1691 loop->record_for_igvn();
1692 loop_head->clear_strip_mined();
1693
1694 #ifndef PRODUCT
1695 if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations || TraceLoopOpts))) {
1696 tty->print("\nnew loop after unroll\n"); loop->dump_head();
1697 for (uint i = 0; i < loop->_body.size(); i++) {
1698 loop->_body.at(i)->dump();
1699 }
1700 if(C->clone_map().is_debug()) {
1701 tty->print("\nCloneMap\n");
1702 Dict* dict = C->clone_map().dict();
1703 DictI i(dict);
1704 tty->print_cr("Dict@%p[%d] = ", dict, dict->Size());
1705 for (int ii = 0; i.test(); ++i, ++ii) {
1706 NodeCloneInfo cl((uint64_t)dict->operator[]((void*)i._key));
1707 tty->print("%d->%d:%d,", (int)(intptr_t)i._key, cl.idx(), cl.gen());
1708 if (ii % 10 == 9) {
1709 tty->print_cr(" ");
1710 }
1711 }
1712 tty->print_cr(" ");
2067 int closed_range_checks = 1;
2068
2069 // protect against stride not being a constant
2070 if (!cl->stride_is_con())
2071 return closed_range_checks;
2072
2073 // Find the trip counter; we are iteration splitting based on it
2074 Node *trip_counter = cl->phi();
2075 // Find the main loop limit; we will trim it's iterations
2076 // to not ever trip end tests
2077 Node *main_limit = cl->limit();
2078
2079 // Check graph shape. Cannot optimize a loop if zero-trip
2080 // Opaque1 node is optimized away and then another round
2081 // of loop opts attempted.
2082 if (!is_canonical_loop_entry(cl)) {
2083 return closed_range_checks;
2084 }
2085
2086 // Need to find the main-loop zero-trip guard
2087 Node *ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl);
2088 Node *iffm = ctrl->in(0);
2089 Node *opqzm = iffm->in(1)->in(1)->in(2);
2090 assert(opqzm->in(1) == main_limit, "do not understand situation");
2091
2092 // Find the pre-loop limit; we will expand its iterations to
2093 // not ever trip low tests.
2094 Node *p_f = iffm->in(0);
2095 // pre loop may have been optimized out
2096 if (p_f->Opcode() != Op_IfFalse) {
2097 return closed_range_checks;
2098 }
2099 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2100 assert(pre_end->loopnode()->is_pre_loop(), "");
2101 Node *pre_opaq1 = pre_end->limit();
2102 // Occasionally it's possible for a pre-loop Opaque1 node to be
2103 // optimized away and then another round of loop opts attempted.
2104 // We can not optimize this particular loop in that case.
2105 if (pre_opaq1->Opcode() != Op_Opaque1)
2106 return closed_range_checks;
2107 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2433 last_min = rc_left;
2434 first_time = false;
2435 } else {
2436 Node *cur_min = new MinINode(last_min, rc_left);
2437 last_min = cur_min;
2438 _igvn.register_new_node_with_optimizer(last_min);
2439 }
2440 }
2441 }
2442 }
2443 }
2444
2445 // All we have to do is update the limit of the rce loop
2446 // with the min of our expression and the current limit.
2447 // We will use this expression to replace the current limit.
2448 if (last_min && multi_version_succeeded) {
2449 Node *cur_min = new MinINode(last_min, limit);
2450 _igvn.register_new_node_with_optimizer(cur_min);
2451 Node *cmp_node = rce_loop_end->cmp_node();
2452 _igvn.replace_input_of(cmp_node, 2, cur_min);
2453 set_ctrl(cur_min, ctrl);
2454 set_loop(cur_min, rce_loop->_parent);
2455
2456 legacy_cl->mark_is_multiversioned();
2457 rce_cl->mark_is_multiversioned();
2458 multi_version_succeeded = true;
2459
2460 C->set_major_progress();
2461 }
2462
2463 return multi_version_succeeded;
2464 }
2465
2466 //-------------------------poison_rce_post_loop--------------------------------
2467 // Causes the rce'd post loop to be optimized away if multiversioning fails
2468 void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
2469 CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
2470 Node* ctrl = rce_cl->in(LoopNode::EntryControl);
2471 if (ctrl->is_IfTrue() || ctrl->is_IfFalse()) {
2472 Node* iffm = ctrl->in(0);
2538 float p = iff->_prob;
2539 if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) {
2540 if( top == Op_IfTrue ) {
2541 if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) {
2542 iff->_prob = PROB_STATIC_FREQUENT;
2543 }
2544 } else {
2545 if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) {
2546 iff->_prob = PROB_STATIC_INFREQUENT;
2547 }
2548 }
2549 }
2550 }
2551 }
2552 test = phase->idom(test);
2553 }
2554 }
2555
2556 #ifdef ASSERT
2557 static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
2558 Node *ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl);
2559 assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
2560 Node *iffm = ctrl->in(0);
2561 assert(iffm->Opcode() == Op_If, "");
2562 Node *p_f = iffm->in(0);
2563 assert(p_f->Opcode() == Op_IfFalse, "");
2564 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2565 assert(pre_end->loopnode()->is_pre_loop(), "");
2566 return pre_end->loopnode();
2567 }
2568 #endif
2569
2570 // Remove the main and post loops and make the pre loop execute all
2571 // iterations. Useful when the pre loop is found empty.
2572 void IdealLoopTree::remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase) {
2573 CountedLoopEndNode* pre_end = cl->loopexit();
2574 Node* pre_cmp = pre_end->cmp_node();
2575 if (pre_cmp->in(2)->Opcode() != Op_Opaque1) {
2576 // Only safe to remove the main loop if the compiler optimized it
2577 // out based on an unknown number of iterations
2578 return;
2579 }
2580
2581 // Can we find the main loop?
2582 if (_next == NULL) {
2583 return;
2584 }
2585
2586 Node* next_head = _next->_head;
2587 if (!next_head->is_CountedLoop()) {
2588 return;
2589 }
2590
2591 CountedLoopNode* main_head = next_head->as_CountedLoop();
2592 if (!main_head->is_main_loop()) {
2593 return;
2594 }
2595
2596 assert(locate_pre_from_main(main_head) == cl, "bad main loop");
2597 Node* main_iff = main_head->skip_strip_mined()->in(LoopNode::EntryControl)->in(0);
2598
2599 // Remove the Opaque1Node of the pre loop and make it execute all iterations
2600 phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
2601 // Remove the Opaque1Node of the main loop so it can be optimized out
2602 Node* main_cmp = main_iff->in(1)->in(1);
2603 assert(main_cmp->in(2)->Opcode() == Op_Opaque1, "main loop has no opaque node?");
2604 phase->_igvn.replace_input_of(main_cmp, 2, main_cmp->in(2)->in(1));
2605 }
2606
2607 //------------------------------policy_do_remove_empty_loop--------------------
2608 // Micro-benchmark spamming. Policy is to always remove empty loops.
2609 // The 'DO' part is to replace the trip counter with the value it will
2610 // have on the last iteration. This will break the loop.
2611 bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
2612 // Minimum size must be empty loop
2613 if (_body.size() > EMPTY_LOOP_SIZE)
2614 return false;
2615
2616 if (!_head->is_CountedLoop())
2617 return false; // Dead loop
2638 }
2639 }
2640 assert(iv == cl->phi(), "Wrong phi" );
2641 #endif
2642
2643 // main and post loops have explicitly created zero trip guard
2644 bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
2645 if (needs_guard) {
2646 // Skip guard if values not overlap.
2647 const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
2648 const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
2649 int stride_con = cl->stride_con();
2650 if (stride_con > 0) {
2651 needs_guard = (init_t->_hi >= limit_t->_lo);
2652 } else {
2653 needs_guard = (init_t->_lo <= limit_t->_hi);
2654 }
2655 }
2656 if (needs_guard) {
2657 // Check for an obvious zero trip guard.
2658 Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_strip_mined()->in(LoopNode::EntryControl));
2659 if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
2660 bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
2661 // The test should look like just the backedge of a CountedLoop
2662 Node* iff = inctrl->in(0);
2663 if (iff->is_If()) {
2664 Node* bol = iff->in(1);
2665 if (bol->is_Bool()) {
2666 BoolTest test = bol->as_Bool()->_test;
2667 if (maybe_swapped) {
2668 test._test = test.commute();
2669 test._test = test.negate();
2670 }
2671 if (test._test == cl->loopexit()->test_trip()) {
2672 Node* cmp = bol->in(1);
2673 int init_idx = maybe_swapped ? 2 : 1;
2674 int limit_idx = maybe_swapped ? 1 : 2;
2675 if (cmp->is_Cmp() && cmp->in(init_idx) == cl->init_trip() && cmp->in(limit_idx) == cl->limit()) {
2676 needs_guard = false;
2677 }
2678 }
3186 }
3187 #endif
3188
3189 return msg == NULL;
3190 }
3191
3192
3193
3194 bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) {
3195 // Only for counted inner loops
3196 if (!lpt->is_counted() || !lpt->is_inner()) {
3197 return false;
3198 }
3199
3200 // Must have constant stride
3201 CountedLoopNode* head = lpt->_head->as_CountedLoop();
3202 if (!head->is_valid_counted_loop() || !head->is_normal_loop()) {
3203 return false;
3204 }
3205
3206 head->verify_strip_mined(1);
3207
3208 // Check that the body only contains a store of a loop invariant
3209 // value that is indexed by the loop phi.
3210 Node* store = NULL;
3211 Node* store_value = NULL;
3212 Node* shift = NULL;
3213 Node* offset = NULL;
3214 if (!match_fill_loop(lpt, store, store_value, shift, offset)) {
3215 return false;
3216 }
3217
3218 Node* exit = head->loopexit()->proj_out(0);
3219 if (exit == NULL) {
3220 return false;
3221 }
3222
3223 #ifndef PRODUCT
3224 if (TraceLoopOpts) {
3225 tty->print("ArrayFill ");
3226 lpt->dump_head();
3227 }
3308 Node* length = alloc->as_AllocateArray()->Ideal_length();
3309 if (head->limit() == length &&
3310 head->init_trip() == _igvn.intcon(0)) {
3311 if (TraceOptimizeFill) {
3312 tty->print_cr("Eliminated zeroing in allocation");
3313 }
3314 alloc->maybe_set_complete(&_igvn);
3315 } else {
3316 #ifdef ASSERT
3317 if (TraceOptimizeFill) {
3318 tty->print_cr("filling array but bounds don't match");
3319 alloc->dump();
3320 head->init_trip()->dump();
3321 head->limit()->dump();
3322 length->dump();
3323 }
3324 #endif
3325 }
3326 }
3327 */
3328
3329 if (head->is_strip_mined()) {
3330 // Inner strip mined loop goes away so get rid of outer strip
3331 // mined loop
3332 Node* outer_sfpt = head->outer_safepoint();
3333 Node* in = outer_sfpt->in(0);
3334 Node* outer_out = head->outer_loop_exit();
3335 lazy_replace(outer_out, in);
3336 _igvn.replace_input_of(outer_sfpt, 0, C->top());
3337 }
3338
3339 // Redirect the old control and memory edges that are outside the loop.
3340 // Sometimes the memory phi of the head is used as the outgoing
3341 // state of the loop. It's safe in this case to replace it with the
3342 // result_mem.
3343 _igvn.replace_node(store->in(MemNode::Memory), result_mem);
3344 lazy_replace(exit, result_ctrl);
3345 _igvn.replace_node(store, result_mem);
3346 // Any uses the increment outside of the loop become the loop limit.
3347 _igvn.replace_node(head->incr(), head->limit());
3348
3349 // Disconnect the head from the loop.
3350 for (uint i = 0; i < lpt->_body.size(); i++) {
3351 Node* n = lpt->_body.at(i);
3352 _igvn.replace_node(n, C->top());
3353 }
3354
3355 return true;
3356 }
|