1050 castii->set_req(0, ctrl);
1051 register_new_node(castii, ctrl);
1052 for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
1053 Node* n = incr->fast_out(i);
1054 if (n->is_Phi() && n->in(0) == loop) {
1055 int nrep = n->replace_edge(incr, castii);
1056 return castii;
1057 }
1058 }
1059 return NULL;
1060 }
1061
1062 // Make a copy of the skeleton range check predicates before the main
1063 // loop and set the initial value of loop as input. After unrolling,
1064 // the range of values for the induction variable in the main loop can
1065 // fall outside the allowed range of values by the array access (main
1066 // loop is never executed). When that happens, range check
1067 // CastII/ConvI2L nodes cause some data paths to die. For consistency,
1068 // the control paths must die too but the range checks were removed by
1069 // predication. The range checks that we add here guarantee that they do.
1070 void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop,
1071 LoopNode* outer_main_head, uint dd_main_head) {
1072 if (predicate != NULL) {
1073 IfNode* iff = predicate->in(0)->as_If();
1074 ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
1075 Node* rgn = uncommon_proj->unique_ctrl_out();
1076 assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
1077 assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
1078 predicate = iff->in(0);
1079 Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
1080 Node* prev_proj = current_proj;
1081 while (predicate != NULL && predicate->is_Proj() && predicate->in(0)->is_If()) {
1082 iff = predicate->in(0)->as_If();
1083 uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
1084 if (uncommon_proj->unique_ctrl_out() != rgn)
1085 break;
1086 if (iff->in(1)->Opcode() == Op_Opaque4) {
1087 // Clone the predicate twice and initialize one with the initial
1088 // value of the loop induction variable. Leave the other predicate
1089 // to be initialized when increasing the stride during loop unrolling.
1090 prev_proj = update_skeleton_predicate(iff, castii, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
1091 Node* value = new Opaque1Node(C, castii);
1092 register_new_node(value, current_proj);
1093 prev_proj = update_skeleton_predicate(iff, value, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
1094 // Remove the skeleton predicate from the pre-loop
1095 _igvn.replace_input_of(iff, 1, _igvn.intcon(1));
1096 }
1097 predicate = predicate->in(0)->in(0);
1098 }
1099 _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
1100 set_idom(outer_main_head, prev_proj, dd_main_head);
1101 }
1102 }
1103
1104 Node* PhaseIdealLoop::update_skeleton_predicate(Node* iff, Node* value, Node* predicate, Node* uncommon_proj,
1105 Node* current_proj, IdealLoopTree* outer_loop, Node* prev_proj) {
1106 bool clone = (outer_loop != NULL); // Clone the predicate?
1107 Node_Stack to_clone(2);
1108 to_clone.push(iff->in(1), 1);
1109 uint current = C->unique();
1110 Node* result = NULL;
1111 // Look for the opaque node to replace with the new value
1112 // and clone everything in between. We keep the Opaque4 node
1113 // so the duplicated predicates are eliminated once loop
1114 // opts are over: they are here only to keep the IR graph
1115 // consistent.
1116 do {
1117 Node* n = to_clone.node();
1118 uint i = to_clone.index();
1119 Node* m = n->in(i);
1120 int op = m->Opcode();
1121 if (m->is_Bool() ||
1122 m->is_Cmp() ||
1123 op == Op_AndL ||
1124 op == Op_OrL ||
1125 op == Op_RShiftL ||
1126 op == Op_LShiftL ||
1127 op == Op_AddL ||
1128 op == Op_AddI ||
1129 op == Op_MulL ||
1130 op == Op_MulI ||
1131 op == Op_SubL ||
1132 op == Op_SubI ||
1133 op == Op_ConvI2L) {
1134 to_clone.push(m, 1);
1135 continue;
1136 }
1137 if (op == Op_Opaque1) {
1138 if (!clone) {
1139 // Update the input of the Opaque1Node and exit
1140 _igvn.replace_input_of(m, 1, value);
1141 return prev_proj;
1142 }
1143 if (n->_idx < current) {
1144 n = n->clone();
1145 }
1146 n->set_req(i, value);
1147 register_new_node(n, current_proj);
1148 to_clone.set_node(n);
1149 }
1150 for (;;) {
1151 Node* cur = to_clone.node();
1152 uint j = to_clone.index();
1153 if (j+1 < cur->req()) {
1154 to_clone.set_index(j+1);
1155 break;
1156 }
1157 to_clone.pop();
1158 if (to_clone.size() == 0) {
1159 result = cur;
1160 break;
1161 }
1162 Node* next = to_clone.node();
1163 j = to_clone.index();
1164 if (clone && cur->_idx >= current) {
1165 if (next->_idx < current) {
1166 next = next->clone();
1167 register_new_node(next, current_proj);
1168 to_clone.set_node(next);
1169 }
1170 assert(next->in(j) != cur, "input should have been cloned");
1171 next->set_req(j, cur);
1172 }
1173 }
1174 } while (result == NULL);
1175 if (!clone) {
1176 return NULL;
1177 }
1178 assert(result->_idx >= current, "new node expected");
1179
1180 Node* proj = predicate->clone();
1181 Node* other_proj = uncommon_proj->clone();
1182 Node* new_iff = iff->clone();
1183 new_iff->set_req(1, result);
1184 proj->set_req(0, new_iff);
1185 other_proj->set_req(0, new_iff);
1186 Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
1187 register_new_node(frame, C->start());
1188 // It's impossible for the predicate to fail at runtime. Use an Halt node.
1189 Node* halt = new HaltNode(other_proj, frame);
1190 C->root()->add_req(halt);
1191 new_iff->set_req(0, prev_proj);
1192
1193 register_control(new_iff, outer_loop->_parent, prev_proj);
1194 register_control(proj, outer_loop->_parent, new_iff);
1195 register_control(other_proj, _ltree_root, new_iff);
1196 register_control(halt, _ltree_root, other_proj);
1197 return proj;
1198 }
1199
1200 void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop,
1201 LoopNode* outer_main_head, uint dd_main_head) {
1202 if (UseLoopPredicate) {
1203 Node* entry = pre_head->in(LoopNode::EntryControl);
1204 Node* predicate = NULL;
1205 predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
1206 if (predicate != NULL) {
1207 entry = skip_loop_predicates(entry);
1208 }
1209 Node* profile_predicate = NULL;
1210 if (UseProfiledLoopPredicate) {
1211 profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
1212 if (profile_predicate != NULL) {
1213 entry = skip_loop_predicates(entry);
1214 }
1215 }
1216 predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
1217 duplicate_predicates_helper(predicate, castii, outer_loop, outer_main_head, dd_main_head);
1218 duplicate_predicates_helper(profile_predicate, castii, outer_loop, outer_main_head, dd_main_head);
1219 }
1220 }
1221
1222 //------------------------------insert_pre_post_loops--------------------------
1223 // Insert pre and post loops. If peel_only is set, the pre-loop can not have
1224 // more iterations added. It acts as a 'peel' only, no lower-bound RCE, no
1225 // alignment. Useful to unroll loops that do no array accesses.
1226 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
1227
1228 #ifndef PRODUCT
1229 if (TraceLoopOpts) {
1230 if (peel_only)
1231 tty->print("PeelMainPost ");
1232 else
1233 tty->print("PreMainPost ");
1234 loop->dump_head();
1235 }
1236 #endif
1237 C->set_major_progress();
1238
1345 pre_phi->in(LoopNode::LoopBackControl),
1346 visited, clones);
1347 _igvn.hash_delete(main_phi);
1348 main_phi->set_req( LoopNode::EntryControl, fallpre );
1349 }
1350 }
1351
1352 // Nodes inside the loop may be control dependent on a predicate
1353 // that was moved before the preloop. If the back branch of the main
1354 // or post loops becomes dead, those nodes won't be dependent on the
1355 // test that guards that loop nest anymore which could lead to an
1356 // incorrect array access because it executes independently of the
1357 // test that was guarding the loop nest. We add a special CastII on
1358 // the if branch that enters the loop, between the input induction
1359 // variable value and the induction variable Phi to preserve correct
1360 // dependencies.
1361
1362 // CastII for the main loop:
1363 Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head );
1364 assert(castii != NULL, "no castII inserted");
1365 duplicate_predicates(pre_head, castii, outer_loop, outer_main_head, dd_main_head);
1366
1367 // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
1368 // RCE and alignment may change this later.
1369 Node *cmp_end = pre_end->cmp_node();
1370 assert( cmp_end->in(2) == limit, "" );
1371 Node *pre_limit = new AddINode( init, stride );
1372
1373 // Save the original loop limit in this Opaque1 node for
1374 // use by range check elimination.
1375 Node *pre_opaq = new Opaque1Node(C, pre_limit, limit);
1376
1377 register_new_node( pre_limit, pre_head->in(0) );
1378 register_new_node( pre_opaq , pre_head->in(0) );
1379
1380 // Since no other users of pre-loop compare, I can hack limit directly
1381 assert( cmp_end->outcnt() == 1, "no other users" );
1382 _igvn.hash_delete(cmp_end);
1383 cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
1384
1385 // Special case for not-equal loop bounds:
1624 _igvn.hash_delete(cur_phi);
1625 cur_phi->set_req(LoopNode::EntryControl, fallnew);
1626 }
1627 }
1628
1629 // CastII for the new post loop:
1630 Node* castii = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
1631 assert(castii != NULL, "no castII inserted");
1632
1633 return new_main_exit;
1634 }
1635
1636 //------------------------------is_invariant-----------------------------
1637 // Return true if n is invariant
1638 bool IdealLoopTree::is_invariant(Node* n) const {
1639 Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1640 if (n_c->is_top()) return false;
1641 return !is_member(_phase->get_loop(n_c));
1642 }
1643
1644
1645 //------------------------------do_unroll--------------------------------------
1646 // Unroll the loop body one step - make each trip do 2 iterations.
1647 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
1648 assert(LoopUnrollLimit, "");
1649 CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1650 CountedLoopEndNode *loop_end = loop_head->loopexit();
1651 #ifndef PRODUCT
1652 if (PrintOpto && VerifyLoopOptimizations) {
1653 tty->print("Unrolling ");
1654 loop->dump_head();
1655 } else if (TraceLoopOpts) {
1656 if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
1657 tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
1658 } else {
1659 tty->print("Unroll %d ", loop_head->unrolled_count()*2);
1660 }
1661 loop->dump_head();
1662 }
1663
1689 // graph shape is encountered, the compiler bails out loop unrolling;
1690 // compilation of the method will still succeed.
1691 if (!is_canonical_loop_entry(loop_head)) {
1692 return;
1693 }
1694 opaq = loop_head->skip_predicates()->in(0)->in(1)->in(1)->in(2);
1695 // Zero-trip test uses an 'opaque' node which is not shared.
1696 assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
1697 }
1698
1699 C->set_major_progress();
1700
1701 Node* new_limit = NULL;
1702 int stride_con = stride->get_int();
1703 int stride_p = (stride_con > 0) ? stride_con : -stride_con;
1704 uint old_trip_count = loop_head->trip_count();
1705 // Verify that unroll policy result is still valid.
1706 assert(old_trip_count > 1 &&
1707 (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
1708
1709 if (UseLoopPredicate) {
1710 // Search for skeleton predicates and update them according to the new stride
1711 Node* entry = ctrl;
1712 while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
1713 IfNode* iff = entry->in(0)->as_If();
1714 ProjNode* proj = iff->proj_out(1 - entry->as_Proj()->_con);
1715 if (proj->unique_ctrl_out()->Opcode() != Op_Halt) {
1716 break;
1717 }
1718 if (iff->in(1)->Opcode() == Op_Opaque4) {
1719 // Compute the value of the loop induction variable at the end of the
1720 // first iteration of the unrolled loop: init + new_stride_con - init_inc
1721 int init_inc = stride_con/loop_head->unrolled_count();
1722 assert(init_inc != 0, "invalid loop increment");
1723 int new_stride_con = stride_con * 2;
1724 Node* max_value = _igvn.intcon(new_stride_con - init_inc);
1725 max_value = new AddINode(init, max_value);
1726 register_new_node(max_value, get_ctrl(iff->in(1)));
1727 update_skeleton_predicate(iff, max_value);
1728 }
1729 entry = entry->in(0)->in(0);
1730 }
1731 }
1732
1733 // Adjust loop limit to keep valid iterations number after unroll.
1734 // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
1735 // which may overflow.
1736 if (!adjust_min_trip) {
1737 assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
1738 "odd trip count for maximally unroll");
1739 // Don't need to adjust limit for maximally unroll since trip count is even.
1740 } else if (loop_head->has_exact_trip_count() && init->is_Con()) {
1741 // Loop's limit is constant. Loop's init could be constant when pre-loop
1742 // become peeled iteration.
1743 jlong init_con = init->get_int();
1744 // We can keep old loop limit if iterations count stays the same:
1745 // old_trip_count == new_trip_count * 2
1746 // Note: since old_trip_count >= 2 then new_trip_count >= 1
1747 // so we also don't need to adjust zero trip test.
1748 jlong limit_con = limit->get_int();
1749 // (stride_con*2) not overflow since stride_con <= 8.
1750 int new_stride_con = stride_con * 2;
1751 int stride_m = new_stride_con - (stride_con > 0 ? 1 : -1);
2279 register_new_node(offset, ctrl_off);
2280 *p_offset = offset;
2281 }
2282 return true;
2283 }
2284 if (is_scaled_iv(exp->in(2), iv, p_scale)) {
2285 if (p_offset != NULL) {
2286 *p_scale *= -1;
2287 *p_offset = exp->in(1);
2288 }
2289 return true;
2290 }
2291 }
2292 return false;
2293 }
2294
2295 // Same as PhaseIdealLoop::duplicate_predicates() but for range checks
2296 // eliminated by iteration splitting.
2297 Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
2298 Node* predicate_proj, int scale_con, Node* offset,
2299 Node* limit, jint stride_con) {
2300 bool overflow = false;
2301 BoolNode* bol = rc_predicate(loop, predicate_proj, scale_con, offset, cl->init_trip(), NULL, stride_con, limit, (stride_con > 0) != (scale_con > 0), overflow);
2302 Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1));
2303 register_new_node(opaque_bol, predicate_proj);
2304 IfNode* new_iff = NULL;
2305 if (overflow) {
2306 new_iff = new IfNode(predicate_proj, opaque_bol, PROB_MAX, COUNT_UNKNOWN);
2307 } else {
2308 new_iff = new RangeCheckNode(predicate_proj, opaque_bol, PROB_MAX, COUNT_UNKNOWN);
2309 }
2310 register_control(new_iff, loop->_parent, predicate_proj);
2311 Node* iffalse = new IfFalseNode(new_iff);
2312 register_control(iffalse, _ltree_root, new_iff);
2313 ProjNode* iftrue = new IfTrueNode(new_iff);
2314 register_control(iftrue, loop->_parent, new_iff);
2315 Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
2316 register_new_node(frame, C->start());
2317 Node* halt = new HaltNode(iffalse, frame);
2318 register_control(halt, _ltree_root, iffalse);
2319 C->root()->add_req(halt);
2320 return iftrue;
2321 }
2480 #ifdef ASSERT
2481 if (TraceRangeLimitCheck) {
2482 tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2483 bol->dump(2);
2484 }
2485 #endif
2486 // At this point we have the expression as:
2487 // scale_con * trip_counter + offset :: limit
2488 // where scale_con, offset and limit are loop invariant. Trip_counter
2489 // monotonically increases by stride_con, a constant. Both (or either)
2490 // stride_con and scale_con can be negative which will flip about the
2491 // sense of the test.
2492
2493 // Adjust pre and main loop limits to guard the correct iteration set
2494 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2495 if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2496 // The underflow and overflow limits: 0 <= scale*I+offset < limit
2497 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2498 // (0-offset)/scale could be outside of loop iterations range.
2499 conditional_rc = true;
2500 predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con);
2501 } else {
2502 if (PrintOpto) {
2503 tty->print_cr("missed RCE opportunity");
2504 }
2505 continue; // In release mode, ignore it
2506 }
2507 } else { // Otherwise work on normal compares
2508 switch( b_test._test ) {
2509 case BoolTest::gt:
2510 // Fall into GE case
2511 case BoolTest::ge:
2512 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2513 scale_con = -scale_con;
2514 offset = new SubINode( zero, offset );
2515 register_new_node( offset, pre_ctrl );
2516 limit = new SubINode( zero, limit );
2517 register_new_node( limit, pre_ctrl );
2518 // Fall into LE case
2519 case BoolTest::le:
2520 if (b_test._test != BoolTest::gt) {
|
1050 castii->set_req(0, ctrl);
1051 register_new_node(castii, ctrl);
1052 for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
1053 Node* n = incr->fast_out(i);
1054 if (n->is_Phi() && n->in(0) == loop) {
1055 int nrep = n->replace_edge(incr, castii);
1056 return castii;
1057 }
1058 }
1059 return NULL;
1060 }
1061
1062 // Make a copy of the skeleton range check predicates before the main
1063 // loop and set the initial value of loop as input. After unrolling,
1064 // the range of values for the induction variable in the main loop can
1065 // fall outside the allowed range of values by the array access (main
1066 // loop is never executed). When that happens, range check
1067 // CastII/ConvI2L nodes cause some data paths to die. For consistency,
1068 // the control paths must die too but the range checks were removed by
1069 // predication. The range checks that we add here guarantee that they do.
1070 void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* start, Node* end,
1071 IdealLoopTree* outer_loop, LoopNode* outer_main_head,
1072 uint dd_main_head) {
1073 if (predicate != NULL) {
1074 IfNode* iff = predicate->in(0)->as_If();
1075 ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
1076 Node* rgn = uncommon_proj->unique_ctrl_out();
1077 assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
1078 assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
1079 predicate = iff->in(0);
1080 Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
1081 Node* prev_proj = current_proj;
1082 while (predicate != NULL && predicate->is_Proj() && predicate->in(0)->is_If()) {
1083 iff = predicate->in(0)->as_If();
1084 uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
1085 if (uncommon_proj->unique_ctrl_out() != rgn)
1086 break;
1087 if (iff->in(1)->Opcode() == Op_Opaque4) {
1088 assert(skeleton_predicate_has_opaque(iff), "unexpected");
1089 // Clone the predicate twice and initialize one with the initial
1090 // value of the loop induction variable. Leave the other predicate
1091 // to be initialized when increasing the stride during loop unrolling.
1092 prev_proj = clone_skeleton_predicate(iff, start, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
1093 assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()) == (start->Opcode() == Op_Opaque1), "");
1094 prev_proj = clone_skeleton_predicate(iff, end, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
1095 assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()) == (end->Opcode() == Op_Opaque1), "");
1096 // Remove the skeleton predicate from the pre-loop
1097 _igvn.replace_input_of(iff, 1, _igvn.intcon(1));
1098 }
1099 predicate = predicate->in(0)->in(0);
1100 }
1101 _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
1102 set_idom(outer_main_head, prev_proj, dd_main_head);
1103 }
1104 }
1105
1106 static bool skeleton_follow_inputs(Node* n, int op) {
1107 return (n->is_Bool() ||
1108 n->is_Cmp() ||
1109 op == Op_AndL ||
1110 op == Op_OrL ||
1111 op == Op_RShiftL ||
1112 op == Op_LShiftL ||
1113 op == Op_AddL ||
1114 op == Op_AddI ||
1115 op == Op_MulL ||
1116 op == Op_MulI ||
1117 op == Op_SubL ||
1118 op == Op_SubI ||
1119 op == Op_ConvI2L);
1120 }
1121
1122 bool PhaseIdealLoop::skeleton_predicate_has_opaque(IfNode* iff) {
1123 ResourceMark rm;
1124 Unique_Node_List wq;
1125 wq.push(iff->in(1)->in(1));
1126 for (uint i = 0; i < wq.size(); i++) {
1127 Node* n = wq.at(i);
1128 int op = n->Opcode();
1129 if (skeleton_follow_inputs(n, op)) {
1130 for (uint j = 1; j < n->req(); j++) {
1131 Node* m = n->in(j);
1132 if (m != NULL) {
1133 wq.push(m);
1134 }
1135 }
1136 continue;
1137 }
1138 if (op == Op_Opaque1) {
1139 return true;
1140 }
1141 }
1142 return false;
1143 }
1144
1145 Node* PhaseIdealLoop::clone_skeleton_predicate(Node* iff, Node* value, Node* predicate, Node* uncommon_proj,
1146 Node* current_proj, IdealLoopTree* outer_loop, Node* prev_proj) {
1147 Node_Stack to_clone(2);
1148 to_clone.push(iff->in(1), 1);
1149 uint current = C->unique();
1150 Node* result = NULL;
1151 // Look for the opaque node to replace with the new value
1152 // and clone everything in between. We keep the Opaque4 node
1153 // so the duplicated predicates are eliminated once loop
1154 // opts are over: they are here only to keep the IR graph
1155 // consistent.
1156 do {
1157 Node* n = to_clone.node();
1158 uint i = to_clone.index();
1159 Node* m = n->in(i);
1160 int op = m->Opcode();
1161 if (skeleton_follow_inputs(m, op)) {
1162 to_clone.push(m, 1);
1163 continue;
1164 }
1165 if (op == Op_Opaque1) {
1166 if (n->_idx < current) {
1167 n = n->clone();
1168 }
1169 n->set_req(i, value);
1170 register_new_node(n, current_proj);
1171 to_clone.set_node(n);
1172 }
1173 for (;;) {
1174 Node* cur = to_clone.node();
1175 uint j = to_clone.index();
1176 if (j+1 < cur->req()) {
1177 to_clone.set_index(j+1);
1178 break;
1179 }
1180 to_clone.pop();
1181 if (to_clone.size() == 0) {
1182 result = cur;
1183 break;
1184 }
1185 Node* next = to_clone.node();
1186 j = to_clone.index();
1187 if (next->in(j) != cur) {
1188 assert(cur->_idx >= current || next->in(j)->Opcode() == Op_Opaque1, "new node or Opaque1 being replaced");
1189 if (next->_idx < current) {
1190 next = next->clone();
1191 register_new_node(next, current_proj);
1192 to_clone.set_node(next);
1193 }
1194 next->set_req(j, cur);
1195 }
1196 }
1197 } while (result == NULL);
1198 assert(result->_idx >= current, "new node expected");
1199
1200 Node* proj = predicate->clone();
1201 Node* other_proj = uncommon_proj->clone();
1202 Node* new_iff = iff->clone();
1203 new_iff->set_req(1, result);
1204 proj->set_req(0, new_iff);
1205 other_proj->set_req(0, new_iff);
1206 Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
1207 register_new_node(frame, C->start());
1208 // It's impossible for the predicate to fail at runtime. Use an Halt node.
1209 Node* halt = new HaltNode(other_proj, frame);
1210 C->root()->add_req(halt);
1211 new_iff->set_req(0, prev_proj);
1212
1213 register_control(new_iff, outer_loop->_parent, prev_proj);
1214 register_control(proj, outer_loop->_parent, new_iff);
1215 register_control(other_proj, _ltree_root, new_iff);
1216 register_control(halt, _ltree_root, other_proj);
1217 return proj;
1218 }
1219
1220 void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* start, Node* end,
1221 IdealLoopTree* outer_loop, LoopNode* outer_main_head,
1222 uint dd_main_head) {
1223 if (UseLoopPredicate) {
1224 Node* entry = pre_head->in(LoopNode::EntryControl);
1225 Node* predicate = NULL;
1226 predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
1227 if (predicate != NULL) {
1228 entry = skip_loop_predicates(entry);
1229 }
1230 Node* profile_predicate = NULL;
1231 if (UseProfiledLoopPredicate) {
1232 profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
1233 if (profile_predicate != NULL) {
1234 entry = skip_loop_predicates(entry);
1235 }
1236 }
1237 predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
1238 duplicate_predicates_helper(predicate, start, end, outer_loop, outer_main_head, dd_main_head);
1239 duplicate_predicates_helper(profile_predicate, start, end, outer_loop, outer_main_head, dd_main_head);
1240 }
1241 }
1242
1243 //------------------------------insert_pre_post_loops--------------------------
1244 // Insert pre and post loops. If peel_only is set, the pre-loop can not have
1245 // more iterations added. It acts as a 'peel' only, no lower-bound RCE, no
1246 // alignment. Useful to unroll loops that do no array accesses.
1247 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
1248
1249 #ifndef PRODUCT
1250 if (TraceLoopOpts) {
1251 if (peel_only)
1252 tty->print("PeelMainPost ");
1253 else
1254 tty->print("PreMainPost ");
1255 loop->dump_head();
1256 }
1257 #endif
1258 C->set_major_progress();
1259
1366 pre_phi->in(LoopNode::LoopBackControl),
1367 visited, clones);
1368 _igvn.hash_delete(main_phi);
1369 main_phi->set_req( LoopNode::EntryControl, fallpre );
1370 }
1371 }
1372
1373 // Nodes inside the loop may be control dependent on a predicate
1374 // that was moved before the preloop. If the back branch of the main
1375 // or post loops becomes dead, those nodes won't be dependent on the
1376 // test that guards that loop nest anymore which could lead to an
1377 // incorrect array access because it executes independently of the
1378 // test that was guarding the loop nest. We add a special CastII on
1379 // the if branch that enters the loop, between the input induction
1380 // variable value and the induction variable Phi to preserve correct
1381 // dependencies.
1382
1383 // CastII for the main loop:
1384 Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head );
1385 assert(castii != NULL, "no castII inserted");
1386 Node* opaque_castii = new Opaque1Node(C, castii);
1387 register_new_node(opaque_castii, outer_main_head->in(LoopNode::EntryControl));
1388 duplicate_predicates(pre_head, castii, opaque_castii, outer_loop, outer_main_head, dd_main_head);
1389
1390 // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
1391 // RCE and alignment may change this later.
1392 Node *cmp_end = pre_end->cmp_node();
1393 assert( cmp_end->in(2) == limit, "" );
1394 Node *pre_limit = new AddINode( init, stride );
1395
1396 // Save the original loop limit in this Opaque1 node for
1397 // use by range check elimination.
1398 Node *pre_opaq = new Opaque1Node(C, pre_limit, limit);
1399
1400 register_new_node( pre_limit, pre_head->in(0) );
1401 register_new_node( pre_opaq , pre_head->in(0) );
1402
1403 // Since no other users of pre-loop compare, I can hack limit directly
1404 assert( cmp_end->outcnt() == 1, "no other users" );
1405 _igvn.hash_delete(cmp_end);
1406 cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
1407
1408 // Special case for not-equal loop bounds:
1647 _igvn.hash_delete(cur_phi);
1648 cur_phi->set_req(LoopNode::EntryControl, fallnew);
1649 }
1650 }
1651
1652 // CastII for the new post loop:
1653 Node* castii = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
1654 assert(castii != NULL, "no castII inserted");
1655
1656 return new_main_exit;
1657 }
1658
1659 //------------------------------is_invariant-----------------------------
1660 // Return true if n is invariant
1661 bool IdealLoopTree::is_invariant(Node* n) const {
1662 Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1663 if (n_c->is_top()) return false;
1664 return !is_member(_phase->get_loop(n_c));
1665 }
1666
1667 void PhaseIdealLoop::update_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con) {
1668 // Search for skeleton predicates and update them according to the new stride
1669 Node* entry = ctrl;
1670 Node* prev_proj = ctrl;
1671 LoopNode* outer_loop_head = loop_head->skip_strip_mined();
1672 IdealLoopTree* outer_loop = get_loop(outer_loop_head);
1673 while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
1674 IfNode* iff = entry->in(0)->as_If();
1675 ProjNode* proj = iff->proj_out(1 - entry->as_Proj()->_con);
1676 if (proj->unique_ctrl_out()->Opcode() != Op_Halt) {
1677 break;
1678 }
1679 if (iff->in(1)->Opcode() == Op_Opaque4) {
1680 // Look for predicate with an Opaque1 node that can be used as a template
1681 if (!skeleton_predicate_has_opaque(iff)) {
1682 // No Opaque1 node? It's either the check for the first value
1683 // of the first iteration or the check for the last value of
1684 // the first iteration of an unrolled loop. We can't
1685 // tell. Kill it in any case.
1686 _igvn.replace_input_of(iff, 1, iff->in(1)->in(2));
1687 } else {
1688 // Add back the predicate for the value at the beginning of the first entry
1689 prev_proj = clone_skeleton_predicate(iff, init, entry, proj, ctrl, outer_loop, prev_proj);
1690 assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected");
1691 // Compute the value of the loop induction variable at the end of the
1692 // first iteration of the unrolled loop: init + new_stride_con - init_inc
1693 int init_inc = stride_con/loop_head->unrolled_count();
1694 assert(init_inc != 0, "invalid loop increment");
1695 int new_stride_con = stride_con * 2;
1696 Node* max_value = _igvn.intcon(new_stride_con - init_inc);
1697 max_value = new AddINode(init, max_value);
1698 register_new_node(max_value, get_ctrl(iff->in(1)));
1699 prev_proj = clone_skeleton_predicate(iff, max_value, entry, proj, ctrl, outer_loop, prev_proj);
1700 assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected");
1701 }
1702 }
1703 entry = entry->in(0)->in(0);
1704 }
1705 if (prev_proj != ctrl) {
1706 _igvn.replace_input_of(outer_loop_head, LoopNode::EntryControl, prev_proj);
1707 set_idom(outer_loop_head, prev_proj, dom_depth(outer_loop_head));
1708 }
1709 }
1710
1711 //------------------------------do_unroll--------------------------------------
1712 // Unroll the loop body one step - make each trip do 2 iterations.
1713 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
1714 assert(LoopUnrollLimit, "");
1715 CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1716 CountedLoopEndNode *loop_end = loop_head->loopexit();
1717 #ifndef PRODUCT
1718 if (PrintOpto && VerifyLoopOptimizations) {
1719 tty->print("Unrolling ");
1720 loop->dump_head();
1721 } else if (TraceLoopOpts) {
1722 if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
1723 tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
1724 } else {
1725 tty->print("Unroll %d ", loop_head->unrolled_count()*2);
1726 }
1727 loop->dump_head();
1728 }
1729
1755 // graph shape is encountered, the compiler bails out loop unrolling;
1756 // compilation of the method will still succeed.
1757 if (!is_canonical_loop_entry(loop_head)) {
1758 return;
1759 }
1760 opaq = loop_head->skip_predicates()->in(0)->in(1)->in(1)->in(2);
1761 // Zero-trip test uses an 'opaque' node which is not shared.
1762 assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
1763 }
1764
1765 C->set_major_progress();
1766
1767 Node* new_limit = NULL;
1768 int stride_con = stride->get_int();
1769 int stride_p = (stride_con > 0) ? stride_con : -stride_con;
1770 uint old_trip_count = loop_head->trip_count();
1771 // Verify that unroll policy result is still valid.
1772 assert(old_trip_count > 1 &&
1773 (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
1774
1775 update_skeleton_predicates(ctrl, loop_head, init, stride_con);
1776
1777 // Adjust loop limit to keep valid iterations number after unroll.
1778 // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
1779 // which may overflow.
1780 if (!adjust_min_trip) {
1781 assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
1782 "odd trip count for maximally unroll");
1783 // Don't need to adjust limit for maximally unroll since trip count is even.
1784 } else if (loop_head->has_exact_trip_count() && init->is_Con()) {
1785 // Loop's limit is constant. Loop's init could be constant when pre-loop
1786 // become peeled iteration.
1787 jlong init_con = init->get_int();
1788 // We can keep old loop limit if iterations count stays the same:
1789 // old_trip_count == new_trip_count * 2
1790 // Note: since old_trip_count >= 2 then new_trip_count >= 1
1791 // so we also don't need to adjust zero trip test.
1792 jlong limit_con = limit->get_int();
1793 // (stride_con*2) not overflow since stride_con <= 8.
1794 int new_stride_con = stride_con * 2;
1795 int stride_m = new_stride_con - (stride_con > 0 ? 1 : -1);
2323 register_new_node(offset, ctrl_off);
2324 *p_offset = offset;
2325 }
2326 return true;
2327 }
2328 if (is_scaled_iv(exp->in(2), iv, p_scale)) {
2329 if (p_offset != NULL) {
2330 *p_scale *= -1;
2331 *p_offset = exp->in(1);
2332 }
2333 return true;
2334 }
2335 }
2336 return false;
2337 }
2338
2339 // Same as PhaseIdealLoop::duplicate_predicates() but for range checks
2340 // eliminated by iteration splitting.
2341 Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
2342 Node* predicate_proj, int scale_con, Node* offset,
2343 Node* limit, jint stride_con, Node* value) {
2344 bool overflow = false;
2345 BoolNode* bol = rc_predicate(loop, predicate_proj, scale_con, offset, value, NULL, stride_con, limit, (stride_con > 0) != (scale_con > 0), overflow);
2346 Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1));
2347 register_new_node(opaque_bol, predicate_proj);
2348 IfNode* new_iff = NULL;
2349 if (overflow) {
2350 new_iff = new IfNode(predicate_proj, opaque_bol, PROB_MAX, COUNT_UNKNOWN);
2351 } else {
2352 new_iff = new RangeCheckNode(predicate_proj, opaque_bol, PROB_MAX, COUNT_UNKNOWN);
2353 }
2354 register_control(new_iff, loop->_parent, predicate_proj);
2355 Node* iffalse = new IfFalseNode(new_iff);
2356 register_control(iffalse, _ltree_root, new_iff);
2357 ProjNode* iftrue = new IfTrueNode(new_iff);
2358 register_control(iftrue, loop->_parent, new_iff);
2359 Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
2360 register_new_node(frame, C->start());
2361 Node* halt = new HaltNode(iffalse, frame);
2362 register_control(halt, _ltree_root, iffalse);
2363 C->root()->add_req(halt);
2364 return iftrue;
2365 }
2524 #ifdef ASSERT
2525 if (TraceRangeLimitCheck) {
2526 tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2527 bol->dump(2);
2528 }
2529 #endif
2530 // At this point we have the expression as:
2531 // scale_con * trip_counter + offset :: limit
2532 // where scale_con, offset and limit are loop invariant. Trip_counter
2533 // monotonically increases by stride_con, a constant. Both (or either)
2534 // stride_con and scale_con can be negative which will flip about the
2535 // sense of the test.
2536
2537 // Adjust pre and main loop limits to guard the correct iteration set
2538 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2539 if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2540 // The underflow and overflow limits: 0 <= scale*I+offset < limit
2541 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2542 // (0-offset)/scale could be outside of loop iterations range.
2543 conditional_rc = true;
2544 Node* init = cl->init_trip();
2545 Node* opaque_init = new Opaque1Node(C, init);
2546 register_new_node(opaque_init, predicate_proj);
2547 // template predicate so it can be updated on next unrolling
2548 predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, opaque_init);
2549 assert(skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected");
2550 // predicate on first value of first iteration
2551 predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, init);
2552 assert(!skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected");
2553 int init_inc = stride_con/cl->unrolled_count();
2554 assert(init_inc != 0, "invalid loop increment");
2555 Node* max_value = _igvn.intcon(stride_con - init_inc);
2556 max_value = new AddINode(init, max_value);
2557 register_new_node(max_value, predicate_proj);
2558 // predicate on last value of first iteration (in case unrolling has already happened)
2559 predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, max_value);
2560 assert(!skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected");
2561 } else {
2562 if (PrintOpto) {
2563 tty->print_cr("missed RCE opportunity");
2564 }
2565 continue; // In release mode, ignore it
2566 }
2567 } else { // Otherwise work on normal compares
2568 switch( b_test._test ) {
2569 case BoolTest::gt:
2570 // Fall into GE case
2571 case BoolTest::ge:
2572 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2573 scale_con = -scale_con;
2574 offset = new SubINode( zero, offset );
2575 register_new_node( offset, pre_ctrl );
2576 limit = new SubINode( zero, limit );
2577 register_new_node( limit, pre_ctrl );
2578 // Fall into LE case
2579 case BoolTest::le:
2580 if (b_test._test != BoolTest::gt) {
|