< prev index next >

src/hotspot/share/opto/loopTransform.cpp

Print this page




1050   castii->set_req(0, ctrl);
1051   register_new_node(castii, ctrl);
1052   for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
1053     Node* n = incr->fast_out(i);
1054     if (n->is_Phi() && n->in(0) == loop) {
1055       int nrep = n->replace_edge(incr, castii);
1056       return castii;
1057     }
1058   }
1059   return NULL;
1060 }
1061 
1062 // Make a copy of the skeleton range check predicates before the main
1063 // loop and set the initial value of loop as input. After unrolling,
1064 // the range of values for the induction variable in the main loop can
1065 // fall outside the allowed range of values by the array access (main
1066 // loop is never executed). When that happens, range check
1067 // CastII/ConvI2L nodes cause some data paths to die. For consistency,
1068 // the control paths must die too but the range checks were removed by
1069 // predication. The range checks that we add here guarantee that they do.
1070 void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop,
1071                                                  LoopNode* outer_main_head, uint dd_main_head) {

1072   if (predicate != NULL) {
1073     IfNode* iff = predicate->in(0)->as_If();
1074     ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
1075     Node* rgn = uncommon_proj->unique_ctrl_out();
1076     assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
1077     assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
1078     predicate = iff->in(0);
1079     Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
1080     Node* prev_proj = current_proj;
1081     while (predicate != NULL && predicate->is_Proj() && predicate->in(0)->is_If()) {
1082       iff = predicate->in(0)->as_If();
1083       uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
1084       if (uncommon_proj->unique_ctrl_out() != rgn)
1085         break;
1086       if (iff->in(1)->Opcode() == Op_Opaque4) {

1087         // Clone the predicate twice and initialize one with the initial
1088         // value of the loop induction variable. Leave the other predicate
1089         // to be initialized when increasing the stride during loop unrolling.
1090         prev_proj = update_skeleton_predicate(iff, castii, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
1091         Node* value = new Opaque1Node(C, castii);
1092         register_new_node(value, current_proj);
1093         prev_proj = update_skeleton_predicate(iff, value, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
1094         // Remove the skeleton predicate from the pre-loop
1095         _igvn.replace_input_of(iff, 1, _igvn.intcon(1));
1096       }
1097       predicate = predicate->in(0)->in(0);
1098     }
1099     _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
1100     set_idom(outer_main_head, prev_proj, dd_main_head);
1101   }
1102 }
1103 
1104 Node* PhaseIdealLoop::update_skeleton_predicate(Node* iff, Node* value, Node* predicate, Node* uncommon_proj,







































1105                                                 Node* current_proj, IdealLoopTree* outer_loop, Node* prev_proj) {
1106   bool clone = (outer_loop != NULL); // Clone the predicate?
1107   Node_Stack to_clone(2);
1108   to_clone.push(iff->in(1), 1);
1109   uint current = C->unique();
1110   Node* result = NULL;
1111   // Look for the opaque node to replace with the new value
1112   // and clone everything in between. We keep the Opaque4 node
1113   // so the duplicated predicates are eliminated once loop
1114   // opts are over: they are here only to keep the IR graph
1115   // consistent.
1116   do {
1117     Node* n = to_clone.node();
1118     uint i = to_clone.index();
1119     Node* m = n->in(i);
1120     int op = m->Opcode();
1121     if (m->is_Bool() ||
1122         m->is_Cmp() ||
1123         op == Op_AndL ||
1124         op == Op_OrL ||
1125         op == Op_RShiftL ||
1126         op == Op_LShiftL ||
1127         op == Op_AddL ||
1128         op == Op_AddI ||
1129         op == Op_MulL ||
1130         op == Op_MulI ||
1131         op == Op_SubL ||
1132         op == Op_SubI ||
1133         op == Op_ConvI2L) {
1134         to_clone.push(m, 1);
1135         continue;
1136     }
1137     if (op == Op_Opaque1) {
1138       if (!clone) {
1139         // Update the input of the Opaque1Node and exit
1140         _igvn.replace_input_of(m, 1, value);
1141         return prev_proj;
1142       }
1143       if (n->_idx < current) {
1144         n = n->clone();
1145       }
1146       n->set_req(i, value);
1147       register_new_node(n, current_proj);
1148       to_clone.set_node(n);
1149     }
1150     for (;;) {
1151       Node* cur = to_clone.node();
1152       uint j = to_clone.index();
1153       if (j+1 < cur->req()) {
1154         to_clone.set_index(j+1);
1155         break;
1156       }
1157       to_clone.pop();
1158       if (to_clone.size() == 0) {
1159         result = cur;
1160         break;
1161       }
1162       Node* next = to_clone.node();
1163       j = to_clone.index();
1164       if (clone && cur->_idx >= current) {

1165         if (next->_idx < current) {
1166           next = next->clone();
1167           register_new_node(next, current_proj);
1168           to_clone.set_node(next);
1169         }
1170         assert(next->in(j) != cur, "input should have been cloned");
1171         next->set_req(j, cur);
1172       }
1173     }
1174   } while (result == NULL);
1175   if (!clone) {
1176     return NULL;
1177   }
1178   assert(result->_idx >= current, "new node expected");
1179 
1180   Node* proj = predicate->clone();
1181   Node* other_proj = uncommon_proj->clone();
1182   Node* new_iff = iff->clone();
1183   new_iff->set_req(1, result);
1184   proj->set_req(0, new_iff);
1185   other_proj->set_req(0, new_iff);
1186   Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
1187   register_new_node(frame, C->start());
1188   // It's impossible for the predicate to fail at runtime. Use an Halt node.
1189   Node* halt = new HaltNode(other_proj, frame);
1190   C->root()->add_req(halt);
1191   new_iff->set_req(0, prev_proj);
1192 
1193   register_control(new_iff, outer_loop->_parent, prev_proj);
1194   register_control(proj, outer_loop->_parent, new_iff);
1195   register_control(other_proj, _ltree_root, new_iff);
1196   register_control(halt, _ltree_root, other_proj);
1197   return proj;
1198 }
1199 
1200 void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop,
1201                                           LoopNode* outer_main_head, uint dd_main_head) {

1202   if (UseLoopPredicate) {
1203     Node* entry = pre_head->in(LoopNode::EntryControl);
1204     Node* predicate = NULL;
1205     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
1206     if (predicate != NULL) {
1207       entry = skip_loop_predicates(entry);
1208     }
1209     Node* profile_predicate = NULL;
1210     if (UseProfiledLoopPredicate) {
1211       profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
1212       if (profile_predicate != NULL) {
1213         entry = skip_loop_predicates(entry);
1214       }
1215     }
1216     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
1217     duplicate_predicates_helper(predicate, castii, outer_loop, outer_main_head, dd_main_head);
1218     duplicate_predicates_helper(profile_predicate, castii, outer_loop, outer_main_head, dd_main_head);
1219   }
1220 }
1221 
1222 //------------------------------insert_pre_post_loops--------------------------
1223 // Insert pre and post loops.  If peel_only is set, the pre-loop can not have
1224 // more iterations added.  It acts as a 'peel' only, no lower-bound RCE, no
1225 // alignment.  Useful to unroll loops that do no array accesses.
1226 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
1227 
1228 #ifndef PRODUCT
1229   if (TraceLoopOpts) {
1230     if (peel_only)
1231       tty->print("PeelMainPost ");
1232     else
1233       tty->print("PreMainPost  ");
1234     loop->dump_head();
1235   }
1236 #endif
1237   C->set_major_progress();
1238 


1345                                              pre_phi->in(LoopNode::LoopBackControl),
1346                                              visited, clones);
1347       _igvn.hash_delete(main_phi);
1348       main_phi->set_req( LoopNode::EntryControl, fallpre );
1349     }
1350   }
1351 
1352   // Nodes inside the loop may be control dependent on a predicate
1353   // that was moved before the preloop. If the back branch of the main
1354   // or post loops becomes dead, those nodes won't be dependent on the
1355   // test that guards that loop nest anymore which could lead to an
1356   // incorrect array access because it executes independently of the
1357   // test that was guarding the loop nest. We add a special CastII on
1358   // the if branch that enters the loop, between the input induction
1359   // variable value and the induction variable Phi to preserve correct
1360   // dependencies.
1361 
1362   // CastII for the main loop:
1363   Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head );
1364   assert(castii != NULL, "no castII inserted");
1365   duplicate_predicates(pre_head, castii, outer_loop, outer_main_head, dd_main_head);


1366 
1367   // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
1368   // RCE and alignment may change this later.
1369   Node *cmp_end = pre_end->cmp_node();
1370   assert( cmp_end->in(2) == limit, "" );
1371   Node *pre_limit = new AddINode( init, stride );
1372 
1373   // Save the original loop limit in this Opaque1 node for
1374   // use by range check elimination.
1375   Node *pre_opaq  = new Opaque1Node(C, pre_limit, limit);
1376 
1377   register_new_node( pre_limit, pre_head->in(0) );
1378   register_new_node( pre_opaq , pre_head->in(0) );
1379 
1380   // Since no other users of pre-loop compare, I can hack limit directly
1381   assert( cmp_end->outcnt() == 1, "no other users" );
1382   _igvn.hash_delete(cmp_end);
1383   cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
1384 
1385   // Special case for not-equal loop bounds:


1624       _igvn.hash_delete(cur_phi);
1625       cur_phi->set_req(LoopNode::EntryControl, fallnew);
1626     }
1627   }
1628 
1629   // CastII for the new post loop:
1630   Node* castii = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
1631   assert(castii != NULL, "no castII inserted");
1632 
1633   return new_main_exit;
1634 }
1635 
1636 //------------------------------is_invariant-----------------------------
1637 // Return true if n is invariant
1638 bool IdealLoopTree::is_invariant(Node* n) const {
1639   Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1640   if (n_c->is_top()) return false;
1641   return !is_member(_phase->get_loop(n_c));
1642 }
1643 











































1644 
1645 //------------------------------do_unroll--------------------------------------
1646 // Unroll the loop body one step - make each trip do 2 iterations.
1647 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
1648   assert(LoopUnrollLimit, "");
1649   CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1650   CountedLoopEndNode *loop_end = loop_head->loopexit();
1651 #ifndef PRODUCT
1652   if (PrintOpto && VerifyLoopOptimizations) {
1653     tty->print("Unrolling ");
1654     loop->dump_head();
1655   } else if (TraceLoopOpts) {
1656     if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
1657       tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
1658     } else {
1659       tty->print("Unroll %d     ", loop_head->unrolled_count()*2);
1660     }
1661     loop->dump_head();
1662   }
1663 


1689     // graph shape is encountered, the compiler bails out loop unrolling;
1690     // compilation of the method will still succeed.
1691     if (!is_canonical_loop_entry(loop_head)) {
1692       return;
1693     }
1694     opaq = loop_head->skip_predicates()->in(0)->in(1)->in(1)->in(2);
1695     // Zero-trip test uses an 'opaque' node which is not shared.
1696     assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
1697   }
1698 
1699   C->set_major_progress();
1700 
1701   Node* new_limit = NULL;
1702   int stride_con = stride->get_int();
1703   int stride_p = (stride_con > 0) ? stride_con : -stride_con;
1704   uint old_trip_count = loop_head->trip_count();
1705   // Verify that unroll policy result is still valid.
1706   assert(old_trip_count > 1 &&
1707       (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
1708 
1709   if (UseLoopPredicate) {
1710     // Search for skeleton predicates and update them according to the new stride
1711     Node* entry = ctrl;
1712     while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
1713       IfNode* iff = entry->in(0)->as_If();
1714       ProjNode* proj = iff->proj_out(1 - entry->as_Proj()->_con);
1715       if (proj->unique_ctrl_out()->Opcode() != Op_Halt) {
1716         break;
1717       }
1718       if (iff->in(1)->Opcode() == Op_Opaque4) {
1719         // Compute the value of the loop induction variable at the end of the
1720         // first iteration of the unrolled loop: init + new_stride_con - init_inc
1721         int init_inc = stride_con/loop_head->unrolled_count();
1722         assert(init_inc != 0, "invalid loop increment");
1723         int new_stride_con = stride_con * 2;
1724         Node* max_value = _igvn.intcon(new_stride_con - init_inc);
1725         max_value = new AddINode(init, max_value);
1726         register_new_node(max_value, get_ctrl(iff->in(1)));
1727         update_skeleton_predicate(iff, max_value);
1728       }
1729       entry = entry->in(0)->in(0);
1730     }
1731   }
1732 
1733   // Adjust loop limit to keep valid iterations number after unroll.
1734   // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
1735   // which may overflow.
1736   if (!adjust_min_trip) {
1737     assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
1738         "odd trip count for maximally unroll");
1739     // Don't need to adjust limit for maximally unroll since trip count is even.
1740   } else if (loop_head->has_exact_trip_count() && init->is_Con()) {
1741     // Loop's limit is constant. Loop's init could be constant when pre-loop
1742     // become peeled iteration.
1743     jlong init_con = init->get_int();
1744     // We can keep old loop limit if iterations count stays the same:
1745     //   old_trip_count == new_trip_count * 2
1746     // Note: since old_trip_count >= 2 then new_trip_count >= 1
1747     // so we also don't need to adjust zero trip test.
1748     jlong limit_con  = limit->get_int();
1749     // (stride_con*2) not overflow since stride_con <= 8.
1750     int new_stride_con = stride_con * 2;
1751     int stride_m    = new_stride_con - (stride_con > 0 ? 1 : -1);


2279         register_new_node(offset, ctrl_off);
2280         *p_offset = offset;
2281       }
2282       return true;
2283     }
2284     if (is_scaled_iv(exp->in(2), iv, p_scale)) {
2285       if (p_offset != NULL) {
2286         *p_scale *= -1;
2287         *p_offset = exp->in(1);
2288       }
2289       return true;
2290     }
2291   }
2292   return false;
2293 }
2294 
2295 // Same as PhaseIdealLoop::duplicate_predicates() but for range checks
2296 // eliminated by iteration splitting.
2297 Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
2298                                                 Node* predicate_proj, int scale_con, Node* offset,
2299                                                 Node* limit, jint stride_con) {
2300   bool overflow = false;
2301   BoolNode* bol = rc_predicate(loop, predicate_proj, scale_con, offset, cl->init_trip(), NULL, stride_con, limit, (stride_con > 0) != (scale_con > 0), overflow);
2302   Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1));
2303   register_new_node(opaque_bol, predicate_proj);
2304   IfNode* new_iff = NULL;
2305   if (overflow) {
2306     new_iff = new IfNode(predicate_proj, opaque_bol, PROB_MAX, COUNT_UNKNOWN);
2307   } else {
2308     new_iff = new RangeCheckNode(predicate_proj, opaque_bol, PROB_MAX, COUNT_UNKNOWN);
2309   }
2310   register_control(new_iff, loop->_parent, predicate_proj);
2311   Node* iffalse = new IfFalseNode(new_iff);
2312   register_control(iffalse, _ltree_root, new_iff);
2313   ProjNode* iftrue = new IfTrueNode(new_iff);
2314   register_control(iftrue, loop->_parent, new_iff);
2315   Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
2316   register_new_node(frame, C->start());
2317   Node* halt = new HaltNode(iffalse, frame);
2318   register_control(halt, _ltree_root, iffalse);
2319   C->root()->add_req(halt);
2320   return iftrue;
2321 }


2480 #ifdef ASSERT
2481       if (TraceRangeLimitCheck) {
2482         tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2483         bol->dump(2);
2484       }
2485 #endif
2486       // At this point we have the expression as:
2487       //   scale_con * trip_counter + offset :: limit
2488       // where scale_con, offset and limit are loop invariant.  Trip_counter
2489       // monotonically increases by stride_con, a constant.  Both (or either)
2490       // stride_con and scale_con can be negative which will flip about the
2491       // sense of the test.
2492 
2493       // Adjust pre and main loop limits to guard the correct iteration set
2494       if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2495         if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2496           // The underflow and overflow limits: 0 <= scale*I+offset < limit
2497           add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2498           // (0-offset)/scale could be outside of loop iterations range.
2499           conditional_rc = true;
2500           predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con);
















2501         } else {
2502           if (PrintOpto) {
2503             tty->print_cr("missed RCE opportunity");
2504           }
2505           continue;             // In release mode, ignore it
2506         }
2507       } else {                  // Otherwise work on normal compares
2508         switch( b_test._test ) {
2509         case BoolTest::gt:
2510           // Fall into GE case
2511         case BoolTest::ge:
2512           // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2513           scale_con = -scale_con;
2514           offset = new SubINode( zero, offset );
2515           register_new_node( offset, pre_ctrl );
2516           limit  = new SubINode( zero, limit );
2517           register_new_node( limit, pre_ctrl );
2518           // Fall into LE case
2519         case BoolTest::le:
2520           if (b_test._test != BoolTest::gt) {




1050   castii->set_req(0, ctrl);
1051   register_new_node(castii, ctrl);
1052   for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
1053     Node* n = incr->fast_out(i);
1054     if (n->is_Phi() && n->in(0) == loop) {
1055       int nrep = n->replace_edge(incr, castii);
1056       return castii;
1057     }
1058   }
1059   return NULL;
1060 }
1061 
1062 // Make a copy of the skeleton range check predicates before the main
1063 // loop and set the initial value of loop as input. After unrolling,
1064 // the range of values for the induction variable in the main loop can
1065 // fall outside the allowed range of values by the array access (main
1066 // loop is never executed). When that happens, range check
1067 // CastII/ConvI2L nodes cause some data paths to die. For consistency,
1068 // the control paths must die too but the range checks were removed by
1069 // predication. The range checks that we add here guarantee that they do.
1070 void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* start, Node* end,
1071                                                  IdealLoopTree* outer_loop, LoopNode* outer_main_head,
1072                                                  uint dd_main_head) {
1073   if (predicate != NULL) {
1074     IfNode* iff = predicate->in(0)->as_If();
1075     ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
1076     Node* rgn = uncommon_proj->unique_ctrl_out();
1077     assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
1078     assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
1079     predicate = iff->in(0);
1080     Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
1081     Node* prev_proj = current_proj;
1082     while (predicate != NULL && predicate->is_Proj() && predicate->in(0)->is_If()) {
1083       iff = predicate->in(0)->as_If();
1084       uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
1085       if (uncommon_proj->unique_ctrl_out() != rgn)
1086         break;
1087       if (iff->in(1)->Opcode() == Op_Opaque4) {
1088         assert(skeleton_predicate_has_opaque(iff), "unexpected");
1089         // Clone the predicate twice and initialize one with the initial
1090         // value of the loop induction variable. Leave the other predicate
1091         // to be initialized when increasing the stride during loop unrolling.
1092         prev_proj = clone_skeleton_predicate(iff, start, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
1093         assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()) == (start->Opcode() == Op_Opaque1), "");
1094         prev_proj = clone_skeleton_predicate(iff, end, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
1095         assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()) == (end->Opcode() == Op_Opaque1), "");
1096         // Remove the skeleton predicate from the pre-loop
1097         _igvn.replace_input_of(iff, 1, _igvn.intcon(1));
1098       }
1099       predicate = predicate->in(0)->in(0);
1100     }
1101     _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
1102     set_idom(outer_main_head, prev_proj, dd_main_head);
1103   }
1104 }
1105 
1106 static bool skeleton_follow_inputs(Node* n, int op) {
1107   return (n->is_Bool() ||
1108           n->is_Cmp() ||
1109           op == Op_AndL ||
1110           op == Op_OrL ||
1111           op == Op_RShiftL ||
1112           op == Op_LShiftL ||
1113           op == Op_AddL ||
1114           op == Op_AddI ||
1115           op == Op_MulL ||
1116           op == Op_MulI ||
1117           op == Op_SubL ||
1118           op == Op_SubI ||
1119           op == Op_ConvI2L);
1120 }
1121     
1122 bool PhaseIdealLoop::skeleton_predicate_has_opaque(IfNode* iff) {
1123   ResourceMark rm;
1124   Unique_Node_List wq;
1125   wq.push(iff->in(1)->in(1));
1126   for (uint i = 0; i < wq.size(); i++) {
1127     Node* n = wq.at(i);
1128     int op = n->Opcode();
1129     if (skeleton_follow_inputs(n, op)) {
1130       for (uint j = 1; j < n->req(); j++) {
1131         Node* m = n->in(j);
1132         if (m != NULL) {
1133           wq.push(m);
1134         }
1135       }
1136       continue;
1137     }
1138     if (op == Op_Opaque1) {
1139       return true;
1140     }
1141   }
1142   return false;
1143 }
1144 
1145 Node* PhaseIdealLoop::clone_skeleton_predicate(Node* iff, Node* value, Node* predicate, Node* uncommon_proj,
1146                                                Node* current_proj, IdealLoopTree* outer_loop, Node* prev_proj) {

1147   Node_Stack to_clone(2);
1148   to_clone.push(iff->in(1), 1);
1149   uint current = C->unique();
1150   Node* result = NULL;
1151   // Look for the opaque node to replace with the new value
1152   // and clone everything in between. We keep the Opaque4 node
1153   // so the duplicated predicates are eliminated once loop
1154   // opts are over: they are here only to keep the IR graph
1155   // consistent.
1156   do {
1157     Node* n = to_clone.node();
1158     uint i = to_clone.index();
1159     Node* m = n->in(i);
1160     int op = m->Opcode();
1161     if (skeleton_follow_inputs(m, op)) {












1162         to_clone.push(m, 1);
1163         continue;
1164     }
1165     if (op == Op_Opaque1) {





1166       if (n->_idx < current) {
1167         n = n->clone();
1168       }
1169       n->set_req(i, value);
1170       register_new_node(n, current_proj);
1171       to_clone.set_node(n);
1172     }
1173     for (;;) {
1174       Node* cur = to_clone.node();
1175       uint j = to_clone.index();
1176       if (j+1 < cur->req()) {
1177         to_clone.set_index(j+1);
1178         break;
1179       }
1180       to_clone.pop();
1181       if (to_clone.size() == 0) {
1182         result = cur;
1183         break;
1184       }
1185       Node* next = to_clone.node();
1186       j = to_clone.index();
1187       if (next->in(j) != cur) {
1188         assert(cur->_idx >= current || next->in(j)->Opcode() == Op_Opaque1, "new node or Opaque1 being replaced");
1189         if (next->_idx < current) {
1190           next = next->clone();
1191           register_new_node(next, current_proj);
1192           to_clone.set_node(next);
1193         }

1194         next->set_req(j, cur);
1195       }
1196     }
1197   } while (result == NULL);



1198   assert(result->_idx >= current, "new node expected");
1199 
1200   Node* proj = predicate->clone();
1201   Node* other_proj = uncommon_proj->clone();
1202   Node* new_iff = iff->clone();
1203   new_iff->set_req(1, result);
1204   proj->set_req(0, new_iff);
1205   other_proj->set_req(0, new_iff);
1206   Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
1207   register_new_node(frame, C->start());
1208   // It's impossible for the predicate to fail at runtime. Use an Halt node.
1209   Node* halt = new HaltNode(other_proj, frame);
1210   C->root()->add_req(halt);
1211   new_iff->set_req(0, prev_proj);
1212 
1213   register_control(new_iff, outer_loop->_parent, prev_proj);
1214   register_control(proj, outer_loop->_parent, new_iff);
1215   register_control(other_proj, _ltree_root, new_iff);
1216   register_control(halt, _ltree_root, other_proj);
1217   return proj;
1218 }
1219 
1220 void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* start, Node* end,
1221                                           IdealLoopTree* outer_loop, LoopNode* outer_main_head,
1222                                           uint dd_main_head) {
1223   if (UseLoopPredicate) {
1224     Node* entry = pre_head->in(LoopNode::EntryControl);
1225     Node* predicate = NULL;
1226     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
1227     if (predicate != NULL) {
1228       entry = skip_loop_predicates(entry);
1229     }
1230     Node* profile_predicate = NULL;
1231     if (UseProfiledLoopPredicate) {
1232       profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
1233       if (profile_predicate != NULL) {
1234         entry = skip_loop_predicates(entry);
1235       }
1236     }
1237     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
1238     duplicate_predicates_helper(predicate, start, end, outer_loop, outer_main_head, dd_main_head);
1239     duplicate_predicates_helper(profile_predicate, start, end, outer_loop, outer_main_head, dd_main_head);
1240   }
1241 }
1242 
1243 //------------------------------insert_pre_post_loops--------------------------
1244 // Insert pre and post loops.  If peel_only is set, the pre-loop can not have
1245 // more iterations added.  It acts as a 'peel' only, no lower-bound RCE, no
1246 // alignment.  Useful to unroll loops that do no array accesses.
1247 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
1248 
1249 #ifndef PRODUCT
1250   if (TraceLoopOpts) {
1251     if (peel_only)
1252       tty->print("PeelMainPost ");
1253     else
1254       tty->print("PreMainPost  ");
1255     loop->dump_head();
1256   }
1257 #endif
1258   C->set_major_progress();
1259 


1366                                              pre_phi->in(LoopNode::LoopBackControl),
1367                                              visited, clones);
1368       _igvn.hash_delete(main_phi);
1369       main_phi->set_req( LoopNode::EntryControl, fallpre );
1370     }
1371   }
1372 
1373   // Nodes inside the loop may be control dependent on a predicate
1374   // that was moved before the preloop. If the back branch of the main
1375   // or post loops becomes dead, those nodes won't be dependent on the
1376   // test that guards that loop nest anymore which could lead to an
1377   // incorrect array access because it executes independently of the
1378   // test that was guarding the loop nest. We add a special CastII on
1379   // the if branch that enters the loop, between the input induction
1380   // variable value and the induction variable Phi to preserve correct
1381   // dependencies.
1382 
1383   // CastII for the main loop:
1384   Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head );
1385   assert(castii != NULL, "no castII inserted");
1386   Node* opaque_castii = new Opaque1Node(C, castii);
1387   register_new_node(opaque_castii, outer_main_head->in(LoopNode::EntryControl));
1388   duplicate_predicates(pre_head, castii, opaque_castii, outer_loop, outer_main_head, dd_main_head);
1389 
1390   // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
1391   // RCE and alignment may change this later.
1392   Node *cmp_end = pre_end->cmp_node();
1393   assert( cmp_end->in(2) == limit, "" );
1394   Node *pre_limit = new AddINode( init, stride );
1395 
1396   // Save the original loop limit in this Opaque1 node for
1397   // use by range check elimination.
1398   Node *pre_opaq  = new Opaque1Node(C, pre_limit, limit);
1399 
1400   register_new_node( pre_limit, pre_head->in(0) );
1401   register_new_node( pre_opaq , pre_head->in(0) );
1402 
1403   // Since no other users of pre-loop compare, I can hack limit directly
1404   assert( cmp_end->outcnt() == 1, "no other users" );
1405   _igvn.hash_delete(cmp_end);
1406   cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
1407 
1408   // Special case for not-equal loop bounds:


1647       _igvn.hash_delete(cur_phi);
1648       cur_phi->set_req(LoopNode::EntryControl, fallnew);
1649     }
1650   }
1651 
1652   // CastII for the new post loop:
1653   Node* castii = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
1654   assert(castii != NULL, "no castII inserted");
1655 
1656   return new_main_exit;
1657 }
1658 
1659 //------------------------------is_invariant-----------------------------
1660 // Return true if n is invariant
1661 bool IdealLoopTree::is_invariant(Node* n) const {
1662   Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1663   if (n_c->is_top()) return false;
1664   return !is_member(_phase->get_loop(n_c));
1665 }
1666 
1667 void PhaseIdealLoop::update_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con) {
1668   // Search for skeleton predicates and update them according to the new stride
1669   Node* entry = ctrl;
1670   Node* prev_proj = ctrl;
1671   LoopNode* outer_loop_head = loop_head->skip_strip_mined();
1672   IdealLoopTree* outer_loop = get_loop(outer_loop_head);
1673   while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
1674     IfNode* iff = entry->in(0)->as_If();
1675     ProjNode* proj = iff->proj_out(1 - entry->as_Proj()->_con);
1676     if (proj->unique_ctrl_out()->Opcode() != Op_Halt) {
1677       break;
1678     }
1679     if (iff->in(1)->Opcode() == Op_Opaque4) {
1680       // Look for predicate with an Opaque1 node that can be used as a template
1681       if (!skeleton_predicate_has_opaque(iff)) {
1682         // No Opaque1 node? It's either the check for the first value
1683         // of the first iteration or the check for the last value of
1684         // the first iteration of an unrolled loop. We can't
1685         // tell. Kill it in any case.
1686         _igvn.replace_input_of(iff, 1, iff->in(1)->in(2));
1687       } else {
1688         // Add back the predicate for the value at the beginning of the first entry
1689         prev_proj = clone_skeleton_predicate(iff, init, entry, proj, ctrl, outer_loop, prev_proj);
1690         assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected");
1691         // Compute the value of the loop induction variable at the end of the
1692         // first iteration of the unrolled loop: init + new_stride_con - init_inc
1693         int init_inc = stride_con/loop_head->unrolled_count();
1694         assert(init_inc != 0, "invalid loop increment");
1695         int new_stride_con = stride_con * 2;
1696         Node* max_value = _igvn.intcon(new_stride_con - init_inc);
1697         max_value = new AddINode(init, max_value);
1698         register_new_node(max_value, get_ctrl(iff->in(1)));
1699         prev_proj = clone_skeleton_predicate(iff, max_value, entry, proj, ctrl, outer_loop, prev_proj);
1700         assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected");
1701       }
1702     }
1703     entry = entry->in(0)->in(0);
1704   }
1705   if (prev_proj != ctrl) {
1706     _igvn.replace_input_of(outer_loop_head, LoopNode::EntryControl, prev_proj);
1707     set_idom(outer_loop_head, prev_proj, dom_depth(outer_loop_head));
1708   }
1709 }
1710 
1711 //------------------------------do_unroll--------------------------------------
1712 // Unroll the loop body one step - make each trip do 2 iterations.
1713 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
1714   assert(LoopUnrollLimit, "");
1715   CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1716   CountedLoopEndNode *loop_end = loop_head->loopexit();
1717 #ifndef PRODUCT
1718   if (PrintOpto && VerifyLoopOptimizations) {
1719     tty->print("Unrolling ");
1720     loop->dump_head();
1721   } else if (TraceLoopOpts) {
1722     if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
1723       tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
1724     } else {
1725       tty->print("Unroll %d     ", loop_head->unrolled_count()*2);
1726     }
1727     loop->dump_head();
1728   }
1729 


1755     // graph shape is encountered, the compiler bails out loop unrolling;
1756     // compilation of the method will still succeed.
1757     if (!is_canonical_loop_entry(loop_head)) {
1758       return;
1759     }
1760     opaq = loop_head->skip_predicates()->in(0)->in(1)->in(1)->in(2);
1761     // Zero-trip test uses an 'opaque' node which is not shared.
1762     assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
1763   }
1764 
1765   C->set_major_progress();
1766 
1767   Node* new_limit = NULL;
1768   int stride_con = stride->get_int();
1769   int stride_p = (stride_con > 0) ? stride_con : -stride_con;
1770   uint old_trip_count = loop_head->trip_count();
1771   // Verify that unroll policy result is still valid.
1772   assert(old_trip_count > 1 &&
1773       (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
1774 
1775   update_skeleton_predicates(ctrl, loop_head, init, stride_con);






















1776   
1777   // Adjust loop limit to keep valid iterations number after unroll.
1778   // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
1779   // which may overflow.
1780   if (!adjust_min_trip) {
1781     assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
1782         "odd trip count for maximally unroll");
1783     // Don't need to adjust limit for maximally unroll since trip count is even.
1784   } else if (loop_head->has_exact_trip_count() && init->is_Con()) {
1785     // Loop's limit is constant. Loop's init could be constant when pre-loop
1786     // become peeled iteration.
1787     jlong init_con = init->get_int();
1788     // We can keep old loop limit if iterations count stays the same:
1789     //   old_trip_count == new_trip_count * 2
1790     // Note: since old_trip_count >= 2 then new_trip_count >= 1
1791     // so we also don't need to adjust zero trip test.
1792     jlong limit_con  = limit->get_int();
1793     // (stride_con*2) not overflow since stride_con <= 8.
1794     int new_stride_con = stride_con * 2;
1795     int stride_m    = new_stride_con - (stride_con > 0 ? 1 : -1);


2323         register_new_node(offset, ctrl_off);
2324         *p_offset = offset;
2325       }
2326       return true;
2327     }
2328     if (is_scaled_iv(exp->in(2), iv, p_scale)) {
2329       if (p_offset != NULL) {
2330         *p_scale *= -1;
2331         *p_offset = exp->in(1);
2332       }
2333       return true;
2334     }
2335   }
2336   return false;
2337 }
2338 
2339 // Same as PhaseIdealLoop::duplicate_predicates() but for range checks
2340 // eliminated by iteration splitting.
2341 Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
2342                                                 Node* predicate_proj, int scale_con, Node* offset,
2343                                                 Node* limit, jint stride_con, Node* value) {
2344   bool overflow = false;
2345   BoolNode* bol = rc_predicate(loop, predicate_proj, scale_con, offset, value, NULL, stride_con, limit, (stride_con > 0) != (scale_con > 0), overflow);
2346   Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1));
2347   register_new_node(opaque_bol, predicate_proj);
2348   IfNode* new_iff = NULL;
2349   if (overflow) {
2350     new_iff = new IfNode(predicate_proj, opaque_bol, PROB_MAX, COUNT_UNKNOWN);
2351   } else {
2352     new_iff = new RangeCheckNode(predicate_proj, opaque_bol, PROB_MAX, COUNT_UNKNOWN);
2353   }
2354   register_control(new_iff, loop->_parent, predicate_proj);
2355   Node* iffalse = new IfFalseNode(new_iff);
2356   register_control(iffalse, _ltree_root, new_iff);
2357   ProjNode* iftrue = new IfTrueNode(new_iff);
2358   register_control(iftrue, loop->_parent, new_iff);
2359   Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
2360   register_new_node(frame, C->start());
2361   Node* halt = new HaltNode(iffalse, frame);
2362   register_control(halt, _ltree_root, iffalse);
2363   C->root()->add_req(halt);
2364   return iftrue;
2365 }


2524 #ifdef ASSERT
2525       if (TraceRangeLimitCheck) {
2526         tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2527         bol->dump(2);
2528       }
2529 #endif
2530       // At this point we have the expression as:
2531       //   scale_con * trip_counter + offset :: limit
2532       // where scale_con, offset and limit are loop invariant.  Trip_counter
2533       // monotonically increases by stride_con, a constant.  Both (or either)
2534       // stride_con and scale_con can be negative which will flip about the
2535       // sense of the test.
2536 
2537       // Adjust pre and main loop limits to guard the correct iteration set
2538       if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2539         if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2540           // The underflow and overflow limits: 0 <= scale*I+offset < limit
2541           add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2542           // (0-offset)/scale could be outside of loop iterations range.
2543           conditional_rc = true;
2544           Node* init = cl->init_trip();
2545           Node* opaque_init = new Opaque1Node(C, init);
2546           register_new_node(opaque_init, predicate_proj);
2547           // template predicate so it can be updated on next unrolling
2548           predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, opaque_init);
2549           assert(skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected");
2550           // predicate on first value of first iteration
2551           predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, init);
2552           assert(!skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected");
2553           int init_inc = stride_con/cl->unrolled_count();
2554           assert(init_inc != 0, "invalid loop increment");
2555           Node* max_value = _igvn.intcon(stride_con - init_inc);
2556           max_value = new AddINode(init, max_value);
2557           register_new_node(max_value, predicate_proj);
2558           // predicate on last value of first iteration (in case unrolling has already happened)
2559           predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, max_value);
2560           assert(!skeleton_predicate_has_opaque(predicate_proj->in(0)->as_If()), "unexpected");
2561         } else {
2562           if (PrintOpto) {
2563             tty->print_cr("missed RCE opportunity");
2564           }
2565           continue;             // In release mode, ignore it
2566         }
2567       } else {                  // Otherwise work on normal compares
2568         switch( b_test._test ) {
2569         case BoolTest::gt:
2570           // Fall into GE case
2571         case BoolTest::ge:
2572           // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2573           scale_con = -scale_con;
2574           offset = new SubINode( zero, offset );
2575           register_new_node( offset, pre_ctrl );
2576           limit  = new SubINode( zero, limit );
2577           register_new_node( limit, pre_ctrl );
2578           // Fall into LE case
2579         case BoolTest::le:
2580           if (b_test._test != BoolTest::gt) {


< prev index next >