269 // (x + inv2) - inv1 => (-inv1 + inv2) + x
270 // (x - inv2) + inv1 => ( inv1 - inv2) + x
271 // (x - inv2) - inv1 => (-inv1 - inv2) + x
272 // inv1 + (inv2 - x) => ( inv1 + inv2) - x
273 // inv1 - (x - inv2) => ( inv1 + inv2) - x
274 // (inv2 - x) + inv1 => ( inv1 + inv2) - x
275 // (inv2 - x) - inv1 => (-inv1 + inv2) - x
276 // inv1 - (x + inv2) => ( inv1 - inv2) - x
277 //
278 Node* IdealLoopTree::reassociate_add_sub(Node* n1, PhaseIdealLoop *phase) {
279 if ((!n1->is_Add() && !n1->is_Sub()) || n1->outcnt() == 0) return NULL;
280 if (is_invariant(n1)) return NULL;
281 int inv1_idx = is_invariant_addition(n1, phase);
282 if (!inv1_idx) return NULL;
283 // Don't mess with add of constant (igvn moves them to expression tree root.)
284 if (n1->is_Add() && n1->in(2)->is_Con()) return NULL;
285 Node* inv1 = n1->in(inv1_idx);
286 Node* n2 = n1->in(3 - inv1_idx);
287 int inv2_idx = is_invariant_addition(n2, phase);
288 if (!inv2_idx) return NULL;
289 Node* x = n2->in(3 - inv2_idx);
290 Node* inv2 = n2->in(inv2_idx);
291
292 bool neg_x = n2->is_Sub() && inv2_idx == 1;
293 bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
294 bool neg_inv1 = n1->is_Sub() && inv1_idx == 2;
295 if (n1->is_Sub() && inv1_idx == 1) {
296 neg_x = !neg_x;
297 neg_inv2 = !neg_inv2;
298 }
299 Node* inv1_c = phase->get_ctrl(inv1);
300 Node* inv2_c = phase->get_ctrl(inv2);
301 Node* n_inv1;
302 if (neg_inv1) {
303 Node *zero = phase->_igvn.intcon(0);
304 phase->set_ctrl(zero, phase->C->root());
305 n_inv1 = new SubINode(zero, inv1);
306 phase->register_new_node(n_inv1, inv1_c);
307 } else {
308 n_inv1 = inv1;
320 addx = new SubINode(inv, x);
321 } else {
322 addx = new AddINode(x, inv);
323 }
324 phase->register_new_node(addx, phase->get_ctrl(x));
325 phase->_igvn.replace_node(n1, addx);
326 assert(phase->get_loop(phase->get_ctrl(n1)) == this, "");
327 _body.yank(n1);
328 return addx;
329 }
330
331 //---------------------reassociate_invariants-----------------------------
332 // Reassociate invariant expressions:
333 void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
334 for (int i = _body.size() - 1; i >= 0; i--) {
335 Node *n = _body.at(i);
336 for (int j = 0; j < 5; j++) {
337 Node* nn = reassociate_add_sub(n, phase);
338 if (nn == NULL) break;
339 n = nn; // again
340 };
341 }
342 }
343
344 //------------------------------policy_peeling---------------------------------
345 // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
346 // make some loop-invariant test (usually a null-check) happen before the loop.
347 bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const {
348 IdealLoopTree *loop = (IdealLoopTree*)this;
349
350 // If nodes are depleted, some transform has miscalculated its needs.
351 assert(!phase->exceeding_node_budget(), "sanity");
352
353 uint body_size = loop->_body.size();
354 // Peeling does loop cloning which can result in O(N^2) node construction
355 if (body_size > 255) {
356 return false; // Prevent overflow for large body size
357 }
358 uint estimate = body_size * body_size;
359 if (phase->exceeding_node_budget(estimate)) {
360 return false; // Too large to safely clone
361 }
362
363 // check for vectorized loops, any peeling done was already applied
364 if (_head->is_CountedLoop()) {
365 CountedLoopNode* cl = _head->as_CountedLoop();
366 if (cl->is_unroll_only() || cl->trip_count() == 1) {
367 return false;
368 }
369 }
370
371 Node* test = loop->tail();
372
373 while (test != _head) { // Scan till run off top of loop
374 if (test->is_If()) { // Test?
375 Node *ctrl = phase->get_ctrl(test->in(1));
376 if (ctrl->is_top()) {
377 return false; // Found dead test on live IF? No peeling!
378 }
379 // Standard IF only has one input value to check for loop invariance
380 assert(test->Opcode() == Op_If ||
381 test->Opcode() == Op_CountedLoopEnd ||
382 test->Opcode() == Op_RangeCheck,
383 "Check this code when new subtype is added");
384 // Condition is not a member of this loop?
385 if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
386 // Found reason to peel!
387 return phase->may_require_nodes(estimate);
388 }
389 }
390 // Walk up dominators to loop _head looking for test which is
391 // executed on every path thru loop.
392 test = phase->idom(test);
393 }
394 return false;
395 }
396
397 //------------------------------peeled_dom_test_elim---------------------------
398 // If we got the effect of peeling, either by actually peeling or by making
399 // a pre-loop which must execute at least once, we can remove all
400 // loop-invariant dominated tests in the main body.
401 void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree *loop, Node_List &old_new) {
402 bool progress = true;
403 while (progress) {
404 progress = false; // Reset for next iteration
405 Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();
406 Node *test = prev->in(0);
407 while (test != loop->_head) { // Scan till run off top of loop
408
409 int p_op = prev->Opcode();
410 if ((p_op == Op_IfFalse || p_op == Op_IfTrue) &&
411 test->is_If() && // Test?
412 !test->in(1)->is_Con() && // And not already obvious?
413 // Condition is not a member of this loop?
414 !loop->is_member(get_loop(get_ctrl(test->in(1))))){
621 new_exit_value = old->in(LoopNode::LoopBackControl);
622 _igvn.hash_delete(old);
623 old->set_req(LoopNode::EntryControl, new_exit_value);
624 }
625 }
626
627
// Step 3: Cut the backedge on the clone (so it's not a loop) and remove the
629 // extra backedge user.
630 Node* new_head = old_new[head->_idx];
631 _igvn.hash_delete(new_head);
632 new_head->set_req(LoopNode::LoopBackControl, C->top());
633 for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) {
634 Node* use = new_head->fast_out(j2);
635 if (use->in(0) == new_head && use->req() == 3 && use->is_Phi()) {
636 _igvn.hash_delete(use);
637 use->set_req(LoopNode::LoopBackControl, C->top());
638 }
639 }
640
641
642 // Step 4: Correct dom-depth info. Set to loop-head depth.
643 int dd = dom_depth(head);
644 set_idom(head, head->in(1), dd);
645 for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
646 Node *old = loop->_body.at(j3);
647 Node *nnn = old_new[old->_idx];
648 if (!has_ctrl(nnn)) {
649 set_idom(nnn, idom(nnn), dd-1);
650 }
651 }
652
653 // Now force out all loop-invariant dominating tests. The optimizer
654 // finds some, but we _know_ they are all useless.
655 peeled_dom_test_elim(loop,old_new);
656
657 loop->record_for_igvn();
658 }
659
660 #define EMPTY_LOOP_SIZE 7 // number of nodes in an empty loop
661
662 //------------------------------policy_maximally_unroll------------------------
663 // Calculate exact loop trip count and return true if loop can be maximally
664 // unrolled.
665 bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
666 CountedLoopNode *cl = _head->as_CountedLoop();
667 assert(cl->is_normal_loop(), "");
668 if (!cl->is_valid_counted_loop()) {
669 return false; // Malformed counted loop
670 }
671 if (!cl->has_exact_trip_count()) {
672 // Trip count is not exact.
673 return false;
674 }
675
676 uint trip_count = cl->trip_count();
677 // Note, max_juint is used to indicate unknown trip count.
678 assert(trip_count > 1, "one iteration loop should be optimized out already");
679 assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
680
681 // If nodes are depleted, some transform has miscalculated its needs.
682 assert(!phase->exceeding_node_budget(), "sanity");
683
684 // Real policy: if we maximally unroll, does it get too big?
685 // Allow the unrolled mess to get larger than standard loop
686 // size. After all, it will no longer be a loop.
687 uint body_size = _body.size();
688 uint unroll_limit = (uint)LoopUnrollLimit * 4;
689 assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
690 if (trip_count > unroll_limit || body_size > unroll_limit) {
691 return false;
692 }
693
694 // Take into account that after unroll conjoined heads and tails will fold,
695 // otherwise policy_unroll() may allow more unrolling than max unrolling.
696 uint new_body_size = est_loop_clone_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
697
698 if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
699 return false;
700 }
701
702 // Fully unroll a loop with few iterations regardless next conditions since
703 // following loop optimizations will split such loop anyway (pre-main-post).
704 if (trip_count <= 3) {
705 return phase->may_require_nodes(new_body_size);
706 }
707
708 if (new_body_size > unroll_limit ||
709 // Unrolling can result in a large amount of node construction
710 phase->exceeding_node_budget(new_body_size)) {
711 return false;
712 }
713
714 // Do not unroll a loop with String intrinsics code.
715 // String intrinsics are large and have loops.
716 for (uint k = 0; k < _body.size(); k++) {
725 case Op_HasNegatives: {
726 return false;
727 }
728 #if INCLUDE_RTM_OPT
729 case Op_FastLock:
730 case Op_FastUnlock: {
731 // Don't unroll RTM locking code because it is large.
732 if (UseRTMLocking) {
733 return false;
734 }
735 }
736 #endif
737 } // switch
738 }
739
740 return phase->may_require_nodes(new_body_size);
741 }
742
743
744 //------------------------------policy_unroll----------------------------------
745 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if the
746 // loop is a CountedLoop and the body is small enough.
747 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
748
749 CountedLoopNode *cl = _head->as_CountedLoop();
750 assert(cl->is_normal_loop() || cl->is_main_loop(), "");
751
752 if (!cl->is_valid_counted_loop()) {
753 return false; // Malformed counted loop
754 }
755
756 // If nodes are depleted, some transform has miscalculated its needs.
757 assert(!phase->exceeding_node_budget(), "sanity");
758
759 // Protect against over-unrolling.
760 // After split at least one iteration will be executed in pre-loop.
761 if (cl->trip_count() <= (cl->is_normal_loop() ? 2u : 1u)) {
762 return false;
763 }
764 _local_loop_unroll_limit = LoopUnrollLimit;
765 _local_loop_unroll_factor = 4;
766 int future_unroll_cnt = cl->unrolled_count() * 2;
870 if (UseSuperWord) {
871 if (!cl->is_reduction_loop()) {
872 phase->mark_reductions(this);
873 }
874
875 // Only attempt slp analysis when user controls do not prohibit it
876 if (LoopMaxUnroll > _local_loop_unroll_factor) {
877 // Once policy_slp_analysis succeeds, mark the loop with the
878 // maximal unroll factor so that we minimize analysis passes
879 if (future_unroll_cnt >= _local_loop_unroll_factor) {
880 policy_unroll_slp_analysis(cl, phase, future_unroll_cnt);
881 }
882 }
883 }
884
885 int slp_max_unroll_factor = cl->slp_max_unroll();
886 if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
887 LoopMaxUnroll = slp_max_unroll_factor;
888 }
889
890 uint estimate = est_loop_clone_sz(2, body_size);
891
892 if (cl->has_passed_slp()) {
893 if (slp_max_unroll_factor >= future_unroll_cnt) {
894 return phase->may_require_nodes(estimate);
895 }
896 return false; // Loop too big.
897 }
898
899 // Check for being too big
900 if (body_size > (uint)_local_loop_unroll_limit) {
901 if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < 4u * LoopUnrollLimit) {
902 return phase->may_require_nodes(estimate);
903 }
904 return false; // Loop too big.
905 }
906
907 if (cl->is_unroll_only()) {
908 if (TraceSuperWordLoopUnrollAnalysis) {
909 tty->print_cr("policy_unroll passed vector loop(vlen=%d, factor=%d)\n",
910 slp_max_unroll_factor, future_unroll_cnt);
941 tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit);
942 }
943 _local_loop_unroll_limit = new_limit;
944 }
945 }
946 }
947 }
948 }
949
950 //------------------------------policy_align-----------------------------------
951 // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
952 // expression that does the alignment. Note that only one array base can be
953 // aligned in a loop (unless the VM guarantees mutual alignment). Note that
954 // if we vectorize short memory ops into longer memory ops, we may want to
955 // increase alignment.
bool IdealLoopTree::policy_align(PhaseIdealLoop *phase) const {
  // Cache-line alignment of loops is not implemented; never request it.
  return false;
}
959
960 //------------------------------policy_range_check-----------------------------
961 // Return TRUE or FALSE if the loop should be range-check-eliminated.
962 // Actually we do iteration-splitting, a more powerful form of RCE.
963 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
964 if (!RangeCheckElimination) return false;
965
966 // If nodes are depleted, some transform has miscalculated its needs.
967 assert(!phase->exceeding_node_budget(), "sanity");
968
969 CountedLoopNode *cl = _head->as_CountedLoop();
970 // If we unrolled with no intention of doing RCE and we later
971 // changed our minds, we got no pre-loop. Either we need to
972 // make a new pre-loop, or we gotta disallow RCE.
973 if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
974 Node *trip_counter = cl->phi();
975
976 // check for vectorized loops, some opts are no longer needed
977 if (cl->is_unroll_only()) return false;
978
979 // Check loop body for tests of trip-counter plus loop-invariant vs
980 // loop-invariant.
981 for (uint i = 0; i < _body.size(); i++) {
982 Node *iff = _body[i];
983 if (iff->Opcode() == Op_If ||
984 iff->Opcode() == Op_RangeCheck) { // Test?
985
986 // Comparing trip+off vs limit
987 Node *bol = iff->in(1);
988 if (bol->req() != 2) {
989 continue; // dead constant test
990 }
991 if (!bol->is_Bool()) {
992 assert(bol->Opcode() == Op_Conv2B, "predicate check only");
999 Node *rc_exp = cmp->in(1);
1000 Node *limit = cmp->in(2);
1001
1002 Node *limit_c = phase->get_ctrl(limit);
1003 if (limit_c == phase->C->top()) {
1004 return false; // Found dead test on live IF? No RCE!
1005 }
1006 if (is_member(phase->get_loop(limit_c))) {
1007 // Compare might have operands swapped; commute them
1008 rc_exp = cmp->in(2);
1009 limit = cmp->in(1);
1010 limit_c = phase->get_ctrl(limit);
1011 if (is_member(phase->get_loop(limit_c))) {
1012 continue; // Both inputs are loop varying; cannot RCE
1013 }
1014 }
1015
1016 if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
1017 continue;
1018 }
1019 // Found a test like 'trip+off vs limit'. Test is an IfNode, has two
1020 // (2) projections. If BOTH are in the loop we need loop unswitching
1021 // instead of iteration splitting.
1022 if (is_loop_exit(iff)) {
1023 // Found valid reason to split iterations (if there is room).
1024 // NOTE: Usually a gross overestimate.
1025 return phase->may_require_nodes(est_loop_clone_sz(2, _body.size()));
1026 }
1027 } // End of is IF
1028 }
1029
1030 return false;
1031 }
1032
1033 //------------------------------policy_peel_only-------------------------------
1034 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful
1035 // for unrolling loops with NO array accesses.
1036 bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const {
1037
1038 // If nodes are depleted, some transform has miscalculated its needs.
1039 assert(!phase->exceeding_node_budget(), "sanity");
1040
1041 // check for vectorized loops, any peeling done was already applied
1042 if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
1043 return false;
1044 }
1045
1504
1505 // Now force out all loop-invariant dominating tests. The optimizer
1506 // finds some, but we _know_ they are all useless.
1507 peeled_dom_test_elim(loop,old_new);
1508 loop->record_for_igvn();
1509 }
1510
1511 //------------------------------insert_vector_post_loop------------------------
1512 // Insert a copy of the atomic unrolled vectorized main loop as a post loop,
1513 // unroll_policy has already informed us that more unrolling is about to
1514 // happen to the main loop. The resultant post loop will serve as a
1515 // vectorized drain loop.
1516 void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1517 if (!loop->_head->is_CountedLoop()) return;
1518
1519 CountedLoopNode *cl = loop->_head->as_CountedLoop();
1520
1521 // only process vectorized main loops
1522 if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
1523
1524 if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
1525 return;
1526 }
1527 int slp_max_unroll_factor = cl->slp_max_unroll();
1528 int cur_unroll = cl->unrolled_count();
1529
1530 if (slp_max_unroll_factor == 0) return;
1531
1532 // only process atomic unroll vector loops (not super unrolled after vectorization)
1533 if (cur_unroll != slp_max_unroll_factor) return;
1534
1535 // we only ever process this one time
1536 if (cl->has_atomic_post_loop()) return;
1537
1538 #ifndef PRODUCT
1539 if (TraceLoopOpts) {
1540 tty->print("PostVector ");
1541 loop->dump_head();
1542 }
1543 #endif
1544 C->set_major_progress();
1545
1546 // Find common pieces of the loop being guarded with pre & post loops
1547 CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1548 CountedLoopEndNode *main_end = main_head->loopexit();
1549 // diagnostic to show loop end is not properly formed
1550 assert(main_end->outcnt() == 2, "1 true, 1 false path only");
1551
1552 // mark this loop as processed
1553 main_head->mark_has_atomic_post_loop();
1554
1555 Node *incr = main_end->incr();
1556 Node *limit = main_end->limit();
1557
3161 return true;
3162 }
3163
3164 //=============================================================================
3165 //------------------------------iteration_split_impl---------------------------
// Apply one round of iteration-splitting transforms to this single loop:
// one-iteration/empty-loop removal, peeling, unswitching, maximal unrolling,
// pre/main/post-loop insertion, range-check elimination and unrolling.
// Returns false only to terminate this round of loop opts (after a
// successful partial peel, or when the node budget cannot cover a
// pre/main/post conversion); otherwise returns true.
bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_new) {
  // Compute loop trip count if possible.
  compute_trip_count(phase);

  // Convert one iteration loop into normal code.
  if (do_one_iteration_loop(phase)) {
    return true;
  }
  // Check and remove empty loops (spam micro-benchmarks)
  if (do_remove_empty_loop(phase)) {
    return true; // Here we removed an empty loop
  }

  // RAII scope: tracks node-budget consumption for the transforms below.
  AutoNodeBudget node_budget(phase);

  bool should_peel = policy_peeling(phase);
  bool should_unswitch = policy_unswitching(phase);

  // Non-counted loops may be peeled; exactly 1 iteration is peeled.
  // This removes loop-invariant tests (usually null checks).
  if (!_head->is_CountedLoop()) { // Non-counted loop
    if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
      // Partial peel succeeded so terminate this round of loop opts
      return false;
    }
    if (should_peel) { // Should we peel?
      if (PrintOpto) { tty->print_cr("should_peel"); }
      phase->do_peeling(this,old_new);
    } else if (should_unswitch) {
      phase->do_unswitching(this, old_new);
    }
    return true;
  }
  CountedLoopNode *cl = _head->as_CountedLoop();

  if (!cl->is_valid_counted_loop()) return true; // Ignore various kinds of broken loops

  // Do nothing special to pre- and post- loops
  if (cl->is_pre_loop() || cl->is_post_loop()) return true;

  // Compute loop trip count from profile data
  compute_profile_trip_cnt(phase);

  // Before attempting fancy unrolling, RCE or alignment, see if we want
  // to completely unroll this loop or do loop unswitching.
  if (cl->is_normal_loop()) {
    if (should_unswitch) {
      phase->do_unswitching(this, old_new);
      return true;
    }
    bool should_maximally_unroll = policy_maximally_unroll(phase);
    if (should_maximally_unroll) {
      // Here we did some unrolling and peeling. Eventually we will
      // completely unroll this loop and it will no longer be a loop.
      phase->do_maximally_unroll(this, old_new);
      return true;
    }
  }

  // Counted loops may be peeled, may need some iterations run up
  // front for RCE, and may want to align loop refs to a cache
  // line. Thus we clone a full loop up front whose trip count is
  // at least 1 (if peeling), but may be several more.

  // The main loop will start cache-line aligned with at least 1
  // iteration of the unrolled body (zero-trip test required) and
  // will have some range checks removed.

  // A post-loop will finish any odd iterations (leftover after
  // unrolling), plus any needed for RCE purposes.

  bool should_unroll = policy_unroll(phase);
  bool should_rce = policy_range_check(phase);
  // TODO: Remove align -- not used.
  bool should_align = policy_align(phase);

  // If not RCE'ing (iteration splitting) or Aligning, then we do not need a
  // pre-loop. We may still need to peel an initial iteration but we will not
  // be needing an unknown number of pre-iterations.
  //
  // Basically, if may_rce_align reports FALSE first time through, we will not
  // be able to later do RCE or Aligning on this loop.
  bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;

  // If we have any of these conditions (RCE, alignment, unrolling) met, then
  // we switch to the pre-/main-/post-loop model. This model also covers
  // peeling.
  if (should_rce || should_align || should_unroll) {
    if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
      // Converting needs to clone the body up to three times; bail out of
      // this round entirely if the budget cannot cover that.
      if (!phase->may_require_nodes(est_loop_clone_sz(3, _body.size()))) {
        return false;
      }
      phase->insert_pre_post_loops(this,old_new, !may_rce_align);
    }
    // Adjust the pre- and main-loop limits to let the pre and post loops run
    // with full checks, but the main-loop with no checks. Remove said
    // checks from the main body.
    if (should_rce) {
      if (phase->do_range_check(this, old_new) != 0) {
        cl->mark_has_range_checks();
      }
    } else if (PostLoopMultiversioning) {
      phase->has_range_checks(this);
    }

    if (should_unroll && !should_peel && PostLoopMultiversioning) {
      // Try to setup multiversioning on main loops before they are unrolled
      if (cl->is_main_loop() && (cl->unrolled_count() == 1)) {
        phase->insert_scalar_rced_post_loop(this, old_new);
      }
    }

    // Double loop body for unrolling. Adjust the minimum-trip test (will do
    // twice as many iterations as before) and the main body limit (only do
    // an even number of trips). If we are peeling, we might enable some RCE
    // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
    // peeling.
    if (should_unroll && !should_peel) {
      if (SuperWordLoopUnrollAnalysis) {
        phase->insert_vector_post_loop(this, old_new);
      }
      phase->do_unroll(this, old_new, true);
    }

    // Adjust the pre-loop limits to align the main body iterations.
    if (should_align) {
      Unimplemented();
    }
  } else { // Else we have an unchanged counted loop
    if (should_peel) { // Might want to peel but do nothing else
      phase->do_peeling(this,old_new);
    }
  }
  return true;
}
3301
3302
3303 //=============================================================================
3304 //------------------------------iteration_split--------------------------------
3305 bool IdealLoopTree::iteration_split(PhaseIdealLoop* phase, Node_List &old_new) {
3306 // Recursively iteration split nested loops
3307 if (_child && !_child->iteration_split(phase, old_new)) {
3308 return false;
3309 }
3310
3311 // Clean out prior deadwood
3312 DCE_loop_body();
3313
3314 // Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
3315 // Replace with a 1-in-10 exit guess.
3316 if (!is_root() && is_loop()) {
|
269 // (x + inv2) - inv1 => (-inv1 + inv2) + x
270 // (x - inv2) + inv1 => ( inv1 - inv2) + x
271 // (x - inv2) - inv1 => (-inv1 - inv2) + x
272 // inv1 + (inv2 - x) => ( inv1 + inv2) - x
273 // inv1 - (x - inv2) => ( inv1 + inv2) - x
274 // (inv2 - x) + inv1 => ( inv1 + inv2) - x
275 // (inv2 - x) - inv1 => (-inv1 + inv2) - x
276 // inv1 - (x + inv2) => ( inv1 - inv2) - x
277 //
278 Node* IdealLoopTree::reassociate_add_sub(Node* n1, PhaseIdealLoop *phase) {
279 if ((!n1->is_Add() && !n1->is_Sub()) || n1->outcnt() == 0) return NULL;
280 if (is_invariant(n1)) return NULL;
281 int inv1_idx = is_invariant_addition(n1, phase);
282 if (!inv1_idx) return NULL;
283 // Don't mess with add of constant (igvn moves them to expression tree root.)
284 if (n1->is_Add() && n1->in(2)->is_Con()) return NULL;
285 Node* inv1 = n1->in(inv1_idx);
286 Node* n2 = n1->in(3 - inv1_idx);
287 int inv2_idx = is_invariant_addition(n2, phase);
288 if (!inv2_idx) return NULL;
289
290 if (!phase->may_require_nodes(10, 10)) return NULL;
291
292 Node* x = n2->in(3 - inv2_idx);
293 Node* inv2 = n2->in(inv2_idx);
294
295 bool neg_x = n2->is_Sub() && inv2_idx == 1;
296 bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
297 bool neg_inv1 = n1->is_Sub() && inv1_idx == 2;
298 if (n1->is_Sub() && inv1_idx == 1) {
299 neg_x = !neg_x;
300 neg_inv2 = !neg_inv2;
301 }
302 Node* inv1_c = phase->get_ctrl(inv1);
303 Node* inv2_c = phase->get_ctrl(inv2);
304 Node* n_inv1;
305 if (neg_inv1) {
306 Node *zero = phase->_igvn.intcon(0);
307 phase->set_ctrl(zero, phase->C->root());
308 n_inv1 = new SubINode(zero, inv1);
309 phase->register_new_node(n_inv1, inv1_c);
310 } else {
311 n_inv1 = inv1;
323 addx = new SubINode(inv, x);
324 } else {
325 addx = new AddINode(x, inv);
326 }
327 phase->register_new_node(addx, phase->get_ctrl(x));
328 phase->_igvn.replace_node(n1, addx);
329 assert(phase->get_loop(phase->get_ctrl(n1)) == this, "");
330 _body.yank(n1);
331 return addx;
332 }
333
334 //---------------------reassociate_invariants-----------------------------
335 // Reassociate invariant expressions:
336 void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
337 for (int i = _body.size() - 1; i >= 0; i--) {
338 Node *n = _body.at(i);
339 for (int j = 0; j < 5; j++) {
340 Node* nn = reassociate_add_sub(n, phase);
341 if (nn == NULL) break;
342 n = nn; // again
343 }
344 }
345 }
346
347 //------------------------------policy_peeling---------------------------------
348 // Return TRUE if the loop should be peeled, otherwise return FALSE. Peeling
349 // is applicable if we can make a loop-invariant test (usually a null-check)
350 // execute before we enter the loop. When TRUE, the estimated node budget is
351 // also requested.
352 bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) {
353 uint estimate = estimate_peeling(phase);
354
355 return estimate == 0 ? false : phase->may_require_nodes(estimate);
356 }
357
358 // Perform actual policy and size estimate for the loop peeling transform, and
359 // return the estimated loop size if peeling is applicable, otherwise return
360 // zero. No node budget is allocated.
361 uint IdealLoopTree::estimate_peeling(PhaseIdealLoop *phase) {
362
363 // If nodes are depleted, some transform has miscalculated its needs.
364 assert(!phase->exceeding_node_budget(), "sanity");
365
366 // Peeling does loop cloning which can result in O(N^2) node construction.
367 if (_body.size() > 255) {
368 return 0; // Suppress too large body size.
369 }
370 // Optimistic estimate that approximates loop body complexity via data and
371 // control flow fan-out (instead of using the more pessimistic: BodySize^2).
372 uint estimate = est_loop_clone_sz(2);
373
374 if (phase->exceeding_node_budget(estimate)) {
375 return 0; // Too large to safely clone.
376 }
377
378 // Check for vectorized loops, any peeling done was already applied.
379 if (_head->is_CountedLoop()) {
380 CountedLoopNode* cl = _head->as_CountedLoop();
381 if (cl->is_unroll_only() || cl->trip_count() == 1) {
382 return 0;
383 }
384 }
385
386 Node* test = tail();
387
388 while (test != _head) { // Scan till run off top of loop
389 if (test->is_If()) { // Test?
390 Node *ctrl = phase->get_ctrl(test->in(1));
391 if (ctrl->is_top()) {
392 return 0; // Found dead test on live IF? No peeling!
393 }
394 // Standard IF only has one input value to check for loop invariance.
395 assert(test->Opcode() == Op_If ||
396 test->Opcode() == Op_CountedLoopEnd ||
397 test->Opcode() == Op_RangeCheck,
398 "Check this code when new subtype is added");
399 // Condition is not a member of this loop?
400 if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
401 return estimate; // Found reason to peel!
402 }
403 }
404 // Walk up dominators to loop _head looking for test which is executed on
405 // every path through the loop.
406 test = phase->idom(test);
407 }
408 return 0;
409 }
410
411 //------------------------------peeled_dom_test_elim---------------------------
412 // If we got the effect of peeling, either by actually peeling or by making
413 // a pre-loop which must execute at least once, we can remove all
414 // loop-invariant dominated tests in the main body.
415 void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree *loop, Node_List &old_new) {
416 bool progress = true;
417 while (progress) {
418 progress = false; // Reset for next iteration
419 Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();
420 Node *test = prev->in(0);
421 while (test != loop->_head) { // Scan till run off top of loop
422
423 int p_op = prev->Opcode();
424 if ((p_op == Op_IfFalse || p_op == Op_IfTrue) &&
425 test->is_If() && // Test?
426 !test->in(1)->is_Con() && // And not already obvious?
427 // Condition is not a member of this loop?
428 !loop->is_member(get_loop(get_ctrl(test->in(1))))){
635 new_exit_value = old->in(LoopNode::LoopBackControl);
636 _igvn.hash_delete(old);
637 old->set_req(LoopNode::EntryControl, new_exit_value);
638 }
639 }
640
641
// Step 3: Cut the backedge on the clone (so it's not a loop) and remove the
643 // extra backedge user.
644 Node* new_head = old_new[head->_idx];
645 _igvn.hash_delete(new_head);
646 new_head->set_req(LoopNode::LoopBackControl, C->top());
647 for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) {
648 Node* use = new_head->fast_out(j2);
649 if (use->in(0) == new_head && use->req() == 3 && use->is_Phi()) {
650 _igvn.hash_delete(use);
651 use->set_req(LoopNode::LoopBackControl, C->top());
652 }
653 }
654
655 // Step 4: Correct dom-depth info. Set to loop-head depth.
656
657 int dd = dom_depth(head);
658 set_idom(head, head->in(1), dd);
659 for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
660 Node *old = loop->_body.at(j3);
661 Node *nnn = old_new[old->_idx];
662 if (!has_ctrl(nnn)) {
663 set_idom(nnn, idom(nnn), dd-1);
664 }
665 }
666
667 // Now force out all loop-invariant dominating tests. The optimizer
668 // finds some, but we _know_ they are all useless.
669 peeled_dom_test_elim(loop,old_new);
670
671 loop->record_for_igvn();
672 }
673
674 // The Estimated Loop Unroll Size: UnrollFactor * (106% * BodySize + BC) + CC,
675 // where BC and CC are (totally) ad-hoc/magic "body" and "clone" constants,
676 // respectively, used to ensure that node usage estimates made are on the safe
677 // side, for the most part. This is a simplified version of the loop clone
678 // size calculation in est_loop_clone_sz(), defined for unroll factors larger
679 // than one (>1), performing an overflow check and returning 'UINT_MAX' in
680 // case of an overflow.
681 static uint est_loop_unroll_sz(uint factor, uint size) {
682 precond(0 < factor);
683
684 uint const bc = 5;
685 uint const cc = 7;
686 uint const sz = size + (size + 15) / 16;
687 uint estimate = factor * (sz + bc) + cc;
688
689 return (estimate - cc) / factor == sz + bc ? estimate : UINT_MAX;
690 }
691
692 #define EMPTY_LOOP_SIZE 7 // Number of nodes in an empty loop.
693 
694 //------------------------------policy_maximally_unroll------------------------
695 // Calculate the exact loop trip-count and return TRUE if loop can be fully,
696 // i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
697 // node budget is also requested.
698 bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
699   CountedLoopNode *cl = _head->as_CountedLoop();
700   assert(cl->is_normal_loop(), "");
701   if (!cl->is_valid_counted_loop()) {
702     return false; // Malformed counted loop
703   }
      // Full unrolling requires a compile-time-known iteration count.
704   if (!cl->has_exact_trip_count()) {
705     // Trip count is not exact.
706     return false;
707   }
708 
709   uint trip_count = cl->trip_count();
710   // Note, max_juint is used to indicate unknown trip count.
711   assert(trip_count > 1, "one iteration loop should be optimized out already");
712   assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
713 
714   // If nodes are depleted, some transform has miscalculated its needs.
715   assert(!phase->exceeding_node_budget(), "sanity");
716 
717   // Real policy: if we maximally unroll, does it get too big?
718   // Allow the unrolled mess to get larger than standard loop
719   // size. After all, it will no longer be a loop.
720   uint body_size = _body.size();
      // Hard cap on both trip count and body size: 4x the LoopUnrollLimit flag.
721   uint unroll_limit = (uint)LoopUnrollLimit * 4;
722   assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
723   if (trip_count > unroll_limit || body_size > unroll_limit) {
724     return false;
725   }
726 
727   // Take into account that after unroll conjoined heads and tails will fold,
728   // otherwise policy_unroll() may allow more unrolling than max unrolling.
729   uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
730 
731   if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
732     return false;
733   }
734 
735   // Fully unroll a loop with few iterations regardless next conditions since
736   // following loop optimizations will split such loop anyway (pre-main-post).
737   if (trip_count <= 3) {
738     return phase->may_require_nodes(new_body_size);
739   }
740 
741   if (new_body_size > unroll_limit ||
742       // Unrolling can result in a large amount of node construction
743       phase->exceeding_node_budget(new_body_size)) {
744     return false;
745   }
746 
747   // Do not unroll a loop with String intrinsics code.
748   // String intrinsics are large and have loops.
749   for (uint k = 0; k < _body.size(); k++) {
758       case Op_HasNegatives: {
759         return false;
760       }
761 #if INCLUDE_RTM_OPT
762       case Op_FastLock:
763       case Op_FastUnlock: {
764         // Don't unroll RTM locking code because it is large.
765         if (UseRTMLocking) {
766           return false;
767         }
768       }
769 #endif
770     } // switch
771   }
772 
      // All checks passed: request node budget for the fully unrolled body.
773   return phase->may_require_nodes(new_body_size);
774 }
775
776
777 //------------------------------policy_unroll----------------------------------
778 // Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll if
779 // the loop is a counted loop and the loop body is small enough. When TRUE,
780 // the estimated node budget is also requested.
781 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
782 
783   CountedLoopNode *cl = _head->as_CountedLoop();
784   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
785 
786   if (!cl->is_valid_counted_loop()) {
787     return false; // Malformed counted loop
788   }
789 
790   // If nodes are depleted, some transform has miscalculated its needs.
791   assert(!phase->exceeding_node_budget(), "sanity");
792 
793   // Protect against over-unrolling.
794   // After split at least one iteration will be executed in pre-loop.
795   if (cl->trip_count() <= (cl->is_normal_loop() ? 2u : 1u)) {
796     return false;
797   }
      // Seed the per-loop unroll controls from the global flags; the SLP
      // analysis below may raise _local_loop_unroll_limit later on.
798   _local_loop_unroll_limit = LoopUnrollLimit;
799   _local_loop_unroll_factor = 4;
      // Unroll count this round would produce: each unroll doubles the count.
800   int future_unroll_cnt = cl->unrolled_count() * 2;
904   if (UseSuperWord) {
905     if (!cl->is_reduction_loop()) {
906       phase->mark_reductions(this);
907     }
908 
909     // Only attempt slp analysis when user controls do not prohibit it
910     if (LoopMaxUnroll > _local_loop_unroll_factor) {
911       // Once policy_slp_analysis succeeds, mark the loop with the
912       // maximal unroll factor so that we minimize analysis passes
913       if (future_unroll_cnt >= _local_loop_unroll_factor) {
914         policy_unroll_slp_analysis(cl, phase, future_unroll_cnt);
915       }
916     }
917   }
918 
919   int slp_max_unroll_factor = cl->slp_max_unroll();
      // Let a wider SLP vector width override LoopMaxUnroll, but only when the
      // flag was left at its default and subword-for-max-vector is enabled.
920   if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
921     LoopMaxUnroll = slp_max_unroll_factor;
922   }
923 
      // Estimated node cost of doubling the body (one clone of the loop).
924   uint estimate = est_loop_clone_sz(2);
925 
926   if (cl->has_passed_slp()) {
927     if (slp_max_unroll_factor >= future_unroll_cnt) {
928       return phase->may_require_nodes(estimate);
929     }
930     return false; // Loop too big.
931   }
932 
933   // Check for being too big
      // NOTE(review): 'body_size' and 'xors_in_loop' are computed earlier in
      // this function -- confirm their derivation against the full body.
934   if (body_size > (uint)_local_loop_unroll_limit) {
935     if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < 4u * LoopUnrollLimit) {
936       return phase->may_require_nodes(estimate);
937     }
938     return false; // Loop too big.
939   }
940 
941   if (cl->is_unroll_only()) {
942     if (TraceSuperWordLoopUnrollAnalysis) {
943       tty->print_cr("policy_unroll passed vector loop(vlen=%d, factor=%d)\n",
944                     slp_max_unroll_factor, future_unroll_cnt);
975 tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit);
976 }
977 _local_loop_unroll_limit = new_limit;
978 }
979 }
980 }
981 }
982 }
983
984 //------------------------------policy_align-----------------------------------
985 // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
986 // expression that does the alignment. Note that only one array base can be
987 // aligned in a loop (unless the VM guarantees mutual alignment). Note that
988 // if we vectorize short memory ops into longer memory ops, we may want to
989 // increase alignment.
990 bool IdealLoopTree::policy_align(PhaseIdealLoop *phase) const {
991 return false;
992 }
993
994 //------------------------------policy_range_check-----------------------------
995 // Return TRUE or FALSE if the loop should be range-check-eliminated or not.
996 // When TRUE, the estimated node budget is also requested.
997 //
998 // We will actually perform iteration-splitting, a more powerful form of RCE.
999 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
1000   if (!RangeCheckElimination) return false;
1001 
1002   // If nodes are depleted, some transform has miscalculated its needs.
1003   assert(!phase->exceeding_node_budget(), "sanity");
1004 
1005   CountedLoopNode *cl = _head->as_CountedLoop();
1006   // If we unrolled with no intention of doing RCE and we later changed our
1007   // minds, we got no pre-loop. Either we need to make a new pre-loop, or we
1008   // have to disallow RCE.
1009   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
1010   Node *trip_counter = cl->phi();
1011 
1012   // check for vectorized loops, some opts are no longer needed
1013   if (cl->is_unroll_only()) return false;
1014 
1015   // Check loop body for tests of trip-counter plus loop-invariant vs
1016   // loop-invariant.
1017   for (uint i = 0; i < _body.size(); i++) {
1018     Node *iff = _body[i];
1019     if (iff->Opcode() == Op_If ||
1020         iff->Opcode() == Op_RangeCheck) { // Test?
1021 
1022       // Comparing trip+off vs limit
1023       Node *bol = iff->in(1);
1024       if (bol->req() != 2) {
1025         continue; // dead constant test
1026       }
1027       if (!bol->is_Bool()) {
1028         assert(bol->Opcode() == Op_Conv2B, "predicate check only");
1035       Node *rc_exp = cmp->in(1);
1036       Node *limit = cmp->in(2);
1037 
       // The limit must be loop-invariant: its control must sit outside this
       // loop, otherwise the comparison cannot be hoisted by RCE.
1038       Node *limit_c = phase->get_ctrl(limit);
1039       if (limit_c == phase->C->top()) {
1040         return false; // Found dead test on live IF? No RCE!
1041       }
1042       if (is_member(phase->get_loop(limit_c))) {
1043         // Compare might have operands swapped; commute them
1044         rc_exp = cmp->in(2);
1045         limit = cmp->in(1);
1046         limit_c = phase->get_ctrl(limit);
1047         if (is_member(phase->get_loop(limit_c))) {
1048           continue; // Both inputs are loop varying; cannot RCE
1049         }
1050       }
1051 
       // Only expressions of the form scale*iv + offset are RCE candidates.
1052       if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
1053         continue;
1054       }
1055       // Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2)
1056       // projections. If BOTH are in the loop we need loop unswitching instead
1057       // of iteration splitting.
1058       if (is_loop_exit(iff)) {
1059         // Found valid reason to split iterations (if there is room).
1060         // NOTE: Usually a gross overestimate.
1061         return phase->may_require_nodes(est_loop_clone_sz(2));
1062       }
1063     } // End of is IF
1064   }
1065 
1066   return false;
1067 }
1068
1069 //------------------------------policy_peel_only-------------------------------
1070 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful
1071 // for unrolling loops with NO array accesses.
1072 bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const {
1073
1074 // If nodes are depleted, some transform has miscalculated its needs.
1075 assert(!phase->exceeding_node_budget(), "sanity");
1076
1077 // check for vectorized loops, any peeling done was already applied
1078 if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
1079 return false;
1080 }
1081
1540
1541 // Now force out all loop-invariant dominating tests. The optimizer
1542 // finds some, but we _know_ they are all useless.
1543 peeled_dom_test_elim(loop,old_new);
1544 loop->record_for_igvn();
1545 }
1546
1547 //------------------------------insert_vector_post_loop------------------------
1548 // Insert a copy of the atomic unrolled vectorized main loop as a post loop,
1549 // unroll_policy has already informed us that more unrolling is about to
1550 // happen to the main loop. The resultant post loop will serve as a
1551 // vectorized drain loop.
1552 void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1553 if (!loop->_head->is_CountedLoop()) return;
1554
1555 CountedLoopNode *cl = loop->_head->as_CountedLoop();
1556
1557 // only process vectorized main loops
1558 if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
1559
1560 int slp_max_unroll_factor = cl->slp_max_unroll();
1561 int cur_unroll = cl->unrolled_count();
1562
1563 if (slp_max_unroll_factor == 0) return;
1564
1565 // only process atomic unroll vector loops (not super unrolled after vectorization)
1566 if (cur_unroll != slp_max_unroll_factor) return;
1567
1568 // we only ever process this one time
1569 if (cl->has_atomic_post_loop()) return;
1570
1571 if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
1572 return;
1573 }
1574
1575 #ifndef PRODUCT
1576 if (TraceLoopOpts) {
1577 tty->print("PostVector ");
1578 loop->dump_head();
1579 }
1580 #endif
1581 C->set_major_progress();
1582
1583 // Find common pieces of the loop being guarded with pre & post loops
1584 CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1585 CountedLoopEndNode *main_end = main_head->loopexit();
1586 // diagnostic to show loop end is not properly formed
1587 assert(main_end->outcnt() == 2, "1 true, 1 false path only");
1588
1589 // mark this loop as processed
1590 main_head->mark_has_atomic_post_loop();
1591
1592 Node *incr = main_end->incr();
1593 Node *limit = main_end->limit();
1594
3198 return true;
3199 }
3200
3201 //=============================================================================
3202 //------------------------------iteration_split_impl---------------------------
// Apply one round of iteration-splitting transforms to this loop: trivial
// loop removal, (partial) peeling, unswitching, maximal unroll, conversion
// to pre/main/post loops, RCE, and unrolling. Returns false only when this
// round of loop opts should terminate early (a successful partial peel, or
// a refused node budget for the pre/main/post conversion); true otherwise.
3203 bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_new) {
3204   // Compute loop trip count if possible.
3205   compute_trip_count(phase);
3206 
3207   // Convert one iteration loop into normal code.
3208   if (do_one_iteration_loop(phase)) {
3209     return true;
3210   }
3211   // Check and remove empty loops (spam micro-benchmarks)
3212   if (do_remove_empty_loop(phase)) {
3213     return true; // Here we removed an empty loop
3214   }
3215 
       // Scoped node-budget accounting for the transforms attempted below.
3216   AutoNodeBudget node_budget(phase);
3217 
3218   // Non-counted loops may be peeled; exactly 1 iteration is peeled.
3219   // This removes loop-invariant tests (usually null checks).
3220   if (!_head->is_CountedLoop()) { // Non-counted loop
3221     if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
3222       // Partial peel succeeded so terminate this round of loop opts
3223       return false;
3224     }
3225     if (policy_peeling(phase)) { // Should we peel?
3226       if (PrintOpto) { tty->print_cr("should_peel"); }
3227       phase->do_peeling(this, old_new);
3228     } else if (policy_unswitching(phase)) {
3229       phase->do_unswitching(this, old_new);
3230     }
3231     return true;
3232   }
3233   CountedLoopNode *cl = _head->as_CountedLoop();
3234 
3235   if (!cl->is_valid_counted_loop()) return true; // Ignore various kinds of broken loops
3236 
3237   // Do nothing special to pre- and post- loops
3238   if (cl->is_pre_loop() || cl->is_post_loop()) return true;
3239 
3240   // Compute loop trip count from profile data
3241   compute_profile_trip_cnt(phase);
3242 
3243   // Before attempting fancy unrolling, RCE or alignment, see if we want
3244   // to completely unroll this loop or do loop unswitching.
3245   if (cl->is_normal_loop()) {
3246     if (policy_unswitching(phase)) {
3247       phase->do_unswitching(this, old_new);
3248       return true;
3249     }
3250     if (policy_maximally_unroll(phase)) {
3251       // Here we did some unrolling and peeling. Eventually we will
3252       // completely unroll this loop and it will no longer be a loop.
3253       phase->do_maximally_unroll(this, old_new);
3254       return true;
3255     }
3256   }
3257 
       // estimate_peeling() returns the node cost of peeling, or 0 when
       // peeling is not wanted/possible.
3258   uint est_peeling = estimate_peeling(phase);
3259   bool should_peel = 0 < est_peeling;
3260 
3261   // Counted loops may be peeled, may need some iterations run up
3262   // front for RCE, and may want to align loop refs to a cache
3263   // line. Thus we clone a full loop up front whose trip count is
3264   // at least 1 (if peeling), but may be several more.
3265 
3266   // The main loop will start cache-line aligned with at least 1
3267   // iteration of the unrolled body (zero-trip test required) and
3268   // will have some range checks removed.
3269 
3270   // A post-loop will finish any odd iterations (leftover after
3271   // unrolling), plus any needed for RCE purposes.
3272 
3273   bool should_unroll = policy_unroll(phase);
3274   bool should_rce = policy_range_check(phase);
3275   // TODO: Remove align -- not used.
3276   bool should_align = policy_align(phase);
3277 
3278   // If not RCE'ing (iteration splitting) or Aligning, then we do not need a
3279   // pre-loop. We may still need to peel an initial iteration but we will not
3280   // be needing an unknown number of pre-iterations.
3281   //
3282   // Basically, if may_rce_align reports FALSE first time through, we will not
3283   // be able to later do RCE or Aligning on this loop.
3284   bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
3285 
3286   // If we have any of these conditions (RCE, alignment, unrolling) met, then
3287   // we switch to the pre-/main-/post-loop model. This model also covers
3288   // peeling.
3289   if (should_rce || should_align || should_unroll) {
3290     if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
         // Conversion clones the loop twice more (pre + post); budget for it.
3291       uint estimate = est_loop_clone_sz(3);
3292       if (!phase->may_require_nodes(estimate)) {
3293         return false;
3294       }
3295       phase->insert_pre_post_loops(this, old_new, !may_rce_align);
3296     }
3297     // Adjust the pre- and main-loop limits to let the pre and post loops run
3298     // with full checks, but the main-loop with no checks. Remove said checks
3299     // from the main body.
3300     if (should_rce) {
3301       if (phase->do_range_check(this, old_new) != 0) {
3302         cl->mark_has_range_checks();
3303       }
3304     } else if (PostLoopMultiversioning) {
3305       phase->has_range_checks(this);
3306     }
3307 
3308     if (should_unroll && !should_peel && PostLoopMultiversioning) {
3309       // Try to setup multiversioning on main loops before they are unrolled
3310       if (cl->is_main_loop() && (cl->unrolled_count() == 1)) {
3311         phase->insert_scalar_rced_post_loop(this, old_new);
3312       }
3313     }
3314 
3315     // Double loop body for unrolling. Adjust the minimum-trip test (will do
3316     // twice as many iterations as before) and the main body limit (only do
3317     // an even number of trips). If we are peeling, we might enable some RCE
3318     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
3319     // peeling.
3320     if (should_unroll && !should_peel) {
3321       if (SuperWordLoopUnrollAnalysis) {
3322         phase->insert_vector_post_loop(this, old_new);
3323       }
3324       phase->do_unroll(this, old_new, true);
3325     }
3326 
3327     // Adjust the pre-loop limits to align the main body iterations.
3328     if (should_align) {
         // Alignment is never requested (policy_align() returns false), so
         // this path must be unreachable.
3329       Unimplemented();
3330     }
3331   } else { // Else we have an unchanged counted loop
3332     if (should_peel) { // Might want to peel but do nothing else
3333       if (phase->may_require_nodes(est_peeling)) {
3334         phase->do_peeling(this, old_new);
3335       }
3336     }
3337   }
3338   return true;
3339 }
3340
3341
3342 //=============================================================================
3343 //------------------------------iteration_split--------------------------------
3344 bool IdealLoopTree::iteration_split(PhaseIdealLoop* phase, Node_List &old_new) {
3345 // Recursively iteration split nested loops
3346 if (_child && !_child->iteration_split(phase, old_new)) {
3347 return false;
3348 }
3349
3350 // Clean out prior deadwood
3351 DCE_loop_body();
3352
3353 // Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
3354 // Replace with a 1-in-10 exit guess.
3355 if (!is_root() && is_loop()) {
|