21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "compiler/compileLog.hpp"
27 #include "memory/allocation.inline.hpp"
28 #include "opto/addnode.hpp"
29 #include "opto/callnode.hpp"
30 #include "opto/castnode.hpp"
31 #include "opto/connode.hpp"
32 #include "opto/convertnode.hpp"
33 #include "opto/divnode.hpp"
34 #include "opto/loopnode.hpp"
35 #include "opto/mulnode.hpp"
36 #include "opto/movenode.hpp"
37 #include "opto/opaquenode.hpp"
38 #include "opto/rootnode.hpp"
39 #include "opto/runtime.hpp"
40 #include "opto/subnode.hpp"
41 #include "opto/vectornode.hpp"
42
43 //------------------------------is_loop_exit-----------------------------------
44 // Given an IfNode, return the loop-exiting projection or NULL if both
45 // arms remain in the loop.
46 Node *IdealLoopTree::is_loop_exit(Node *iff) const {
47 if( iff->outcnt() != 2 ) return NULL; // Ignore partially dead tests
48 PhaseIdealLoop *phase = _phase;
49 // Test is an IfNode, has 2 projections. If BOTH are in the loop
50 // we need loop unswitching instead of peeling.
51 if( !is_member(phase->get_loop( iff->raw_out(0) )) )
52 return iff->raw_out(0);
53 if( !is_member(phase->get_loop( iff->raw_out(1) )) )
54 return iff->raw_out(1);
55 return NULL;
56 }
57
58
59 //=============================================================================
60
623 }
624 #if INCLUDE_RTM_OPT
625 case Op_FastLock:
626 case Op_FastUnlock: {
627 // Don't unroll RTM locking code because it is large.
628 if (UseRTMLocking) {
629 return false;
630 }
631 }
632 #endif
633 } // switch
634 }
635
636 return true; // Do maximally unroll
637 }
638
639
640 //------------------------------policy_unroll----------------------------------
641 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
642 // the loop is a CountedLoop and the body is small enough.
643 bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
644
645 CountedLoopNode *cl = _head->as_CountedLoop();
646 assert(cl->is_normal_loop() || cl->is_main_loop(), "");
647
648 if (!cl->is_valid_counted_loop())
649 return false; // Malformed counted loop
650
651 // Protect against over-unrolling.
652 // After split at least one iteration will be executed in pre-loop.
653 if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
654
655 int future_unroll_ct = cl->unrolled_count() * 2;
656 if (future_unroll_ct > LoopMaxUnroll) return false;
657
658 // Check for initial stride being a small enough constant
659 if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
660
661 // Don't unroll if the next round of unrolling would push us
662 // over the expected trip count of the loop. One is subtracted
663 // from the expected trip count because the pre-loop normally
664 // executes 1 iteration.
665 if (UnrollLimitForProfileCheck > 0 &&
666 cl->profile_trip_cnt() != COUNT_UNKNOWN &&
667 future_unroll_ct > UnrollLimitForProfileCheck &&
668 (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
669 return false;
670 }
671
672 // When unroll count is greater than LoopUnrollMin, don't unroll if:
673 // the residual iterations are more than 10% of the trip count
674 // and rounds of "unroll,optimize" are not making significant progress
730 case Op_StrEquals:
731 case Op_StrIndexOf:
732 case Op_EncodeISOArray:
733 case Op_AryEq: {
734 // Do not unroll a loop with String intrinsics code.
735 // String intrinsics are large and have loops.
736 return false;
737 }
738 #if INCLUDE_RTM_OPT
739 case Op_FastLock:
740 case Op_FastUnlock: {
741 // Don't unroll RTM locking code because it is large.
742 if (UseRTMLocking) {
743 return false;
744 }
745 }
746 #endif
747 } // switch
748 }
749
750 // Check for being too big
751 if (body_size > (uint)LoopUnrollLimit) {
752 if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
753 // Normal case: loop too big
754 return false;
755 }
756
757 // Unroll once! (Each trip will soon do double iterations)
758 return true;
759 }
760
761 //------------------------------policy_align-----------------------------------
762 // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
763 // expression that does the alignment. Note that only one array base can be
764 // aligned in a loop (unless the VM guarantees mutual alignment). Note that
765 // if we vectorize short memory ops into longer memory ops, we may want to
766 // increase alignment.
767 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
768 return false;
769 }
770
771 //------------------------------policy_range_check-----------------------------
772 // Return TRUE or FALSE if the loop should be range-check-eliminated.
773 // Actually we do iteration-splitting, a more powerful form of RCE.
774 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
775 if (!RangeCheckElimination) return false;
776
777 CountedLoopNode *cl = _head->as_CountedLoop();
778 // If we unrolled with no intention of doing RCE and we later
779 // changed our minds, we got no pre-loop. Either we need to
780 // make a new pre-loop, or we gotta disallow RCE.
1568 }
1569
1570 Node* trip_phi = loop_head->phi();
1571 for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
1572 Node* phi = loop_head->fast_out(i);
1573 if (phi->is_Phi() && phi->outcnt() > 0 && phi != trip_phi) {
1574 // For definitions that live inside the loop and are not the trip counter.
1575 Node* def_node = phi->in(LoopNode::LoopBackControl);
1576
1577 if (def_node != NULL) {
1578 Node* n_ctrl = get_ctrl(def_node);
1579 if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
1580 // Now test it to see if it fits the standard pattern for a reduction operator.
1581 int opc = def_node->Opcode();
1582 if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
1583 if (!def_node->is_reduction()) { // Not marked yet
1584 // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
1585 for (unsigned j = 1; j < def_node->req(); j++) {
1586 Node* in = def_node->in(j);
1587 if (in == phi) {
1588 def_node->add_flag(Node::Flag_is_reduction);
1589 break;
1590 }
1591 }
1592 }
1593 }
1594 }
1595 }
1596 }
1597 }
1598 }
1599
1600 //------------------------------dominates_backedge---------------------------------
1601 // Returns true if ctrl is executed on every complete iteration
1602 bool IdealLoopTree::dominates_backedge(Node* ctrl) {
1603 assert(ctrl->is_CFG(), "must be control");
1604 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
1605 return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
1606 }
1607
2418
2419 // If we have any of these conditions (RCE, alignment, unrolling) met, then
2420 // we switch to the pre-/main-/post-loop model. This model also covers
2421 // peeling.
2422 if (should_rce || should_align || should_unroll) {
2423 if (cl->is_normal_loop()) // Convert to 'pre/main/post' loops
2424 phase->insert_pre_post_loops(this,old_new, !may_rce_align);
2425
2426 // Adjust the pre- and main-loop limits to let the pre and post loops run
2427 // with full checks, but the main-loop with no checks. Remove said
2428 // checks from the main body.
2429 if (should_rce)
2430 phase->do_range_check(this,old_new);
2431
2432 // Double loop body for unrolling. Adjust the minimum-trip test (will do
2433 // twice as many iterations as before) and the main body limit (only do
2434 // an even number of trips). If we are peeling, we might enable some RCE
2435 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
2436 // peeling.
2437 if (should_unroll && !should_peel) {
2438 phase->mark_reductions(this);
2439 phase->do_unroll(this, old_new, true);
2440 }
2441
2442 // Adjust the pre-loop limits to align the main body
2443 // iterations.
2444 if (should_align)
2445 Unimplemented();
2446
2447 } else { // Else we have an unchanged counted loop
2448 if (should_peel) // Might want to peel but do nothing else
2449 phase->do_peeling(this,old_new);
2450 }
2451 return true;
2452 }
2453
2454
2455 //=============================================================================
2456 //------------------------------iteration_split--------------------------------
2457 bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
2458 // Recursively iteration split nested loops
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "compiler/compileLog.hpp"
27 #include "memory/allocation.inline.hpp"
28 #include "opto/addnode.hpp"
29 #include "opto/callnode.hpp"
30 #include "opto/castnode.hpp"
31 #include "opto/connode.hpp"
32 #include "opto/convertnode.hpp"
33 #include "opto/divnode.hpp"
34 #include "opto/loopnode.hpp"
35 #include "opto/mulnode.hpp"
36 #include "opto/movenode.hpp"
37 #include "opto/opaquenode.hpp"
38 #include "opto/rootnode.hpp"
39 #include "opto/runtime.hpp"
40 #include "opto/subnode.hpp"
41 #include "opto/superword.hpp"
42 #include "opto/vectornode.hpp"
43
44 //------------------------------is_loop_exit-----------------------------------
45 // Given an IfNode, return the loop-exiting projection or NULL if both
46 // arms remain in the loop.
47 Node *IdealLoopTree::is_loop_exit(Node *iff) const {
48 if( iff->outcnt() != 2 ) return NULL; // Ignore partially dead tests
49 PhaseIdealLoop *phase = _phase;
50 // Test is an IfNode, has 2 projections. If BOTH are in the loop
51 // we need loop unswitching instead of peeling.
52 if( !is_member(phase->get_loop( iff->raw_out(0) )) )
53 return iff->raw_out(0);
54 if( !is_member(phase->get_loop( iff->raw_out(1) )) )
55 return iff->raw_out(1);
56 return NULL;
57 }
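// Illustrative example (loop shapes assumed, not taken from this file): for a body like
//   for (int i = 0; i < n; i++) { if (a[i] < 0) break;  sum += a[i]; }
// the 'break' test is an IfNode with exactly one projection that leaves the loop, and
// is_loop_exit() returns that exiting projection.  For an invariant test such as
//   for (int i = 0; i < n; i++) { if (flag) pos++; else neg++; }
// both projections stay inside the loop, so NULL is returned and hoisting the test would
// require loop unswitching rather than peeling.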
58
59
60 //=============================================================================
61
624 }
625 #if INCLUDE_RTM_OPT
626 case Op_FastLock:
627 case Op_FastUnlock: {
628 // Don't unroll RTM locking code because it is large.
629 if (UseRTMLocking) {
630 return false;
631 }
632 }
633 #endif
634 } // switch
635 }
636
637 return true; // Do maximally unroll
638 }
639
640
641 //------------------------------policy_unroll----------------------------------
642 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
643 // the loop is a CountedLoop and the body is small enough.
644 bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) {
645
646 CountedLoopNode *cl = _head->as_CountedLoop();
647 assert(cl->is_normal_loop() || cl->is_main_loop(), "");
648
649 if (!cl->is_valid_counted_loop())
650 return false; // Malformed counted loop
651
652 // Protect against over-unrolling.
653 // After split at least one iteration will be executed in pre-loop.
654 if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
655
656 _local_loop_unroll_limit = LoopUnrollLimit;
657 _local_loop_unroll_factor = 4;
658 int future_unroll_ct = cl->unrolled_count() * 2;
659 if (future_unroll_ct > LoopMaxUnroll) return false;
660
661 // Check for initial stride being a small enough constant
662 if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
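// Worked check with assumed numbers: for stride_con() == +8 and future_unroll_ct == 4 the
// bound is (1<<2)*4 == 16, so the stride passes; a constant stride of 64 would exceed the
// bound and unrolling would be rejected here.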
663
664 // Don't unroll if the next round of unrolling would push us
665 // over the expected trip count of the loop. One is subtracted
666 // from the expected trip count because the pre-loop normally
667 // executes 1 iteration.
668 if (UnrollLimitForProfileCheck > 0 &&
669 cl->profile_trip_cnt() != COUNT_UNKNOWN &&
670 future_unroll_ct > UnrollLimitForProfileCheck &&
671 (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
672 return false;
673 }
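// Worked example with assumed profile data (and future_unroll_ct already above
// UnrollLimitForProfileCheck): starting from unrolled_count() == 2, the next round gives
// future_unroll_ct == 4.  If profiling recorded profile_trip_cnt() == 4.5, then
// 4.0 > 4.5 - 1.0 holds and we refuse to unroll, since after the pre-loop takes its one
// iteration the unrolled main body would cover more trips than the loop is expected to run.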
674
675 // When unroll count is greater than LoopUnrollMin, don't unroll if:
676 // the residual iterations are more than 10% of the trip count
677 // and rounds of "unroll,optimize" are not making significant progress
733 case Op_StrEquals:
734 case Op_StrIndexOf:
735 case Op_EncodeISOArray:
736 case Op_AryEq: {
737 // Do not unroll a loop with String intrinsics code.
738 // String intrinsics are large and have loops.
739 return false;
740 }
741 #if INCLUDE_RTM_OPT
742 case Op_FastLock:
743 case Op_FastUnlock: {
744 // Don't unroll RTM locking code because it is large.
745 if (UseRTMLocking) {
746 return false;
747 }
748 }
749 #endif
750 } // switch
751 }
752
753 if (UseSuperWord) {
754 if (!cl->is_reduction_loop()) {
755 phase->mark_reductions(this);
756 }
757
758 // Only attempt slp analysis when user controls do not prohibit it
759 if (LoopMaxUnroll > _local_loop_unroll_factor) {
760 // Once policy_unroll_slp_analysis succeeds, mark the loop with the
761 // maximal unroll factor so that we minimize analysis passes
762 if (future_unroll_ct > _local_loop_unroll_factor) {
763 policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
764 }
765 }
766 }
767
768 // Check for being too big
769 if (body_size > (uint)_local_loop_unroll_limit) {
770 if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
771 // Normal case: loop too big
772 return false;
773 }
774
775 // Unroll once! (Each trip will soon do double iterations)
776 return true;
777 }
778
779 void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
780 // Enable this functionality target by target as needed
781 if (SuperWordLoopUnrollAnalysis) {
782 if (!cl->has_passed_slp()) {
783 SuperWord sw(phase);
784 sw.transform_loop(this, false);
785
786 // If the loop is SLP-canonical, analyze it
787 if (sw.early_return() == false) {
788 sw.unrolling_analysis(cl, _local_loop_unroll_factor);
789 }
790 }
791
792 int slp_max_unroll_factor = cl->slp_max_unroll();
793 if ((slp_max_unroll_factor > 4) &&
794 (slp_max_unroll_factor >= future_unroll_ct)) {
795 int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
796 if (new_limit > LoopUnrollLimit) {
797 #ifndef PRODUCT
798 if (TraceSuperWordLoopUnrollAnalysis) {
799 tty->print_cr("slp analysis is applying unroll limit %d, the original limit was %d\n",
800 new_limit, _local_loop_unroll_limit);
801 }
802 #endif
803 _local_loop_unroll_limit = new_limit;
804 }
805 }
806 }
807 }
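// Worked example with assumed counts: suppose node_count_before_unroll() == 40 and
// unrolling_analysis() settled on slp_max_unroll() == 8 while future_unroll_ct == 8.
// Then new_limit == 40 * 8 == 320; if that exceeds LoopUnrollLimit, _local_loop_unroll_limit
// is raised to 320 so the "too big" test in policy_unroll() no longer blocks the wider
// unroll that vectorization wants.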
808
809 //------------------------------policy_align-----------------------------------
810 // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
811 // expression that does the alignment. Note that only one array base can be
812 // aligned in a loop (unless the VM guarantees mutual alignment). Note that
813 // if we vectorize short memory ops into longer memory ops, we may want to
814 // increase alignment.
815 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
816 return false;
817 }
818
819 //------------------------------policy_range_check-----------------------------
820 // Return TRUE or FALSE if the loop should be range-check-eliminated.
821 // Actually we do iteration-splitting, a more powerful form of RCE.
822 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
823 if (!RangeCheckElimination) return false;
824
825 CountedLoopNode *cl = _head->as_CountedLoop();
826 // If we unrolled with no intention of doing RCE and we later
827 // changed our minds, we got no pre-loop. Either we need to
828 // make a new pre-loop, or we gotta disallow RCE.
1616 }
1617
1618 Node* trip_phi = loop_head->phi();
1619 for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
1620 Node* phi = loop_head->fast_out(i);
1621 if (phi->is_Phi() && phi->outcnt() > 0 && phi != trip_phi) {
1622 // For definitions that live inside the loop and are not the trip counter.
1623 Node* def_node = phi->in(LoopNode::LoopBackControl);
1624
1625 if (def_node != NULL) {
1626 Node* n_ctrl = get_ctrl(def_node);
1627 if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
1628 // Now test it to see if it fits the standard pattern for a reduction operator.
1629 int opc = def_node->Opcode();
1630 if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
1631 if (!def_node->is_reduction()) { // Not marked yet
1632 // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
1633 for (unsigned j = 1; j < def_node->req(); j++) {
1634 Node* in = def_node->in(j);
1635 if (in == phi) {
1636 loop_head->mark_has_reductions();
1637 def_node->add_flag(Node::Flag_is_reduction);
1638 break;
1639 }
1640 }
1641 }
1642 }
1643 }
1644 }
1645 }
1646 }
1647 }
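// Illustrative reduction (source shape assumed, not taken from this file):
//   int sum = 0;
//   for (int i = 0; i < n; i++) { sum += a[i]; }
// 'sum' lives in a Phi on the loop head and the AddI feeding the backedge takes that Phi
// as an input, so the AddI is tagged Node::Flag_is_reduction and the loop head is marked
// via mark_has_reductions(); the trip-counter Phi itself is excluded by the phi != trip_phi
// test above.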
1648
1649 //------------------------------dominates_backedge---------------------------------
1650 // Returns true if ctrl is executed on every complete iteration
1651 bool IdealLoopTree::dominates_backedge(Node* ctrl) {
1652 assert(ctrl->is_CFG(), "must be control");
1653 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
1654 return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
1655 }
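// Illustrative example (loop shape assumed): in
//   for (int i = 0; i < n; i++) { a[i] = 0;  if (c) continue;  b[i] = 0; }
// the control for 'a[i] = 0' dominates the backedge, so dominates_backedge() is true for it,
// while the block storing to 'b[i]' can be skipped by the 'continue', and its
// dom_lca_internal() with the backedge is an earlier control node, not the block itself.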
1656
2467
2468 // If we have any of these conditions (RCE, alignment, unrolling) met, then
2469 // we switch to the pre-/main-/post-loop model. This model also covers
2470 // peeling.
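// Rough source-level picture of that model (illustration only, limits are schematic):
//   for (i = init; i < pre_limit;  i += stride) { ... }   // pre-loop:  full checks
//   for (        ; i < main_limit; i += stride) { ... }   // main loop: checks removed, unrolled
//   for (        ; i < limit;      i += stride) { ... }   // post-loop: full checks, leftover trips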
2471 if (should_rce || should_align || should_unroll) {
2472 if (cl->is_normal_loop()) // Convert to 'pre/main/post' loops
2473 phase->insert_pre_post_loops(this,old_new, !may_rce_align);
2474
2475 // Adjust the pre- and main-loop limits to let the pre and post loops run
2476 // with full checks, but the main-loop with no checks. Remove said
2477 // checks from the main body.
2478 if (should_rce)
2479 phase->do_range_check(this,old_new);
2480
2481 // Double loop body for unrolling. Adjust the minimum-trip test (will do
2482 // twice as many iterations as before) and the main body limit (only do
2483 // an even number of trips). If we are peeling, we might enable some RCE
2484 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
2485 // peeling.
2486 if (should_unroll && !should_peel) {
2487 phase->do_unroll(this, old_new, true);
2488 }
2489
2490 // Adjust the pre-loop limits to align the main body
2491 // iterations.
2492 if (should_align)
2493 Unimplemented();
2494
2495 } else { // Else we have an unchanged counted loop
2496 if (should_peel) // Might want to peel but do nothing else
2497 phase->do_peeling(this,old_new);
2498 }
2499 return true;
2500 }
2501
2502
2503 //=============================================================================
2504 //------------------------------iteration_split--------------------------------
2505 bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
2506 // Recursively iteration split nested loops