    Node *n = _body.at(i);
    for (int j = 0; j < 5; j++) {
      Node* nn = reassociate_add_sub(n, phase);
      if (nn == NULL) break;
      n = nn; // again
    };
  }
}

//------------------------------policy_peeling---------------------------------
// Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
// make some loop-invariant test (usually a null-check) happen before the loop.
bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
  Node *test = ((IdealLoopTree*)this)->tail();
  int body_size = ((IdealLoopTree*)this)->_body.size();
  // Peeling does loop cloning which can result in O(N^2) node construction
  if( body_size > 255 /* Prevent overflow for large body_size */
      || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) {
    return false;           // too large to safely clone
  }
  while( test != _head ) {      // Scan till run off top of loop
    if( test->is_If() ) {       // Test?
      Node *ctrl = phase->get_ctrl(test->in(1));
      if (ctrl->is_top())
        return false;           // Found dead test on live IF? No peeling!
      // Standard IF only has one input value to check for loop invariance
      assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
      // Condition is not a member of this loop?
      if( !is_member(phase->get_loop(ctrl)) &&
          is_loop_exit(test) )
        return true;            // Found reason to peel!
    }
    // Walk up dominators to loop _head looking for test which is
    // executed on every path thru loop.
    test = phase->idom(test);
  }
  return false;
}
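
// Illustrative note (not part of the original source): the kind of test
// policy_peeling looks for is a loop-invariant check such as the null check in
// a Java loop like
//
//   while (i < n) { if (a == null) throw ...;  s += a[i];  i++; }
//
// where 'a' never changes inside the loop. Peeling one iteration executes that
// check once before the loop, so the copy left in the loop body is dominated by
// an equivalent test outside the loop and can be removed by
// peeled_dom_test_elim below.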

//------------------------------peeled_dom_test_elim---------------------------
// ...

//------------------------------policy_unroll----------------------------------
// Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
// the loop is a CountedLoop and the body is small enough.
bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {

  CountedLoopNode *cl = _head->as_CountedLoop();
  assert(cl->is_normal_loop() || cl->is_main_loop(), "");

  if (!cl->is_valid_counted_loop())
    return false; // Malformed counted loop

  // Protect against over-unrolling.
  // After split at least one iteration will be executed in pre-loop.
  if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;

  _local_loop_unroll_limit = LoopUnrollLimit;
  _local_loop_unroll_factor = 4;
  int future_unroll_ct = cl->unrolled_count() * 2;
  if (future_unroll_ct > LoopMaxUnroll) return false;

  // Check for initial stride being a small enough constant
  if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;

  // Don't unroll if the next round of unrolling would push us
  // over the expected trip count of the loop. One is subtracted
  // from the expected trip count because the pre-loop normally
  // executes 1 iteration.
  if (UnrollLimitForProfileCheck > 0 &&
      cl->profile_trip_cnt() != COUNT_UNKNOWN &&
      future_unroll_ct > UnrollLimitForProfileCheck &&
      (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
    return false;
  }

  // When unroll count is greater than LoopUnrollMin, don't unroll if:
  // the residual iterations are more than 10% of the trip count
  // and rounds of "unroll,optimize" are not making significant progress
  // Progress defined as current size less than 20% larger than previous size.
  if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
      // ...
      case Op_FastLock:
      case Op_FastUnlock: {
        // Don't unroll RTM locking code because it is large.
        if (UseRTMLocking) {
          return false;
        }
      }
#endif
    } // switch
  }

  if (UseSuperWord) {
    if (!cl->is_reduction_loop()) {
      phase->mark_reductions(this);
    }

    // Only attempt slp analysis when user controls do not prohibit it
    if (LoopMaxUnroll > _local_loop_unroll_factor) {
      // Once policy_slp_analysis succeeds, mark the loop with the
      // maximal unroll factor so that we minimize analysis passes
      if ((future_unroll_ct > _local_loop_unroll_factor) ||
          (body_size > (uint)_local_loop_unroll_limit)) {
        policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
      }
    }
  }

  // Check for being too big
  if (body_size > (uint)_local_loop_unroll_limit) {
    if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
    // Normal case: loop too big
    return false;
  }

  // Unroll once! (Each trip will soon do double iterations)
  return true;
}
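
// Worked example with hypothetical numbers (not from the source): suppose the
// loop has already been unrolled to unrolled_count() == 4, so the next step
// would be future_unroll_ct == 8. The stride check above then tolerates
// |stride| up to (1<<2)*8 == 32. If profiling recorded
// profile_trip_cnt() == 7.0 (and UnrollLimitForProfileCheck is low enough for
// that check to apply), the step is rejected because 8 > 7.0 - 1.0: doubling
// again would exceed the iterations the loop is expected to execute once the
// pre-loop's single iteration is subtracted.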

void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
  // Enable this functionality target by target as needed
  if (SuperWordLoopUnrollAnalysis) {
    if (!cl->has_passed_slp()) {
      SuperWord sw(phase);
      sw.transform_loop(this, false);

      // If the loop is slp canonical analyze it
      if (sw.early_return() == false) {
        sw.unrolling_analysis(cl, _local_loop_unroll_factor);
      }
    }

    int slp_max_unroll_factor = cl->slp_max_unroll();
    if ((slp_max_unroll_factor > 4) &&
        (slp_max_unroll_factor >= future_unroll_ct)) {
      int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
      if (new_limit > LoopUnrollLimit) {
#ifndef PRODUCT
        if (TraceSuperWordLoopUnrollAnalysis) {
          tty->print_cr("slp analysis is applying unroll limit %d, the original limit was %d\n",
                        new_limit, _local_loop_unroll_limit);
        }
#endif
        _local_loop_unroll_limit = new_limit;
      }
    }
  }
}

//------------------------------policy_align-----------------------------------
// Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
// expression that does the alignment. Note that only one array base can be
// aligned in a loop (unless the VM guarantees mutual alignment). Note that
// if we vectorize short memory ops into longer memory ops, we may want to
// increase alignment.
bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
  return false;
}

//------------------------------policy_range_check-----------------------------
// Return TRUE or FALSE if the loop should be range-check-eliminated.
// Actually we do iteration-splitting, a more powerful form of RCE.
bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
  if (!RangeCheckElimination) return false;

  CountedLoopNode *cl = _head->as_CountedLoop();
  // If we unrolled with no intention of doing RCE and we later
  // changed our minds, we got no pre-loop. Either we need to
  // make a new pre-loop, or we gotta disallow RCE.
  if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
  Node *trip_counter = cl->phi();

  // Check loop body for tests of trip-counter plus loop-invariant vs
  // loop-invariant.
  for (uint i = 0; i < _body.size(); i++) {
    Node *iff = _body[i];
    if (iff->Opcode() == Op_If) { // Test?

      // Comparing trip+off vs limit
      Node *bol = iff->in(1);
      if (bol->req() != 2) continue; // dead constant test
      if (!bol->is_Bool()) {
        assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only");
        continue;
      }
      if (bol->as_Bool()->_test._test == BoolTest::ne)
        continue; // not RC

      Node *cmp = bol->in(1);
      Node *rc_exp = cmp->in(1);
      Node *limit = cmp->in(2);

      // ...
      }

      if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
        continue;
      }
      // Yeah! Found a test like 'trip+off vs limit'
      // Test is an IfNode, has 2 projections. If BOTH are in the loop
      // we need loop unswitching instead of iteration splitting.
      if( is_loop_exit(iff) )
        return true; // Found reason to split iterations
    } // End of is IF
  }

  return false;
}
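
// Illustrative example (not part of the original source): the pattern
// policy_range_check accepts is a comparison of a scaled trip counter plus a
// loop-invariant offset against a loop-invariant limit, e.g. the bounds check
// generated for a Java loop such as
//
//   for (int i = 0; i < n; i++) { s += a[i + 3]; }
//
// which tests i*1 + 3 against the invariant a.length and is recognized by
// is_scaled_iv_plus_offset, so iteration splitting can hoist the check out of
// the main loop.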

//------------------------------policy_peel_only-------------------------------
// Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful
// for unrolling loops with NO array accesses.
bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const {

  for( uint i = 0; i < _body.size(); i++ )
    if( _body[i]->is_Mem() )
      return false;

  // No memory accesses at all!
  return true;
}

//------------------------------clone_up_backedge_goo--------------------------
// If Node n lives in the back_ctrl block and cannot float, we clone a private
// version of n in preheader_ctrl block and return that, otherwise return n.
Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones ) {
  if( get_ctrl(n) != back_ctrl ) return n;

  // Only visit once
  if (visited.test_set(n->_idx)) {
    Node *x = clones.find(n->_idx);
    if (x != NULL)
      return x;
      // ...

//==============================================================================
// Revised version of the same functions (right-hand column of the original
// side-by-side comparison)
//==============================================================================

    Node *n = _body.at(i);
    for (int j = 0; j < 5; j++) {
      Node* nn = reassociate_add_sub(n, phase);
      if (nn == NULL) break;
      n = nn; // again
    };
  }
}

//------------------------------policy_peeling---------------------------------
// Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
// make some loop-invariant test (usually a null-check) happen before the loop.
bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
  Node *test = ((IdealLoopTree*)this)->tail();
  int body_size = ((IdealLoopTree*)this)->_body.size();
  // Peeling does loop cloning which can result in O(N^2) node construction
  if( body_size > 255 /* Prevent overflow for large body_size */
      || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) {
    return false;           // too large to safely clone
  }

  // check for vectorized loops, any peeling done was already applied
  if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) return false;

  while( test != _head ) {      // Scan till run off top of loop
    if( test->is_If() ) {       // Test?
      Node *ctrl = phase->get_ctrl(test->in(1));
      if (ctrl->is_top())
        return false;           // Found dead test on live IF? No peeling!
      // Standard IF only has one input value to check for loop invariance
      assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
      // Condition is not a member of this loop?
      if( !is_member(phase->get_loop(ctrl)) &&
          is_loop_exit(test) )
        return true;            // Found reason to peel!
    }
    // Walk up dominators to loop _head looking for test which is
    // executed on every path thru loop.
    test = phase->idom(test);
  }
  return false;
}

//------------------------------peeled_dom_test_elim---------------------------
// ...

//------------------------------policy_unroll----------------------------------
// Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
// the loop is a CountedLoop and the body is small enough.
bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {

  CountedLoopNode *cl = _head->as_CountedLoop();
  assert(cl->is_normal_loop() || cl->is_main_loop(), "");

  if (!cl->is_valid_counted_loop())
    return false; // Malformed counted loop

  // Protect against over-unrolling.
  // After split at least one iteration will be executed in pre-loop.
  if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;

  _local_loop_unroll_limit = LoopUnrollLimit;
  _local_loop_unroll_factor = 4;
  int future_unroll_ct = cl->unrolled_count() * 2;
  if (!cl->do_unroll_only()) {
    if (future_unroll_ct > LoopMaxUnroll) return false;
  } else {
    // obey user constraints on vector mapped loops with additional unrolling applied
    if ((future_unroll_ct / cl->slp_max_unroll()) > LoopMaxUnroll) return false;
  }

  // Check for initial stride being a small enough constant
  if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;

  // Don't unroll if the next round of unrolling would push us
  // over the expected trip count of the loop. One is subtracted
  // from the expected trip count because the pre-loop normally
  // executes 1 iteration.
  if (UnrollLimitForProfileCheck > 0 &&
      cl->profile_trip_cnt() != COUNT_UNKNOWN &&
      future_unroll_ct > UnrollLimitForProfileCheck &&
      (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
    return false;
  }

  // When unroll count is greater than LoopUnrollMin, don't unroll if:
  // the residual iterations are more than 10% of the trip count
  // and rounds of "unroll,optimize" are not making significant progress
  // Progress defined as current size less than 20% larger than previous size.
  if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
      // ...
      case Op_FastLock:
      case Op_FastUnlock: {
        // Don't unroll RTM locking code because it is large.
        if (UseRTMLocking) {
          return false;
        }
      }
#endif
    } // switch
  }

  if (UseSuperWord) {
    if (!cl->is_reduction_loop()) {
      phase->mark_reductions(this);
    }

    // Only attempt slp analysis when user controls do not prohibit it
    if (LoopMaxUnroll > _local_loop_unroll_factor) {
      // Once policy_slp_analysis succeeds, mark the loop with the
      // maximal unroll factor so that we minimize analysis passes
      if (future_unroll_ct >= _local_loop_unroll_factor) {
        policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
      }
    }
  }

  int slp_max_unroll_factor = cl->slp_max_unroll();
  if (cl->has_passed_slp()) {
    if (slp_max_unroll_factor >= future_unroll_ct) return true;
    // Normal case: loop too big
    return false;
  }

  // Check for being too big
  if (body_size > (uint)_local_loop_unroll_limit) {
    if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
    // Normal case: loop too big
    return false;
  }

  if(cl->do_unroll_only()) {
    NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct));
  }

  // Unroll once! (Each trip will soon do double iterations)
  return true;
}
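
// Illustrative arithmetic (not from the source, hypothetical values): for a
// do_unroll_only() loop that SLP analysis mapped to vectors with
// slp_max_unroll() == 4, reaching unrolled_count() == 16 gives
// future_unroll_ct == 32, and the user limit is applied to 32 / 4 == 8. In
// other words LoopMaxUnroll appears to constrain only the unrolling requested
// on top of what the vector width itself requires.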

void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
  // Enable this functionality target by target as needed
  if (SuperWordLoopUnrollAnalysis) {
    if (!cl->was_slp_analyzed()) {
      SuperWord sw(phase);
      sw.transform_loop(this, false);

      // If the loop is slp canonical analyze it
      if (sw.early_return() == false) {
        sw.unrolling_analysis(_local_loop_unroll_factor);
      }
    }

    if (cl->has_passed_slp()) {
      int slp_max_unroll_factor = cl->slp_max_unroll();
      if (slp_max_unroll_factor >= future_unroll_ct) {
        int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
        if (new_limit > LoopUnrollLimit) {
          NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit));
          _local_loop_unroll_limit = new_limit;
        }
      }
    }
  }
}
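
// Worked example with hypothetical numbers (not from the source): if the body
// contained node_count_before_unroll() == 40 nodes and the analysis reports
// slp_max_unroll() == 8 (with 8 >= future_unroll_ct), the local limit is raised
// to 40 * 8 == 320 whenever that exceeds LoopUnrollLimit, so policy_unroll can
// keep doubling the loop until it matches the vector width instead of stopping
// at the scalar size limit.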

//------------------------------policy_align-----------------------------------
// Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
// expression that does the alignment. Note that only one array base can be
// aligned in a loop (unless the VM guarantees mutual alignment). Note that
// if we vectorize short memory ops into longer memory ops, we may want to
// increase alignment.
bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
  return false;
}

//------------------------------policy_range_check-----------------------------
// Return TRUE or FALSE if the loop should be range-check-eliminated.
// Actually we do iteration-splitting, a more powerful form of RCE.
bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
  if (!RangeCheckElimination) return false;

  CountedLoopNode *cl = _head->as_CountedLoop();
  // If we unrolled with no intention of doing RCE and we later
  // changed our minds, we got no pre-loop. Either we need to
  // make a new pre-loop, or we gotta disallow RCE.
  if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
  Node *trip_counter = cl->phi();

  // check for vectorized loops, some opts are no longer needed
  if (cl->do_unroll_only()) return false;

  // Check loop body for tests of trip-counter plus loop-invariant vs
  // loop-invariant.
  for (uint i = 0; i < _body.size(); i++) {
    Node *iff = _body[i];
    if (iff->Opcode() == Op_If) { // Test?

      // Comparing trip+off vs limit
      Node *bol = iff->in(1);
      if (bol->req() != 2) continue; // dead constant test
      if (!bol->is_Bool()) {
        assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only");
        continue;
      }
      if (bol->as_Bool()->_test._test == BoolTest::ne)
        continue; // not RC

      Node *cmp = bol->in(1);
      Node *rc_exp = cmp->in(1);
      Node *limit = cmp->in(2);

      // ...
      }

      if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
        continue;
      }
      // Yeah! Found a test like 'trip+off vs limit'
      // Test is an IfNode, has 2 projections. If BOTH are in the loop
      // we need loop unswitching instead of iteration splitting.
      if( is_loop_exit(iff) )
        return true; // Found reason to split iterations
    } // End of is IF
  }

  return false;
}

//------------------------------policy_peel_only-------------------------------
// Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful
// for unrolling loops with NO array accesses.
bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const {
  // check for vectorized loops, any peeling done was already applied
  if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) return false;

  for( uint i = 0; i < _body.size(); i++ )
    if( _body[i]->is_Mem() )
      return false;

  // No memory accesses at all!
  return true;
}

//------------------------------clone_up_backedge_goo--------------------------
// If Node n lives in the back_ctrl block and cannot float, we clone a private
// version of n in preheader_ctrl block and return that, otherwise return n.
Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones ) {
  if( get_ctrl(n) != back_ctrl ) return n;

  // Only visit once
  if (visited.test_set(n->_idx)) {
    Node *x = clones.find(n->_idx);
    if (x != NULL)
      return x;