< prev index next >

src/share/vm/opto/loopTransform.cpp

Print this page




 263     Node *n = _body.at(i);
 264     for (int j = 0; j < 5; j++) {
 265       Node* nn = reassociate_add_sub(n, phase);
 266       if (nn == NULL) break;
 267       n = nn; // again
 268     };
 269   }
 270 }
 271 
 272 //------------------------------policy_peeling---------------------------------
 273 // Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
 274 // make some loop-invariant test (usually a null-check) happen before the loop.
 275 bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
      // Casts drop const because tail() and _body are non-const accessors.
 276   Node *test = ((IdealLoopTree*)this)->tail();
 277   int  body_size = ((IdealLoopTree*)this)->_body.size();
 278   // Peeling does loop cloning which can result in O(N^2) node construction
 279   if( body_size > 255 /* Prevent overflow for large body_size */
 280       || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) {
 281     return false;           // too large to safely clone
 282   }




      // Walk the idom chain from the loop tail up to the head; peel if some If's
      // condition is controlled outside this loop (loop-invariant) and the If
      // is a loop exit.
 283   while( test != _head ) {      // Scan till run off top of loop
 284     if( test->is_If() ) {       // Test?
 285       Node *ctrl = phase->get_ctrl(test->in(1));
 286       if (ctrl->is_top())
 287         return false;           // Found dead test on live IF?  No peeling!
 288       // Standard IF only has one input value to check for loop invariance
 289       assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
 290       // Condition is not a member of this loop?
 291       if( !is_member(phase->get_loop(ctrl)) &&
 292           is_loop_exit(test) )
 293         return true;            // Found reason to peel!
 294     }
 295     // Walk up dominators to loop _head looking for test which is
 296     // executed on every path thru loop.
 297     test = phase->idom(test);
 298   }
 299   return false;
 300 }
 301 
 302 //------------------------------peeled_dom_test_elim---------------------------


 639 
 640 
 641 //------------------------------policy_unroll----------------------------------
 642 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 643 // the loop is a CountedLoop and the body is small enough.
 644 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 645 
 646   CountedLoopNode *cl = _head->as_CountedLoop();
 647   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 648 
 649   if (!cl->is_valid_counted_loop())
 650     return false; // Malformed counted loop
 651 
 652   // Protect against over-unrolling.
 653   // After split at least one iteration will be executed in pre-loop.
 654   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 655 
 656   _local_loop_unroll_limit = LoopUnrollLimit;
 657   _local_loop_unroll_factor = 4;
 658   int future_unroll_ct = cl->unrolled_count() * 2;

 659   if (future_unroll_ct > LoopMaxUnroll) return false;




 660 
 661   // Check for initial stride being a small enough constant
 662   if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
 663 
 664   // Don't unroll if the next round of unrolling would push us
 665   // over the expected trip count of the loop.  One is subtracted
 666   // from the expected trip count because the pre-loop normally
 667   // executes 1 iteration.
 668   if (UnrollLimitForProfileCheck > 0 &&
 669       cl->profile_trip_cnt() != COUNT_UNKNOWN &&
 670       future_unroll_ct        > UnrollLimitForProfileCheck &&
 671       (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
 672     return false;
 673   }
 674 
 675   // When unroll count is greater than LoopUnrollMin, don't unroll if:
 676   //   the residual iterations are more than 10% of the trip count
 677   //   and rounds of "unroll,optimize" are not making significant progress
 678   //   Progress defined as current size less than 20% larger than previous size.
 679   if (UseSuperWord && cl->node_count_before_unroll() > 0 &&


 742       case Op_FastLock:
 743       case Op_FastUnlock: {
 744         // Don't unroll RTM locking code because it is large.
 745         if (UseRTMLocking) {
 746           return false;
 747         }
 748       }
 749 #endif
 750     } // switch
 751   }
 752 
 753   if (UseSuperWord) {
 754     if (!cl->is_reduction_loop()) {
 755       phase->mark_reductions(this);
 756     }
 757 
 758     // Only attempt slp analysis when user controls do not prohibit it
 759     if (LoopMaxUnroll > _local_loop_unroll_factor) {
 760       // Once policy_slp_analysis succeeds, mark the loop with the
 761       // maximal unroll factor so that we minimize analysis passes
 762       if ((future_unroll_ct > _local_loop_unroll_factor) ||
 763           (body_size > (uint)_local_loop_unroll_limit)) {
 764         policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
 765       }
 766     }
 767   }
 768 







 769   // Check for being too big
 770   if (body_size > (uint)_local_loop_unroll_limit) {
 771     if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
 772     // Normal case: loop too big
 773     return false;
 774   }
 775 




 776   // Unroll once!  (Each trip will soon do double iterations)
 777   return true;
 778 }
 779 
 780 void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
 781   // Enable this functionality target by target as needed
 782   if (SuperWordLoopUnrollAnalysis) {
      // Run the SuperWord (SLP) analysis at most once per loop.
 783     if (!cl->has_passed_slp()) {
 784       SuperWord sw(phase);
 785       sw.transform_loop(this, false);
 786 
 787       // If the loop is slp canonical analyze it
 788       if (sw.early_return() == false) {
 789         sw.unrolling_analysis(cl, _local_loop_unroll_factor);
 790       }
 791     }
 792 

      // If SLP found a vector unroll factor larger than 4 and at least as large
      // as the next unroll step, raise the local node-count limit so unrolling
      // toward that factor is not cut off prematurely.
 793     int slp_max_unroll_factor = cl->slp_max_unroll();
 794     if ((slp_max_unroll_factor > 4) &&
 795         (slp_max_unroll_factor >= future_unroll_ct)) {
 796       int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
 797       if (new_limit > LoopUnrollLimit) {
 798 #ifndef PRODUCT
 799         if (TraceSuperWordLoopUnrollAnalysis) {
      // NOTE(review): print_cr already appends a newline, so the trailing "\n"
      // in this format string emits an extra blank line; also note the double
      // space before the first %d. Confirm whether intentional.
 800           tty->print_cr("slp analysis is applying unroll limit  %d, the original limit was %d\n",
 801             new_limit, _local_loop_unroll_limit);
 802         }
 803 #endif
 804         _local_loop_unroll_limit = new_limit;
 805       }
 806     }
 807   }

 808 }
 809 
 810 //------------------------------policy_align-----------------------------------
 811 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 812 // expression that does the alignment.  Note that only one array base can be
 813 // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
 814 // if we vectorize short memory ops into longer memory ops, we may want to
 815 // increase alignment.
 816 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
      // Cache-line alignment is currently never requested.
 817   return false;
 818 }
 819 
 820 //------------------------------policy_range_check-----------------------------
 821 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 822 // Actually we do iteration-splitting, a more powerful form of RCE.
 823 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
 824   if (!RangeCheckElimination) return false;
 825 
 826   CountedLoopNode *cl = _head->as_CountedLoop();
 827   // If we unrolled with no intention of doing RCE and we later
 828   // changed our minds, we got no pre-loop.  Either we need to
 829   // make a new pre-loop, or we gotta disallow RCE.
 830   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
 831   Node *trip_counter = cl->phi();
 832 



 833   // Check loop body for tests of trip-counter plus loop-invariant vs
 834   // loop-invariant.
 835   for (uint i = 0; i < _body.size(); i++) {
 836     Node *iff = _body[i];
 837     if (iff->Opcode() == Op_If) { // Test?
 838 
 839       // Comparing trip+off vs limit
 840       Node *bol = iff->in(1);
 841       if (bol->req() != 2) continue; // dead constant test
 842       if (!bol->is_Bool()) {
 843         assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only");
 844         continue;
 845       }
 846       if (bol->as_Bool()->_test._test == BoolTest::ne)
 847         continue; // not RC
 848 
 849       Node *cmp = bol->in(1);
 850       Node *rc_exp = cmp->in(1);
 851       Node *limit = cmp->in(2);
 852 


 863       }
 864 
 865       if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
 866         continue;
 867       }
 868       // Yeah!  Found a test like 'trip+off vs limit'
 869       // Test is an IfNode, has 2 projections.  If BOTH are in the loop
 870       // we need loop unswitching instead of iteration splitting.
 871       if( is_loop_exit(iff) )
 872         return true;            // Found reason to split iterations
 873     } // End of is IF
 874   }
 875 
 876   return false;
 877 }
 878 
 879 //------------------------------policy_peel_only-------------------------------
 880 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned.  Useful
 881 // for unrolling loops with NO array accesses.
 882 bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const {


 883 
      // Any memory node in the body disqualifies peel-only treatment.
 884   for( uint i = 0; i < _body.size(); i++ )
 885     if( _body[i]->is_Mem() )
 886       return false;
 887 
 888   // No memory accesses at all!
 889   return true;
 890 }
 891 
 892 //------------------------------clone_up_backedge_goo--------------------------
 893 // If Node n lives in the back_ctrl block and cannot float, we clone a private
 894 // version of n in preheader_ctrl block and return that, otherwise return n.
 895 Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones ) {
 896   if( get_ctrl(n) != back_ctrl ) return n;
 897 
 898   // Only visit once
 899   if (visited.test_set(n->_idx)) {
 900     Node *x = clones.find(n->_idx);
 901     if (x != NULL)
 902       return x;




 263     Node *n = _body.at(i);
 264     for (int j = 0; j < 5; j++) {
 265       Node* nn = reassociate_add_sub(n, phase);
 266       if (nn == NULL) break;
 267       n = nn; // again
 268     };
 269   }
 270 }
 271 
 272 //------------------------------policy_peeling---------------------------------
 273 // Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
 274 // make some loop-invariant test (usually a null-check) happen before the loop.
 275 bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
      // Casts drop const because tail() and _body are non-const accessors.
 276   Node *test = ((IdealLoopTree*)this)->tail();
 277   int  body_size = ((IdealLoopTree*)this)->_body.size();
 278   // Peeling does loop cloning which can result in O(N^2) node construction
 279   if( body_size > 255 /* Prevent overflow for large body_size */
 280       || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) {
 281     return false;           // too large to safely clone
 282   }
 283 
 284   // check for vectorized loops, any peeling done was already applied
 285   if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) return false;
 286 
      // Walk the idom chain from the loop tail up to the head; peel if some If's
      // condition is controlled outside this loop (loop-invariant) and the If
      // is a loop exit.
 287   while( test != _head ) {      // Scan till run off top of loop
 288     if( test->is_If() ) {       // Test?
 289       Node *ctrl = phase->get_ctrl(test->in(1));
 290       if (ctrl->is_top())
 291         return false;           // Found dead test on live IF?  No peeling!
 292       // Standard IF only has one input value to check for loop invariance
 293       assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
 294       // Condition is not a member of this loop?
 295       if( !is_member(phase->get_loop(ctrl)) &&
 296           is_loop_exit(test) )
 297         return true;            // Found reason to peel!
 298     }
 299     // Walk up dominators to loop _head looking for test which is
 300     // executed on every path thru loop.
 301     test = phase->idom(test);
 302   }
 303   return false;
 304 }
 305 
 306 //------------------------------peeled_dom_test_elim---------------------------


 643 
 644 
 645 //------------------------------policy_unroll----------------------------------
 646 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 647 // the loop is a CountedLoop and the body is small enough.
 648 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 649 
 650   CountedLoopNode *cl = _head->as_CountedLoop();
 651   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 652 
 653   if (!cl->is_valid_counted_loop())
 654     return false; // Malformed counted loop
 655 
 656   // Protect against over-unrolling.
 657   // After split at least one iteration will be executed in pre-loop.
 658   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 659 
 660   _local_loop_unroll_limit = LoopUnrollLimit;
 661   _local_loop_unroll_factor = 4;
 662   int future_unroll_ct = cl->unrolled_count() * 2;
 663   if (!cl->do_unroll_only()) {
 664     if (future_unroll_ct > LoopMaxUnroll) return false;
 665   } else {
 666     // obey user constraints on vector mapped loops with additional unrolling applied
 667     if ((future_unroll_ct / cl->slp_max_unroll()) > LoopMaxUnroll) return false;
 668   }
 669 
 670   // Check for initial stride being a small enough constant
 671   if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
 672 
 673   // Don't unroll if the next round of unrolling would push us
 674   // over the expected trip count of the loop.  One is subtracted
 675   // from the expected trip count because the pre-loop normally
 676   // executes 1 iteration.
 677   if (UnrollLimitForProfileCheck > 0 &&
 678       cl->profile_trip_cnt() != COUNT_UNKNOWN &&
 679       future_unroll_ct        > UnrollLimitForProfileCheck &&
 680       (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
 681     return false;
 682   }
 683 
 684   // When unroll count is greater than LoopUnrollMin, don't unroll if:
 685   //   the residual iterations are more than 10% of the trip count
 686   //   and rounds of "unroll,optimize" are not making significant progress
 687   //   Progress defined as current size less than 20% larger than previous size.
 688   if (UseSuperWord && cl->node_count_before_unroll() > 0 &&


 751       case Op_FastLock:
 752       case Op_FastUnlock: {
 753         // Don't unroll RTM locking code because it is large.
 754         if (UseRTMLocking) {
 755           return false;
 756         }
 757       }
 758 #endif
 759     } // switch
 760   }
 761 
 762   if (UseSuperWord) {
 763     if (!cl->is_reduction_loop()) {
 764       phase->mark_reductions(this);
 765     }
 766 
 767     // Only attempt slp analysis when user controls do not prohibit it
 768     if (LoopMaxUnroll > _local_loop_unroll_factor) {
 769       // Once policy_slp_analysis succeeds, mark the loop with the
 770       // maximal unroll factor so that we minimize analysis passes
 771       if (future_unroll_ct >= _local_loop_unroll_factor) {

 772         policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
 773       }
 774     }
 775   }
 776 
 777   int slp_max_unroll_factor = cl->slp_max_unroll();
 778   if (cl->has_passed_slp()) {
 779     if (slp_max_unroll_factor >= future_unroll_ct) return true;
 780     // Normal case: loop too big
 781     return false;
 782   }
 783 
 784   // Check for being too big
 785   if (body_size > (uint)_local_loop_unroll_limit) {
 786     if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
 787     // Normal case: loop too big
 788     return false;
 789   }
 790 
 791   if(cl->do_unroll_only()) {
 792     NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct));
 793   }
 794 
 795   // Unroll once!  (Each trip will soon do double iterations)
 796   return true;
 797 }
 798 
 799 void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
 800   // Enable this functionality target by target as needed
 801   if (SuperWordLoopUnrollAnalysis) {
      // Run the SuperWord (SLP) analysis at most once per loop.
 802     if (!cl->was_slp_analyzed()) {
 803       SuperWord sw(phase);
 804       sw.transform_loop(this, false);
 805 
 806       // If the loop is slp canonical analyze it
 807       if (sw.early_return() == false) {
 808         sw.unrolling_analysis(_local_loop_unroll_factor);
 809       }
 810     }
 811 
      // If SLP succeeded and its vector unroll factor is at least as large as
      // the next unroll step, raise the local node-count limit so unrolling
      // toward that factor is not cut off prematurely.
 812     if (cl->has_passed_slp()) {
 813       int slp_max_unroll_factor = cl->slp_max_unroll();
 814       if (slp_max_unroll_factor >= future_unroll_ct) {

 815         int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
 816         if (new_limit > LoopUnrollLimit) {
      // NOTE(review): print_cr already appends a newline, so the trailing "\n"
      // in this format string emits an extra blank line. Confirm intentional.
 817           NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit));




 818           _local_loop_unroll_limit = new_limit;
 819         }
 820       }
 821     }
 822   }
 823 }
 824 
 825 //------------------------------policy_align-----------------------------------
 826 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 827 // expression that does the alignment.  Note that only one array base can be
 828 // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
 829 // if we vectorize short memory ops into longer memory ops, we may want to
 830 // increase alignment.
 831 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
      // Cache-line alignment is currently never requested.
 832   return false;
 833 }
 834 
 835 //------------------------------policy_range_check-----------------------------
 836 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 837 // Actually we do iteration-splitting, a more powerful form of RCE.
 838 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
 839   if (!RangeCheckElimination) return false;
 840 
 841   CountedLoopNode *cl = _head->as_CountedLoop();
 842   // If we unrolled with no intention of doing RCE and we later
 843   // changed our minds, we got no pre-loop.  Either we need to
 844   // make a new pre-loop, or we gotta disallow RCE.
 845   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
 846   Node *trip_counter = cl->phi();
 847 
 848   // check for vectorized loops, some opts are no longer needed
 849   if (cl->do_unroll_only()) return false;
 850 
 851   // Check loop body for tests of trip-counter plus loop-invariant vs
 852   // loop-invariant.
 853   for (uint i = 0; i < _body.size(); i++) {
 854     Node *iff = _body[i];
 855     if (iff->Opcode() == Op_If) { // Test?
 856 
 857       // Comparing trip+off vs limit
 858       Node *bol = iff->in(1);
 859       if (bol->req() != 2) continue; // dead constant test
 860       if (!bol->is_Bool()) {
 861         assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only");
 862         continue;
 863       }
 864       if (bol->as_Bool()->_test._test == BoolTest::ne)
 865         continue; // not RC
 866 
 867       Node *cmp = bol->in(1);
 868       Node *rc_exp = cmp->in(1);
 869       Node *limit = cmp->in(2);
 870 


 881       }
 882 
 883       if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
 884         continue;
 885       }
 886       // Yeah!  Found a test like 'trip+off vs limit'
 887       // Test is an IfNode, has 2 projections.  If BOTH are in the loop
 888       // we need loop unswitching instead of iteration splitting.
 889       if( is_loop_exit(iff) )
 890         return true;            // Found reason to split iterations
 891     } // End of is IF
 892   }
 893 
 894   return false;
 895 }
 896 
 897 //------------------------------policy_peel_only-------------------------------
 898 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned.  Useful
 899 // for unrolling loops with NO array accesses.
 900 bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const {
 901   // check for vectorized loops, any peeling done was already applied
 902   if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) return false;
 903 
      // Any memory node in the body disqualifies peel-only treatment.
 904   for( uint i = 0; i < _body.size(); i++ )
 905     if( _body[i]->is_Mem() )
 906       return false;
 907 
 908   // No memory accesses at all!
 909   return true;
 910 }
 911 
 912 //------------------------------clone_up_backedge_goo--------------------------
 913 // If Node n lives in the back_ctrl block and cannot float, we clone a private
 914 // version of n in preheader_ctrl block and return that, otherwise return n.
 915 Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones ) {
 916   if( get_ctrl(n) != back_ctrl ) return n;
 917 
 918   // Only visit once
 919   if (visited.test_set(n->_idx)) {
 920     Node *x = clones.find(n->_idx);
 921     if (x != NULL)
 922       return x;


< prev index next >