src/share/vm/opto/loopTransform.cpp

  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "compiler/compileLog.hpp"
  27 #include "memory/allocation.inline.hpp"
  28 #include "opto/addnode.hpp"
  29 #include "opto/callnode.hpp"
  30 #include "opto/castnode.hpp"
  31 #include "opto/connode.hpp"
  32 #include "opto/convertnode.hpp"
  33 #include "opto/divnode.hpp"
  34 #include "opto/loopnode.hpp"
  35 #include "opto/mulnode.hpp"
  36 #include "opto/movenode.hpp"
  37 #include "opto/opaquenode.hpp"
  38 #include "opto/rootnode.hpp"
  39 #include "opto/runtime.hpp"
  40 #include "opto/subnode.hpp"
  41 #include "opto/vectornode.hpp"
  42 
  43 //------------------------------is_loop_exit-----------------------------------
  44 // Given an IfNode, return the loop-exiting projection or NULL if both
  45 // arms remain in the loop.
  46 Node *IdealLoopTree::is_loop_exit(Node *iff) const {
  47   if( iff->outcnt() != 2 ) return NULL; // Ignore partially dead tests
  48   PhaseIdealLoop *phase = _phase;
  49   // Test is an IfNode, has 2 projections.  If BOTH are in the loop
  50   // we need loop unswitching instead of peeling.
  51   if( !is_member(phase->get_loop( iff->raw_out(0) )) )
  52     return iff->raw_out(0);
  53   if( !is_member(phase->get_loop( iff->raw_out(1) )) )
  54     return iff->raw_out(1);
  55   return NULL;
  56 }
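
Note (illustrative, not part of the change): a minimal sketch of how a
caller might use is_loop_exit(); the helper name is hypothetical:

    // Given a candidate test inside the loop, return the projection that
    // leaves the loop, or NULL when both arms stay inside (in which case
    // unswitching, not peeling, is the right tool).
    static Node* exit_proj_or_null(IdealLoopTree* lpt, Node* test) {
      return test->is_If() ? lpt->is_loop_exit(test) : NULL;
    }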
  57 
  58 
  59 //=============================================================================
  60 


 623       }
 624 #if INCLUDE_RTM_OPT
 625       case Op_FastLock:
 626       case Op_FastUnlock: {
 627         // Don't unroll RTM locking code because it is large.
 628         if (UseRTMLocking) {
 629           return false;
 630         }
 631       }
 632 #endif
 633     } // switch
 634   }
 635 
 636   return true; // Do maximally unroll
 637 }
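
Note (illustrative, not part of the change): "maximally unroll" replicates
the body once per trip so the backedge disappears entirely. A source-level
view of the effect:

    // Before: constant trip count of 4
    for (int i = 0; i < 4; i++) { body(i); }
    // After maximal unrolling: straight-line code, no loop control left
    body(0); body(1); body(2); body(3);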
 638 
 639 
 640 //------------------------------policy_unroll----------------------------------
 641 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 642 // the loop is a CountedLoop and the body is small enough.
 643 bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
 644 
 645   CountedLoopNode *cl = _head->as_CountedLoop();
 646   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 647 
 648   if (!cl->is_valid_counted_loop())
 649     return false; // Malformed counted loop
 650 
 651   // Protect against over-unrolling.
  652   // After the split, at least one iteration will be executed in the pre-loop.
 653   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
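        // [reviewer note] Example: a normal loop with trip_count() == 2 gives one
        // iteration to the pre-loop, leaving a single main-loop trip, so doubling
        // the body cannot pay off; hence the 2-vs-1 threshold above.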
 654 
 655   int future_unroll_ct = cl->unrolled_count() * 2;
 656   if (future_unroll_ct > LoopMaxUnroll) return false;
 657 
 658   // Check for initial stride being a small enough constant
 659   if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
 660 
 661   // Don't unroll if the next round of unrolling would push us
 662   // over the expected trip count of the loop.  One is subtracted
 663   // from the expected trip count because the pre-loop normally
 664   // executes 1 iteration.
 665   if (UnrollLimitForProfileCheck > 0 &&
 666       cl->profile_trip_cnt() != COUNT_UNKNOWN &&
 667       future_unroll_ct        > UnrollLimitForProfileCheck &&
 668       (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
 669     return false;
 670   }
 671 
 672   // When unroll count is greater than LoopUnrollMin, don't unroll if:
 673   //   the residual iterations are more than 10% of the trip count
 674   //   and rounds of "unroll,optimize" are not making significant progress


 730       case Op_StrEquals:
 731       case Op_StrIndexOf:
 732       case Op_EncodeISOArray:
 733       case Op_AryEq: {
 734         // Do not unroll a loop with String intrinsics code.
 735         // String intrinsics are large and have loops.
 736         return false;
 737       }
 738 #if INCLUDE_RTM_OPT
 739       case Op_FastLock:
 740       case Op_FastUnlock: {
 741         // Don't unroll RTM locking code because it is large.
 742         if (UseRTMLocking) {
 743           return false;
 744         }
 745       }
 746 #endif
 747     } // switch
 748   }
 749 
 750   // Check for being too big
 751   if (body_size > (uint)LoopUnrollLimit) {
 752     if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
 753     // Normal case: loop too big
 754     return false;
 755   }
 756 
 757   // Unroll once!  (Each trip will soon do double iterations)
 758   return true;
 759 }
 760 
 761 //------------------------------policy_align-----------------------------------
 762 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 763 // expression that does the alignment.  Note that only one array base can be
 764 // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
 765 // if we vectorize short memory ops into longer memory ops, we may want to
 766 // increase alignment.
 767 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
 768   return false;
 769 }
 770 
 771 //------------------------------policy_range_check-----------------------------
 772 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 773 // Actually we do iteration-splitting, a more powerful form of RCE.
 774 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
 775   if (!RangeCheckElimination) return false;
 776 
 777   CountedLoopNode *cl = _head->as_CountedLoop();
 778   // If we unrolled with no intention of doing RCE and we later
 779   // changed our minds, we got no pre-loop.  Either we need to
 780   // make a new pre-loop, or we gotta disallow RCE.


1568   }
1569 
1570   Node* trip_phi = loop_head->phi();
1571   for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
1572     Node* phi = loop_head->fast_out(i);
1573     if (phi->is_Phi() && phi->outcnt() > 0 && phi != trip_phi) {
 1574       // Only consider definitions that are loop-carried and not the trip counter.
1575       Node* def_node = phi->in(LoopNode::LoopBackControl);
1576 
1577       if (def_node != NULL) {
1578         Node* n_ctrl = get_ctrl(def_node);
1579         if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
1580           // Now test it to see if it fits the standard pattern for a reduction operator.
1581           int opc = def_node->Opcode();
1582           if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
1583             if (!def_node->is_reduction()) { // Not marked yet
 1584               // To be a reduction, the arithmetic node must take the phi as an input and feed a definition back to it
1585               for (unsigned j = 1; j < def_node->req(); j++) {
1586                 Node* in = def_node->in(j);
1587                 if (in == phi) {
1588                   def_node->add_flag(Node::Flag_is_reduction);
1589                   break;
1590                 }
1591               }
1592             }
1593           }
1594         }
1595       }
1596     }
1597   }
1598 }
1599 
1600 //------------------------------dominates_backedge---------------------------------
1601 // Returns true if ctrl is executed on every complete iteration
1602 bool IdealLoopTree::dominates_backedge(Node* ctrl) {
1603   assert(ctrl->is_CFG(), "must be control");
1604   Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
1605   return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
1606 }
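
Note (illustrative, not part of the change): dom_lca_internal(ctrl, backedge)
== ctrl holds exactly when ctrl dominates the backedge, i.e. it cannot be
skipped on any path that completes an iteration:

    for (int i = 0; i < n; i++) {
      int t = f(i);          // always executed: dominates the backedge
      if (p(i)) {
        g(t);                // conditional arm: does NOT dominate it
      }
    }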
1607 


2418 
2419   // If we have any of these conditions (RCE, alignment, unrolling) met, then
2420   // we switch to the pre-/main-/post-loop model.  This model also covers
2421   // peeling.
2422   if (should_rce || should_align || should_unroll) {
2423     if (cl->is_normal_loop())  // Convert to 'pre/main/post' loops
2424       phase->insert_pre_post_loops(this,old_new, !may_rce_align);
2425 
2426     // Adjust the pre- and main-loop limits to let the pre and post loops run
2427     // with full checks, but the main-loop with no checks.  Remove said
2428     // checks from the main body.
2429     if (should_rce)
2430       phase->do_range_check(this,old_new);
2431 
2432     // Double loop body for unrolling.  Adjust the minimum-trip test (will do
2433     // twice as many iterations as before) and the main body limit (only do
2434     // an even number of trips).  If we are peeling, we might enable some RCE
2435     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
2436     // peeling.
2437     if (should_unroll && !should_peel) {
2438       phase->mark_reductions(this);
2439       phase->do_unroll(this, old_new, true);
2440     }
2441 
2442     // Adjust the pre-loop limits to align the main body
2443     // iterations.
2444     if (should_align)
2445       Unimplemented();
2446 
2447   } else {                      // Else we have an unchanged counted loop
2448     if (should_peel)           // Might want to peel but do nothing else
2449       phase->do_peeling(this,old_new);
2450   }
2451   return true;
2452 }
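
Note (illustrative, not part of the change): the pre-/main-/post-loop model
splits one counted loop into three; the bound names below are made up:

    for (i = init; i < pre_limit;  i++) body(i);  // pre:  full checks, few trips
    for (        ; i < main_limit; i++) body(i);  // main: checks removed, unrolled
    for (        ; i < limit;      i++) body(i);  // post: full checks, residual trips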
2453 
2454 
2455 //=============================================================================
2456 //------------------------------iteration_split--------------------------------
2457 bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
2458   // Recursively iteration split nested loops


src/share/vm/opto/loopTransform.cpp (patched version; original version above)

  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "compiler/compileLog.hpp"
  27 #include "memory/allocation.inline.hpp"
  28 #include "opto/addnode.hpp"
  29 #include "opto/callnode.hpp"
  30 #include "opto/castnode.hpp"
  31 #include "opto/connode.hpp"
  32 #include "opto/convertnode.hpp"
  33 #include "opto/divnode.hpp"
  34 #include "opto/loopnode.hpp"
  35 #include "opto/mulnode.hpp"
  36 #include "opto/movenode.hpp"
  37 #include "opto/opaquenode.hpp"
  38 #include "opto/rootnode.hpp"
  39 #include "opto/runtime.hpp"
  40 #include "opto/subnode.hpp"
  41 #include "opto/superword.hpp"
  42 #include "opto/vectornode.hpp"
  43 
  44 //------------------------------is_loop_exit-----------------------------------
  45 // Given an IfNode, return the loop-exiting projection or NULL if both
  46 // arms remain in the loop.
  47 Node *IdealLoopTree::is_loop_exit(Node *iff) const {
  48   if( iff->outcnt() != 2 ) return NULL; // Ignore partially dead tests
  49   PhaseIdealLoop *phase = _phase;
  50   // Test is an IfNode, has 2 projections.  If BOTH are in the loop
  51   // we need loop unswitching instead of peeling.
  52   if( !is_member(phase->get_loop( iff->raw_out(0) )) )
  53     return iff->raw_out(0);
  54   if( !is_member(phase->get_loop( iff->raw_out(1) )) )
  55     return iff->raw_out(1);
  56   return NULL;
  57 }
  58 
  59 
  60 //=============================================================================
  61 


 624       }
 625 #if INCLUDE_RTM_OPT
 626       case Op_FastLock:
 627       case Op_FastUnlock: {
 628         // Don't unroll RTM locking code because it is large.
 629         if (UseRTMLocking) {
 630           return false;
 631         }
 632       }
 633 #endif
 634     } // switch
 635   }
 636 
 637   return true; // Do maximally unroll
 638 }
 639 
 640 
 641 //------------------------------policy_unroll----------------------------------
 642 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 643 // the loop is a CountedLoop and the body is small enough.
 644 bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) {
 645 
 646   CountedLoopNode *cl = _head->as_CountedLoop();
 647   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 648 
 649   if (!cl->is_valid_counted_loop())
 650     return false; // Malformed counted loop
 651 
 652   // Protect against over-unrolling.
  653   // After the split, at least one iteration will be executed in the pre-loop.
 654   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 655 
 656   _local_loop_unroll_limit = LoopUnrollLimit;
 657   _local_loop_unroll_factor = 4;
 658   int future_unroll_ct = cl->unrolled_count() * 2;
 659   if (future_unroll_ct > LoopMaxUnroll) return false;
 660 
 661   // Check for initial stride being a small enough constant
 662   if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
 663 
 664   // Don't unroll if the next round of unrolling would push us
 665   // over the expected trip count of the loop.  One is subtracted
 666   // from the expected trip count because the pre-loop normally
 667   // executes 1 iteration.
 668   if (UnrollLimitForProfileCheck > 0 &&
 669       cl->profile_trip_cnt() != COUNT_UNKNOWN &&
 670       future_unroll_ct        > UnrollLimitForProfileCheck &&
 671       (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
 672     return false;
 673   }
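        // [reviewer note] Example: with profile_trip_cnt == 7.0 and
        // UnrollLimitForProfileCheck == 4, growing the unroll count from 4 to 8 is
        // rejected here (8 > 7.0 - 1.0), while growing from 2 to 4 is still
        // allowed, since 4 does not exceed UnrollLimitForProfileCheck.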
 674 
 675   // When unroll count is greater than LoopUnrollMin, don't unroll if:
 676   //   the residual iterations are more than 10% of the trip count
 677   //   and rounds of "unroll,optimize" are not making significant progress


 733       case Op_StrEquals:
 734       case Op_StrIndexOf:
 735       case Op_EncodeISOArray:
 736       case Op_AryEq: {
 737         // Do not unroll a loop with String intrinsics code.
 738         // String intrinsics are large and have loops.
 739         return false;
 740       }
 741 #if INCLUDE_RTM_OPT
 742       case Op_FastLock:
 743       case Op_FastUnlock: {
 744         // Don't unroll RTM locking code because it is large.
 745         if (UseRTMLocking) {
 746           return false;
 747         }
 748       }
 749 #endif
 750     } // switch
 751   }
 752 
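        // [reviewer note] Reductions are now marked here, before the SLP unrolling
        // analysis below, so the analysis can see the reduction flags; previously
        // mark_reductions() ran later, from iteration_split_impl().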
 753   if (UseSuperWord) {
 754     if (!cl->is_reduction_loop()) {
 755       phase->mark_reductions(this);
 756     }
 757 
 758     // Only attempt slp analysis when user controls do not prohibit it
 759     if (LoopMaxUnroll > _local_loop_unroll_factor) {
  760       // Once policy_unroll_slp_analysis succeeds, mark the loop with the
 761       // maximal unroll factor so that we minimize analysis passes
 762       if (future_unroll_ct > _local_loop_unroll_factor) {
 763         policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
 764       }
 765     }
 766   }
 767 
 768   // Check for being too big
 769   if (body_size > (uint)_local_loop_unroll_limit) {
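          // [reviewer note] Exception below: xor-heavy bodies (checksum/CRC-style
          // kernels, presumably) may grow to 4x the normal limit, the assumption
          // being that such loops profit from deeper unrolling.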
 770     if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
 771     // Normal case: loop too big
 772     return false;
 773   }
 774 
 775   // Unroll once!  (Each trip will soon do double iterations)
 776   return true;
 777 }
 778 
 779 void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
  780   // Enable this functionality, target by target, as needed
 781   if (SuperWordLoopUnrollAnalysis) {
 782     if (!cl->has_passed_slp()) {
 783       SuperWord sw(phase);
 784       sw.transform_loop(this, false);
 785 
  786       // If the loop is SLP-canonical, analyze it
  787       if (!sw.early_return()) {
 788         sw.unrolling_analysis(cl, _local_loop_unroll_factor);
 789       }
 790     }
 791 
 792     int slp_max_unroll_factor = cl->slp_max_unroll();
 793     if ((slp_max_unroll_factor > 4) &&
 794       (slp_max_unroll_factor >= future_unroll_ct)) {
 795       int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
 796       if (new_limit > LoopUnrollLimit) {
 797 #ifndef PRODUCT
 798         if (TraceSuperWordLoopUnrollAnalysis) {
  799           tty->print_cr("slp analysis is applying unroll limit %d, the original limit was %d",
  800             new_limit, _local_loop_unroll_limit);
 801         }
 802 #endif
 803         _local_loop_unroll_limit = new_limit;
 804       }
 805     }
 806   }
 807 }
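
Note (hypothetical worked example, values assumed): with
node_count_before_unroll() == 60, slp_max_unroll() == 8, future_unroll_ct == 8
and a LoopUnrollLimit of, say, 60, new_limit = 60 * 8 = 480 > 60, so
_local_loop_unroll_limit becomes 480 and the "Check for being too big" test
in policy_unroll() will accept a correspondingly larger unrolled body.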
 808 
 809 //------------------------------policy_align-----------------------------------
 810 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 811 // expression that does the alignment.  Note that only one array base can be
 812 // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
 813 // if we vectorize short memory ops into longer memory ops, we may want to
 814 // increase alignment.
 815 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
 816   return false;
 817 }
 818 
 819 //------------------------------policy_range_check-----------------------------
 820 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 821 // Actually we do iteration-splitting, a more powerful form of RCE.
 822 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
 823   if (!RangeCheckElimination) return false;
 824 
 825   CountedLoopNode *cl = _head->as_CountedLoop();
 826   // If we unrolled with no intention of doing RCE and we later
 827   // changed our minds, we got no pre-loop.  Either we need to
 828   // make a new pre-loop, or we gotta disallow RCE.


1616   }
1617 
1618   Node* trip_phi = loop_head->phi();
1619   for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
1620     Node* phi = loop_head->fast_out(i);
1621     if (phi->is_Phi() && phi->outcnt() > 0 && phi != trip_phi) {
 1622       // Only consider definitions that are loop-carried and not the trip counter.
1623       Node* def_node = phi->in(LoopNode::LoopBackControl);
1624 
1625       if (def_node != NULL) {
1626         Node* n_ctrl = get_ctrl(def_node);
1627         if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
1628           // Now test it to see if it fits the standard pattern for a reduction operator.
1629           int opc = def_node->Opcode();
1630           if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
1631             if (!def_node->is_reduction()) { // Not marked yet
 1632               // To be a reduction, the arithmetic node must take the phi as an input and feed a definition back to it
1633               for (unsigned j = 1; j < def_node->req(); j++) {
1634                 Node* in = def_node->in(j);
1635                 if (in == phi) {
1636                   loop_head->mark_has_reductions();
1637                   def_node->add_flag(Node::Flag_is_reduction);
1638                   break;
1639                 }
1640               }
1641             }
1642           }
1643         }
1644       }
1645     }
1646   }
1647 }
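
Note (illustrative, not part of the change): the pattern being matched
corresponds to a source-level reduction such as:

    int sum_reduction(const int* a, int n) {
      int sum = 0;                  // becomes a Phi at the loop head
      for (int i = 0; i < n; i++) {
        sum += a[i];                // AddI(Phi(sum), LoadI): the backedge def
      }                             // takes the phi as an input, so it is
      return sum;                   // flagged Flag_is_reduction and the loop
    }                               // is marked via mark_has_reductions()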
1648 
1649 //------------------------------dominates_backedge---------------------------------
1650 // Returns true if ctrl is executed on every complete iteration
1651 bool IdealLoopTree::dominates_backedge(Node* ctrl) {
1652   assert(ctrl->is_CFG(), "must be control");
1653   Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
1654   return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
1655 }
1656 


2467 
2468   // If we have any of these conditions (RCE, alignment, unrolling) met, then
2469   // we switch to the pre-/main-/post-loop model.  This model also covers
2470   // peeling.
2471   if (should_rce || should_align || should_unroll) {
2472     if (cl->is_normal_loop())  // Convert to 'pre/main/post' loops
2473       phase->insert_pre_post_loops(this,old_new, !may_rce_align);
2474 
2475     // Adjust the pre- and main-loop limits to let the pre and post loops run
2476     // with full checks, but the main-loop with no checks.  Remove said
2477     // checks from the main body.
2478     if (should_rce)
2479       phase->do_range_check(this,old_new);
2480 
2481     // Double loop body for unrolling.  Adjust the minimum-trip test (will do
2482     // twice as many iterations as before) and the main body limit (only do
2483     // an even number of trips).  If we are peeling, we might enable some RCE
2484     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
2485     // peeling.
2486     if (should_unroll && !should_peel) {
2487       phase->do_unroll(this, old_new, true);
2488     }
2489 
2490     // Adjust the pre-loop limits to align the main body
2491     // iterations.
2492     if (should_align)
2493       Unimplemented();
2494 
2495   } else {                      // Else we have an unchanged counted loop
2496     if (should_peel)           // Might want to peel but do nothing else
2497       phase->do_peeling(this,old_new);
2498   }
2499   return true;
2500 }
2501 
2502 
2503 //=============================================================================
2504 //------------------------------iteration_split--------------------------------
2505 bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
2506   // Recursively iteration split nested loops

