hotspot Cdiff src/share/vm/opto/loopTransform.cpp

src/share/vm/opto/loopTransform.cpp


*** 36,45 ****
--- 36,46 ----
  #include "opto/movenode.hpp"
  #include "opto/opaquenode.hpp"
  #include "opto/rootnode.hpp"
  #include "opto/runtime.hpp"
  #include "opto/subnode.hpp"
+ #include "opto/superword.hpp"
  #include "opto/vectornode.hpp"
  
  //------------------------------is_loop_exit-----------------------------------
  // Given an IfNode, return the loop-exiting projection or NULL if both
  // arms remain in the loop.
*** 638,648 ****
  
  
  //------------------------------policy_unroll----------------------------------
  // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
  // the loop is a CountedLoop and the body is small enough.
! bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
  
    CountedLoopNode *cl = _head->as_CountedLoop();
    assert(cl->is_normal_loop() || cl->is_main_loop(), "");
  
    if (!cl->is_valid_counted_loop())
--- 639,649 ----
  
  
  //------------------------------policy_unroll----------------------------------
  // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
  // the loop is a CountedLoop and the body is small enough.
! bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) {
  
    CountedLoopNode *cl = _head->as_CountedLoop();
    assert(cl->is_normal_loop() || cl->is_main_loop(), "");
  
    if (!cl->is_valid_counted_loop())
*** 650,659 ****
--- 651,662 ----
  
    // Protect against over-unrolling.
    // After split at least one iteration will be executed in pre-loop.
    if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
  
+   _local_loop_unroll_limit = LoopUnrollLimit;
+   _local_loop_unroll_factor = 4;
    int future_unroll_ct = cl->unrolled_count() * 2;
    if (future_unroll_ct > LoopMaxUnroll) return false;
  
    // Check for initial stride being a small enough constant
    if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
*** 745,765 ****
        }
  #endif
      } // switch
    }
  
    // Check for being too big
!   if (body_size > (uint)LoopUnrollLimit) {
      if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
      // Normal case: loop too big
      return false;
    }
  
    // Unroll once!  (Each trip will soon do double iterations)
    return true;
  }
  
  //------------------------------policy_align-----------------------------------
  // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
  // expression that does the alignment.  Note that only one array base can be
  // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
  // if we vectorize short memory ops into longer memory ops, we may want to
--- 748,813 ----
        }
  #endif
      } // switch
    }
  
+   if (UseSuperWord) {
+     if (!cl->is_reduction_loop()) {
+       phase->mark_reductions(this);
+     }
+ 
+     // Only attempt slp analysis when user controls do not prohibit it
+     if (LoopMaxUnroll > _local_loop_unroll_factor) {
+       // Once policy_slp_analysis succeeds, mark the loop with the
+       // maximal unroll factor so that we minimize analysis passes
+       if (future_unroll_ct > _local_loop_unroll_factor) {
+         policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
+       }
+     }
+   }
+ 
    // Check for being too big
!   if (body_size > (uint)_local_loop_unroll_limit) {
      if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
      // Normal case: loop too big
      return false;
    }
  
    // Unroll once!  (Each trip will soon do double iterations)
    return true;
  }
  
+ void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) { // May raise _local_loop_unroll_limit for this loop based on cl->slp_max_unroll()
+   // Enable this functionality target by target as needed; with SuperWordLoopUnrollAnalysis off nothing below runs
+   if (SuperWordLoopUnrollAnalysis) {
+     if (!cl->has_passed_slp()) { // run the SuperWord pass only while has_passed_slp() is still false
+       SuperWord sw(phase);
+       sw.transform_loop(this, false); // NOTE(review): 'false' presumably means analyze only, do not transform - confirm against SuperWord::transform_loop
+       // early_return() is queried after transform_loop(); a true result skips unrolling_analysis()
+       // If the loop is slp canonical analyze it
+       if (sw.early_return() == false) {
+         sw.unrolling_analysis(cl, _local_loop_unroll_factor);
+       }
+     }
+     // slp_max_unroll() is read even when the pass above was skipped (presumably set by unrolling_analysis - confirm)
+     int slp_max_unroll_factor = cl->slp_max_unroll();
+     if ((slp_max_unroll_factor > 4) && // 4 matches the _local_loop_unroll_factor value assigned in policy_unroll
+       (slp_max_unroll_factor >= future_unroll_ct)) {
+       int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor; // pre-unroll node count scaled by the slp factor
+       if (new_limit > LoopUnrollLimit) { // override only when the computed limit exceeds the global LoopUnrollLimit flag
+ #ifndef PRODUCT
+         if (TraceSuperWordLoopUnrollAnalysis) {
+           tty->print_cr("slp analysis is applying unroll limit  %d, the original limit was %d\n",
+             new_limit, _local_loop_unroll_limit);
+         }
+ #endif
+         _local_loop_unroll_limit = new_limit; // policy_unroll compares body_size against this value in its "too big" check
+       }
+     }
+   }
+ }
+ 
  //------------------------------policy_align-----------------------------------
  // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
  // expression that does the alignment.  Note that only one array base can be
  // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
  // if we vectorize short memory ops into longer memory ops, we may want to
*** 1583,1592 ****
--- 1631,1641 ----
              if (!def_node->is_reduction()) { // Not marked yet
                // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
                for (unsigned j = 1; j < def_node->req(); j++) {
                  Node* in = def_node->in(j);
                  if (in == phi) {
+                   loop_head->mark_has_reductions();
                    def_node->add_flag(Node::Flag_is_reduction);
                    break;
                  }
                }
              }
*** 2433,2443 ****
      // twice as many iterations as before) and the main body limit (only do
      // an even number of trips).  If we are peeling, we might enable some RCE
      // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
      // peeling.
      if (should_unroll && !should_peel) {
-       phase->mark_reductions(this);
        phase->do_unroll(this, old_new, true);
      }
  
      // Adjust the pre-loop limits to align the main body
      // iterations.
--- 2482,2491 ----
< prev index next >