< prev index next >

src/share/vm/opto/loopTransform.cpp

Print this page

        

@@ -36,10 +36,11 @@
 #include "opto/movenode.hpp"
 #include "opto/opaquenode.hpp"
 #include "opto/rootnode.hpp"
 #include "opto/runtime.hpp"
 #include "opto/subnode.hpp"
+#include "opto/superword.hpp"
 #include "opto/vectornode.hpp"
 
 //------------------------------is_loop_exit-----------------------------------
 // Given an IfNode, return the loop-exiting projection or NULL if both
 // arms remain in the loop.

@@ -638,11 +639,11 @@
 
 
 //------------------------------policy_unroll----------------------------------
 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 // the loop is a CountedLoop and the body is small enough.
-bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
+bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 
   CountedLoopNode *cl = _head->as_CountedLoop();
   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 
   if (!cl->is_valid_counted_loop())

@@ -650,10 +651,12 @@
 
   // Protect against over-unrolling.
   // After split at least one iteration will be executed in pre-loop.
   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 
+  _local_loop_unroll_limit = LoopUnrollLimit;
+  _local_loop_unroll_factor = 4;
   int future_unroll_ct = cl->unrolled_count() * 2;
   if (future_unroll_ct > LoopMaxUnroll) return false;
 
   // Check for initial stride being a small enough constant
   if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;

@@ -745,21 +748,66 @@
       }
 #endif
     } // switch
   }
 
+  if (UseSuperWord) {
+    if (!cl->is_reduction_loop()) {
+      phase->mark_reductions(this);
+    }
+
+    // Only attempt slp analysis when user controls do not prohibit it
+    if (LoopMaxUnroll > _local_loop_unroll_factor) {
+      // Once policy_slp_analysis succeeds, mark the loop with the
+      // maximal unroll factor so that we minimize analysis passes
+      if (future_unroll_ct > _local_loop_unroll_factor) {
+        policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
+      }
+    }
+  }
+
   // Check for being too big
-  if (body_size > (uint)LoopUnrollLimit) {
+  if (body_size > (uint)_local_loop_unroll_limit) {
     if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
     // Normal case: loop too big
     return false;
   }
 
   // Unroll once!  (Each trip will soon do double iterations)
   return true;
 }
 
+void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
+  // Enable this functionality target by target as needed
+  if (SuperWordLoopUnrollAnalysis) {
+    if (!cl->has_passed_slp()) {
+      SuperWord sw(phase);
+      sw.transform_loop(this, false);
+
+      // If the loop is slp canonical analyze it
+      if (sw.early_return() == false) {
+        sw.unrolling_analysis(cl, _local_loop_unroll_factor);
+      }
+    }
+
+    int slp_max_unroll_factor = cl->slp_max_unroll();
+    if ((slp_max_unroll_factor > 4) &&
+      (slp_max_unroll_factor >= future_unroll_ct)) {
+      int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
+      if (new_limit > LoopUnrollLimit) {
+#ifndef PRODUCT
+        if (TraceSuperWordLoopUnrollAnalysis) {
+          tty->print_cr("slp analysis is applying unroll limit  %d, the original limit was %d\n",
+            new_limit, _local_loop_unroll_limit);
+        }
+#endif
+        _local_loop_unroll_limit = new_limit;
+      }
+    }
+  }
+}
+
 //------------------------------policy_align-----------------------------------
 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 // expression that does the alignment.  Note that only one array base can be
 // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
 // if we vectorize short memory ops into longer memory ops, we may want to

@@ -1583,10 +1631,11 @@
             if (!def_node->is_reduction()) { // Not marked yet
               // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
               for (unsigned j = 1; j < def_node->req(); j++) {
                 Node* in = def_node->in(j);
                 if (in == phi) {
+                  loop_head->mark_has_reductions();
                   def_node->add_flag(Node::Flag_is_reduction);
                   break;
                 }
               }
             }

@@ -2433,11 +2482,10 @@
     // twice as many iterations as before) and the main body limit (only do
     // an even number of trips).  If we are peeling, we might enable some RCE
     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
     // peeling.
     if (should_unroll && !should_peel) {
-      phase->mark_reductions(this);
       phase->do_unroll(this, old_new, true);
     }
 
     // Adjust the pre-loop limits to align the main body
     // iterations.
< prev index next >