< prev index next >

src/share/vm/opto/loopTransform.cpp

Print this page

        

@@ -278,10 +278,14 @@
   // Peeling does loop cloning which can result in O(N^2) node construction
   if( body_size > 255 /* Prevent overflow for large body_size */
       || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) {
     return false;           // too large to safely clone
   }
+
+  // check for vectorized loops, any peeling done was already applied
+  if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) return false;
+
   while( test != _head ) {      // Scan till run off top of loop
     if( test->is_If() ) {       // Test?
       Node *ctrl = phase->get_ctrl(test->in(1));
       if (ctrl->is_top())
         return false;           // Found dead test on live IF?  No peeling!

@@ -654,11 +658,16 @@
   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 
   _local_loop_unroll_limit = LoopUnrollLimit;
   _local_loop_unroll_factor = 4;
   int future_unroll_ct = cl->unrolled_count() * 2;
+  if (!cl->do_unroll_only()) {
   if (future_unroll_ct > LoopMaxUnroll) return false;
+  } else {
+    // obey user constraints on vector mapped loops with additional unrolling applied
+    if ((future_unroll_ct / cl->slp_max_unroll()) > LoopMaxUnroll) return false;
+  }
 
   // Check for initial stride being a small enough constant
   if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
 
   // Don't unroll if the next round of unrolling would push us

@@ -757,56 +766,62 @@
 
     // Only attempt slp analysis when user controls do not prohibit it
     if (LoopMaxUnroll > _local_loop_unroll_factor) {
       // Once policy_slp_analysis succeeds, mark the loop with the
       // maximal unroll factor so that we minimize analysis passes
-      if ((future_unroll_ct > _local_loop_unroll_factor) ||
-          (body_size > (uint)_local_loop_unroll_limit)) {
+      if (future_unroll_ct >= _local_loop_unroll_factor) {
         policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
       }
     }
   }
 
+  int slp_max_unroll_factor = cl->slp_max_unroll();
+  if (cl->has_passed_slp()) {
+    if (slp_max_unroll_factor >= future_unroll_ct) return true;
+    // Normal case: loop too big
+    return false;
+  }
+
   // Check for being too big
   if (body_size > (uint)_local_loop_unroll_limit) {
     if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
     // Normal case: loop too big
     return false;
   }
 
+  if(cl->do_unroll_only()) {
+    NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct));
+  }
+
   // Unroll once!  (Each trip will soon do double iterations)
   return true;
 }
 
 void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
   // Enable this functionality target by target as needed
   if (SuperWordLoopUnrollAnalysis) {
-    if (!cl->has_passed_slp()) {
+    if (!cl->was_slp_analyzed()) {
       SuperWord sw(phase);
       sw.transform_loop(this, false);
 
       // If the loop is slp canonical analyze it
       if (sw.early_return() == false) {
-        sw.unrolling_analysis(cl, _local_loop_unroll_factor);
+        sw.unrolling_analysis(_local_loop_unroll_factor);
       }
     }
 
+    if (cl->has_passed_slp()) {
     int slp_max_unroll_factor = cl->slp_max_unroll();
-    if ((slp_max_unroll_factor > 4) &&
-        (slp_max_unroll_factor >= future_unroll_ct)) {
+      if (slp_max_unroll_factor >= future_unroll_ct) {
       int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
       if (new_limit > LoopUnrollLimit) {
-#ifndef PRODUCT
-        if (TraceSuperWordLoopUnrollAnalysis) {
-          tty->print_cr("slp analysis is applying unroll limit  %d, the original limit was %d\n",
-            new_limit, _local_loop_unroll_limit);
-        }
-#endif
+          NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit));
         _local_loop_unroll_limit = new_limit;
       }
     }
   }
+  }
 }
 
 //------------------------------policy_align-----------------------------------
 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 // expression that does the alignment.  Note that only one array base can be

@@ -828,10 +843,13 @@
   // changed our minds, we got no pre-loop.  Either we need to
   // make a new pre-loop, or we gotta disallow RCE.
   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
   Node *trip_counter = cl->phi();
 
+  // check for vectorized loops, some opts are no longer needed
+  if (cl->do_unroll_only()) return false;
+
   // Check loop body for tests of trip-counter plus loop-invariant vs
   // loop-invariant.
   for (uint i = 0; i < _body.size(); i++) {
     Node *iff = _body[i];
     if (iff->Opcode() == Op_If) { // Test?

@@ -878,10 +896,12 @@
 
 //------------------------------policy_peel_only-------------------------------
 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned.  Useful
 // for unrolling loops with NO array accesses.
 bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const {
+  // check for vectorized loops, any peeling done was already applied
+  if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) return false;
 
   for( uint i = 0; i < _body.size(); i++ )
     if( _body[i]->is_Mem() )
       return false;
 
< prev index next >