< prev index next >

src/share/vm/opto/superword.cpp

Print this page
rev 8471 : SIMD: fixing bug in alignment - invariant and scale. Also A LOT of tracing.
rev 8472 : SIMD: fixing bug in alignment - invariant and scale. Also A LOT of tracing + fixing long lines.

@@ -71,13 +71,18 @@
   _num_work_vecs(0),                      // amount of vector work we have
   _num_reductions(0),                     // amount of reduction work we have
   _do_vector_loop(phase->C->do_vector_loop()),  // whether to do vectorization/simd style
   _ii_first(-1),                          // first loop generation index - only if do_vector_loop()
   _ii_last(-1),                           // last loop generation index - only if do_vector_loop()
-  _ii_order(arena(), 8, 0, 0),
-  _vector_loop_debug(phase->C->has_method() && phase->C->method_has_option("VectorizeDebug"))
-{}
+  _ii_order(arena(), 8, 0, 0)
+{
+#ifndef PRODUCT
+  if (_phase->C->method() != NULL) {
+    _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug);
+  }
+#endif
+}
 
 //------------------------------transform_loop---------------------------
 void SuperWord::transform_loop(IdealLoopTree* lpt) {
   assert(UseSuperWord, "should be");
   // Do vectors exist on this architecture?

@@ -87,14 +92,40 @@
   CountedLoopNode *cl = lpt->_head->as_CountedLoop();
 
   if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
 
   if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
-
+  #ifndef PRODUCT
+    if (_do_vector_loop && is_debug()) {
+      tty->print_cr("SuperWord::transform_loop: lpt->_head->_idx %d", lpt->_head->_idx);
+      Node_Stack stack(_arena, _phase->C->unique() >> 2);
+      Node_List rpo_list;
+      VectorSet visited(_arena);
+      visited.set(lpt->_head->_idx);
+      _phase->rpo(lpt->_head, stack, visited, rpo_list);
+      _phase->dump(lpt, rpo_list.size(), rpo_list );
+      if(is_trace_loop()) {
+        tty->print_cr("\nSuperWord::transform_loop: whole loop tree");
+        _phase->dump();
+        tty->print_cr("SuperWord::transform_loop: end of whole loop tree\n");
+      }  
+    }
+  #endif
   // Check for no control flow in body (other than exit)
   Node *cl_exit = cl->loopexit();
-  if (cl_exit->in(0) != lpt->_head) return;
+  if (cl_exit->in(0) != lpt->_head) {
+#ifndef PRODUCT
+    if (TraceSuperWord) {
+      tty->print_cr("SuperWord::transform_loop: loop too complicated, cl_exit->in(0) != lpt->_head");
+      tty->print("cl_exit %d", cl_exit->_idx); cl_exit->dump();
+      tty->print("cl_exit->in(0) %d", cl_exit->in(0)->_idx); cl_exit->in(0)->dump();
+      tty->print("lpt->_head %d", lpt->_head->_idx); lpt->_head->dump();
+      lpt->dump_head();
+    }
+#endif
+        return;
+  }
 
   // Make sure there are no extra control users of the loop backedge
   if (cl->back_control()->outcnt() != 1) {
     return;
   }

@@ -264,17 +295,25 @@
       // Set memory reference which is the best from all memory operations
       // to be used for alignment. The pre-loop trip count is modified to align
       // this reference to a vector-aligned address.
       best_align_to_mem_ref = mem_ref;
       best_iv_adjustment = iv_adjustment;
+      #ifndef PRODUCT
+        if(is_trace_adjacent()) {
+          tty->print("SuperWord::find_adjacent_refs best_align_to_mem_ref = %d, best_iv_adjustment = %d",
+            best_align_to_mem_ref->_idx, best_iv_adjustment); 
+          best_align_to_mem_ref->dump();
+        }
+      #endif
     }
 
     SWPointer align_to_ref_p(mem_ref, this);
     // Set alignment relative to "align_to_ref" for all related memory operations.
     for (int i = memops.size() - 1; i >= 0; i--) {
       MemNode* s = memops.at(i)->as_Mem();
-      if (isomorphic(s, mem_ref)) {
+      if (isomorphic(s, mem_ref) &&
+           (!_do_vector_loop || same_origin_idx(s, mem_ref))) {
         SWPointer p2(s, this);
         if (p2.comparable(align_to_ref_p)) {
           int align = memory_alignment(s, iv_adjustment);
           set_alignment(s, align);
         }

@@ -335,11 +374,11 @@
           if (s1 != s2 && are_adjacent_refs(s1, s2)) {
             if (stmts_can_pack(s1, s2, align)) {
               Node_List* pair = new Node_List();
               pair->push(s1);
               pair->push(s2);
-              if (!_do_vector_loop || _clone_map.idx(s1->_idx) == _clone_map.idx(s2->_idx)) {
+              if (!_do_vector_loop || same_origin_idx(s1, s2)) {
                 _packset.append(pair);
               }
             }
           }
         }

@@ -372,12 +411,26 @@
           MemNode* s = p->at(0)->as_Mem();
           assert(!same_velt_type(s, mem_ref), "sanity");
           memops.push(s);
         }
         MemNode* best_align_to_mem_ref = find_align_to_ref(memops);
-        if (best_align_to_mem_ref == NULL) break;
+        if (best_align_to_mem_ref == NULL) {
+          #ifndef PRODUCT
+            if (TraceSuperWord) {
+              tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");
+            }
+          #endif
+          break;
+        }
         best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
+        #ifndef PRODUCT
+          if(is_trace_adjacent()) {
+            tty->print("SuperWord::find_adjacent_refs best_align_to_mem_ref = %d, best_iv_adjustment = %d",
+              best_align_to_mem_ref->_idx, best_iv_adjustment);
+            best_align_to_mem_ref->dump();
+          }
+        #endif
         // Restore list.
         while (memops.size() > orig_msize)
           (void)memops.pop();
       }
     } // unaligned memory accesses

@@ -595,13 +648,15 @@
     // This memory op is not dependent on iv (scale == 0)
     iv_adjustment = 0;
   }
 
 #ifndef PRODUCT
-  if (TraceSuperWord)
-    tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
-                  offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
+  if (TraceSuperWord) {
+    tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
+      mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
+    mem_ref->dump();
+  }
 #endif
   return iv_adjustment;
 }
 
 //---------------------------dependence_graph---------------------------

@@ -702,16 +757,26 @@
 void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) {
   assert(preds.length() == 0, "start empty");
   Node* n = start;
   Node* prev = NULL;
   while (true) {
+    #ifndef PRODUCT
+      if(is_trace_mem_slice()) {
+        tty->print_cr("SuperWord::mem_slice_preds: n %d", n->_idx);
+      }
+    #endif
     assert(in_bb(n), "must be in block");
     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
       Node* out = n->fast_out(i);
       if (out->is_Load()) {
         if (in_bb(out)) {
           preds.push(out);
+        #ifndef PRODUCT
+          if (TraceSuperWord && Verbose) {
+            tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);
+          }
+        #endif
         }
       } else {
         // FIXME
         if (out->is_MergeMem() && !in_bb(out)) {
           // Either unrolling is causing a memory edge not to disappear,

@@ -726,10 +791,13 @@
         }
       }
     }
     if (n == stop) break;
     preds.push(n);
+    #ifndef PRODUCT
+      if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);
+    #endif
     prev = n;
     assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name()));
     n = n->in(MemNode::Memory);
   }
 }

@@ -1964,22 +2032,42 @@
         assert(rpo_idx >= 0 || _stk.is_empty(), "");
       }
     } else {
       _stk.pop(); // Remove post-visited node from stack
     }
-  }
+  }//while
 
+  int ii_current = -1;
+  unsigned int load_idx = -1;
+  _ii_order.clear();
   // Create real map of block indices for nodes
   for (int j = 0; j < _block.length(); j++) {
     Node* n = _block.at(j);
     set_bb_idx(n, j);
+    if (_do_vector_loop && n->is_Load()) {
+      if (ii_current == -1) {
+        ii_current = _clone_map.gen(n->_idx);
+        _ii_order.push(ii_current);
+        load_idx = _clone_map.idx(n->_idx);
+      } else if (_clone_map.idx(n->_idx) == load_idx && _clone_map.gen(n->_idx) != ii_current) {
+        ii_current = _clone_map.gen(n->_idx);
+        _ii_order.push(ii_current);
   }
+    }
+  }//for
 
   // Ensure extra info is allocated.
   initialize_bb();
 
 #ifndef PRODUCT
+  if (_vector_loop_debug && _ii_order.length() > 0) {
+    tty->print("SuperWord::construct_bb: List of generations: ");
+    for (int jj = 0; jj < _ii_order.length(); ++jj) {
+      tty->print("  %d:%d", jj, _ii_order.at(jj));
+    }
+    tty->print_cr(" ");
+  }
   if (TraceSuperWord) {
     print_bb();
     tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
     for (int m = 0; m < _data_entry.length(); m++) {
       tty->print("%3d ", m);

@@ -2136,22 +2224,42 @@
 }
 
 //------------------------------memory_alignment---------------------------
 // Alignment within a vector memory reference
+// Computes the byte position of s inside a vector of vector_width_in_bytes(s)
+// bytes after iv_adjust * p.memory_size() bytes are added to its constant
+// offset; the result is normalized into [0, vw). Returns bottom_align when the
+// address of s cannot be matched by SWPointer or when vw < 2.
 int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
+  #ifndef PRODUCT
+    if(TraceSuperWord && Verbose) {
+      tty->print("SuperWord::memory_alignment within a vector memory reference for %d:  ", s->_idx); s->dump();
+    }
+  #endif
   SWPointer p(s, this);
   if (!p.valid()) {
+    #ifndef PRODUCT
+      if(is_trace_alignment()) {
+        tty->print("SuperWord::memory_alignment: SWPointer p invalid, return bottom_align"); p.print();
+      }
+    #endif
     return bottom_align;
   }
   int vw = vector_width_in_bytes(s);
   if (vw < 2) {
+    #ifndef PRODUCT
+      if(is_trace_alignment()) {
+        tty->print_cr("SuperWord::memory_alignment: vector_width_in_bytes < 2, return bottom_align");
+      }
+    #endif
     return bottom_align; // No vectors for this type
   }
   int offset  = p.offset_in_bytes();
   offset     += iv_adjust*p.memory_size();
   int off_rem = offset % vw;
+  // '%' can yield a negative remainder for a negative offset; fold it into [0, vw).
   int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
+  #ifndef PRODUCT
+    if(TraceSuperWord && Verbose) {
+      tty->print_cr("SuperWord::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);
+    }
+  #endif
   return off_mod;
 }
 
 //---------------------------container_type---------------------------
 // Smallest type containing range of values

@@ -2555,16 +2663,24 @@
   return blanks;
 }
 
 
 //==============================SWPointer===========================
-
+#ifndef PRODUCT
+int SWPointer::_depth = 0; // static trace nesting level; print_depth() emits two spaces per level
+#endif
 //----------------------------SWPointer------------------------
 SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
   _mem(mem), _slp(slp),  _base(NULL),  _adr(NULL),
   _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {
 
+  #ifndef PRODUCT
+    if(_slp->is_trace_alignment()) {
+      print_depth(); tty->print(" %d SWPointer::SWPointer: ctor: ", mem->_idx); mem->dump();
+    }
+  #endif
+
   Node* adr = mem->in(MemNode::Address);
   if (!adr->is_AddP()) {
     assert(!valid(), "too complex");
     return;
   }

@@ -2578,20 +2694,53 @@
   //unsafe reference could not be aligned appropriately without runtime checking
   if (base == NULL || base->bottom_type() == Type::TOP) {
     assert(!valid(), "unsafe access");
     return;
   }
-  for (int i = 0; i < 3; i++) {
+  
+  #ifndef PRODUCT
+    int idepth = depth();
+    if(_slp->is_trace_alignment()) {
+      inc_depth();
+      print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: ", adr->_idx); adr->dump();
+      print_depth(); tty->print("   %d (base) SWPointer::SWPointer: ", adr->in(AddPNode::Base)->_idx); adr->in(AddPNode::Base)->dump();
+    }
+  #endif
+  
+  int i;
+  for (i = 0; i < 3; i++) {
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) {
+        Node* offset = adr->in(AddPNode::Offset);
+        print_depth(); tty->print(" %d (offset) SWPointer::SWPointer: i = %d: ", offset->_idx, i); offset->dump();
+      }
+    #endif
     if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
       assert(!valid(), "too complex");
       return;
     }
     adr = adr->in(AddPNode::Address);
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) {
+        inc_depth();
+        print_depth();
+        tty->print(" %d (adr) SWPointer::SWPointer: i = %d: ", adr->_idx, i);
+        adr->dump();
+      }
+    #endif
     if (base == adr || !adr->is_AddP()) {
       break; // stop looking at addp's
     }
   }
+  #ifndef PRODUCT
+    if(_slp->is_trace_alignment()) {
+      set_depth(idepth);
+      print_depth();
+      tty->print(" %d (adr) SWPointer::SWPointer: stop address analysis: ", adr->_idx);
+      adr->dump();
+    }
+  #endif
   _base = base;
   _adr  = adr;
   assert(valid(), "Usable");
 }
 

@@ -2599,139 +2748,340 @@
 // the pattern match of an address expression.
 SWPointer::SWPointer(SWPointer* p) :
   _mem(p->_mem), _slp(p->_slp),  _base(NULL),  _adr(NULL), // only _mem and _slp are taken from p
   _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {} // all match state starts cleared
 
+
+bool SWPointer::invariant(Node* n) { // true when the loop of n's control node is not a member of lpt()
+    NOT_PRODUCT(Depth dd;) // NOTE(review): Depth is declared outside this view; presumably scopes the trace indent
+    Node *n_c = phase()->get_ctrl(n); // control node assigned to n
+    #ifndef PRODUCT
+      if (_slp->do_vector_loop() && _slp->is_debug() && // debug aid only: does not affect the result
+           !lpt()->is_member(phase()->get_loop(n_c)) != !_slp->in_bb(n)) { // loop-membership test and in_bb(n) disagree
+        print_depth(); tty->print(" %d SWPointer::invariant  conditions differ: n_c %d", n->_idx, n_c->_idx);
+        n->dump();
+        n_c->dump();
+      }
+    #endif
+    return !lpt()->is_member(phase()->get_loop(n_c)); // the answer is based on loop membership alone
+}
 //------------------------scaled_iv_plus_offset--------------------
 // Match: k*iv + offset
 // where: k is a constant that may be zero, and
 //        offset is (k2 [+/- invariant]) where k2 may be zero and invariant is optional
 // Returns true on a match. _scale, _offset and _invar are updated by the helper
 // matchers as they succeed; state set by a partially matched operand is not rolled back.
 bool SWPointer::scaled_iv_plus_offset(Node* n) {
+  #ifndef PRODUCT
+    Depth ddd; // NOTE(review): Depth is declared outside this view; presumably scopes the trace indent
+    if(_slp->is_trace_alignment()) {
+      print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset testing node: ", n->_idx);
+      n->dump();
+    }
+  #endif
+
   if (scaled_iv(n)) { // n alone is k*iv, no offset part
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) {
+        print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: is scaled_iv_plus_offset", n->_idx);
+      }
+    #endif
     return true;
   }
+
   if (offset_plus_k(n)) { // n alone is an offset, no iv part
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) {
+        print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: is scaled_iv_plus_offset", n->_idx);
+      }
+    #endif
     return true;
   }
+
   int opc = n->Opcode();
   if (opc == Op_AddI) { // addition: try both operand orders
     if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI is scaled_iv_plus_offset", n->_idx);
+        }
+      #endif
       return true;
     }
     if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump();
+          print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI is scaled_iv_plus_offset", n->_idx);
+        }
+      #endif
       return true;
     }
   } else if (opc == Op_SubI) {
     if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) { // offset is subtracted: matched with negate = true
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) {
+        print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump();
+        print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump();
+        print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI is scaled_iv_plus_offset", n->_idx);
+      }
+    #endif
       return true;
     }
     if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
       _scale *= -1; // the scaled iv is the subtrahend, so its scale changes sign
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump();
+          print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI is scaled_iv_plus_offset", n->_idx);
+        }
+      #endif
       return true;
     }
   }
+
+  #ifndef PRODUCT
+    if(_slp->is_trace_alignment()) {
+      print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: is NOT scaled_iv_plus_offset", n->_idx);
+    }
+  #endif
   return false;
 }
 
 //----------------------------scaled_iv------------------------
 // Match: k*iv where k is a constant that's not zero
+// On a match the scale is stored in _scale and true is returned. The
+// Op_ConvI2L and Op_LShiftL cases recurse through scaled_iv_plus_offset and
+// may therefore also update _offset.
+// When _scale is already set the node is not examined: the result is then
+// true only in vector-loop mode (_slp->do_vector_loop()).
 bool SWPointer::scaled_iv(Node* n) {
+  #ifndef PRODUCT
+    Depth ddd;
+    if(_slp->is_trace_alignment()) {
+      print_depth(); tty->print(" %d SWPointer::scaled_iv: testing node: ", n->_idx); n->dump();
+    }
+  #endif
+
   if (_scale != 0) {
-    return false;  // already found a scale
+    // A scale was already recorded. Outside vector-loop mode a second one is
+    // rejected; in vector-loop mode the node is accepted without inspection.
+    const bool accepted = _slp->do_vector_loop();
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) {
+        print_depth(); tty->print_cr("SWPointer::scaled_iv: _scale (%d) != 0", _scale);
+        // Report the verdict that is actually returned below.
+        print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: %s", n->_idx,
+                                     accepted ? "accepted as scaled_iv (do_vector_loop)" : "is NOT scaled_iv");
+      }
+    #endif
+    return accepted;  // already found a scale
   }
+
   if (n == iv()) {
     _scale = 1;
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) {
+        print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: is iv, setting _scale = %d", n->_idx, _scale);
+      }
+    #endif
     return true;
   }
+
   int opc = n->Opcode();
   if (opc == Op_MulI) {
     if (n->in(1) == iv() && n->in(2)->is_Con()) {
       _scale = n->in(2)->get_int();
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print(" %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI is scaled_iv, setting _scale = %d", n->_idx, _scale);
+        }
+      #endif
       return true;
     } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
       _scale = n->in(1)->get_int();
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::scaled_iv: in(2) is iv: ", n->in(2)->_idx); n->in(2)->dump();
+          print_depth(); tty->print(" %d SWPointer::scaled_iv: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI is scaled_iv, setting _scale = %d", n->_idx, _scale);
+        }
+      #endif
       return true;
     }
   } else if (opc == Op_LShiftI) {
     if (n->in(1) == iv() && n->in(2)->is_Con()) {
       _scale = 1 << n->in(2)->get_int();
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print(" %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftI is scaled_iv, setting _scale = %d", n->_idx, _scale);
+        }
+      #endif
       return true;
     }
   } else if (opc == Op_ConvI2L) {
     if (scaled_iv_plus_offset(n->in(1))) {
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::scaled_iv: in(1) is scaled_iv_plus_offset: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_ConvI2L is scaled_iv", n->_idx);
+        }
+      #endif
       return true;
     }
   } else if (opc == Op_LShiftL) {
     if (!has_iv() && _invar == NULL) {
       // Need to preserve the current _offset value, so
       // create a temporary object for this expression subtree.
       // Hacky, so should re-engineer the address pattern match.
+      NOT_PRODUCT(Depth dddd;)
       SWPointer tmp(this);
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::scaled_iv: Op_LShiftL, creating tmp SWPointer: ", n->_idx); tmp.print();
+        }
+      #endif
       if (tmp.scaled_iv_plus_offset(n->in(1))) {
-        if (tmp._invar == NULL) {
+        // NOTE(review): the pre-patch guard was tmp._invar == NULL only. Vector-loop
+        // mode now also accepts a sub-expression that carries an invariant, but
+        // tmp._invar is not copied into this pointer - confirm that is intended.
+        if (tmp._invar == NULL || _slp->do_vector_loop()) {
           int mult = 1 << n->in(2)->get_int();
           _scale   = tmp._scale  * mult;
           _offset += tmp._offset * mult;
+          #ifndef PRODUCT
+            if(_slp->is_trace_alignment()) {
+                print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL is scaled_iv, setting _scale = %d, _offset = %d", n->_idx, _scale, _offset);
+            }
+          #endif
           return true;
         }
       }
     }
   }
+  #ifndef PRODUCT
+    if(_slp->is_trace_alignment()) {
+      print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: is NOT scaled_iv", n->_idx);
+    }
+  #endif
   return false;
 }
 
 //----------------------------offset_plus_k------------------------
 // Match: offset is (k [+/- invariant])
 // where k may be zero and invariant is optional, but not both.
+// negate: when true, a directly matched constant is accumulated into _offset
+// with its sign flipped and the invariant's sign is recorded in _negate_invar.
+// When an invariant is already recorded (_invar != NULL) a non-constant node
+// is not examined: the result is then true only in vector-loop mode
+// (_slp->do_vector_loop()).
 bool SWPointer::offset_plus_k(Node* n, bool negate) {
+  #ifndef PRODUCT
+    Depth ddd;
+    if(_slp->is_trace_alignment()) {
+      print_depth(); tty->print(" %d SWPointer::offset_plus_k: testing node: ", n->_idx); n->dump();
+    }
+  #endif
   int opc = n->Opcode();
   if (opc == Op_ConI) {
     _offset += negate ? -(n->get_int()) : n->get_int();
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) {
+        print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConI is offset_plus_k, setting _offset = %d", n->_idx, _offset);
+      }
+    #endif
     return true;
   } else if (opc == Op_ConL) {
     // Okay if value fits into an int
     const TypeLong* t = n->find_long_type();
     if (t->higher_equal(TypeLong::INT)) {
       jlong loff = n->get_long();
       jint  off  = (jint)loff;
       _offset += negate ? -off : loff;
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConL is offset_plus_k, setting _offset = %d", n->_idx, _offset);
+        }
+      #endif
       return true;
     }
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) {
+        print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConL is NOT offset_plus_k, k is too big", n->_idx);
+        print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: is NOT offset_plus_k", n->_idx);
+      }
+    #endif
     return false;
   }
-  if (_invar != NULL) return false; // already have an invariant
+  if (_invar != NULL) {
+    // An invariant was already recorded. Outside vector-loop mode a second one
+    // is rejected; in vector-loop mode the node is accepted without inspection.
+    const bool accepted = _slp->do_vector_loop();
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) { 
+        print_depth(); tty->print(" %d SWPointer::offset_plus_k: _invar != NULL: ", _invar->_idx); _invar->dump();
+        // Report the verdict that is actually returned below.
+        print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: %s", n->_idx,
+                                     accepted ? "accepted as offset_plus_k (do_vector_loop)" : "is NOT offset_plus_k");
+      }
+    #endif
+    return accepted; // already have an invariant
+  }
   if (opc == Op_AddI) {
     if (n->in(2)->is_Con() && invariant(n->in(1))) {
       _negate_invar = negate;
       _invar = n->in(1);
       _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
+          print_depth(); tty->print(" %d SWPointer::offset_plus_k: in(1) is invariant: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI is offset_plus_k, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);
+        }
+      #endif
       return true;
     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
       _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
       _negate_invar = negate;
       _invar = n->in(2);
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print(" %d SWPointer::offset_plus_k: in(2) is invariant: ", n->in(2)->_idx); n->in(2)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI is offset_plus_k, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);
+        }
+      #endif
       return true;
     }
   }
   if (opc == Op_SubI) {
     if (n->in(2)->is_Con() && invariant(n->in(1))) {
       _negate_invar = negate;
       _invar = n->in(1);
       _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump();
+          print_depth(); tty->print(" %d SWPointer::offset_plus_k: in(1) is invariant: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI is offset_plus_k, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);
+        }
+      #endif
       return true;
     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
       _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
       _negate_invar = !negate;
       _invar = n->in(2);
+      #ifndef PRODUCT
+        if(_slp->is_trace_alignment()) {
+          print_depth(); tty->print(" %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump();
+          print_depth(); tty->print(" %d SWPointer::offset_plus_k: in(2) is invariant: ", n->in(2)->_idx); n->in(2)->dump();
+          print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI is offset_plus_k, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);
+        }
+      #endif
       return true;
     }
   }
   if (invariant(n)) {
     _negate_invar = negate;
     _invar = n;
+    #ifndef PRODUCT
+      if(_slp->is_trace_alignment()) {
+        print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: n is invariant", n->_idx);
+        print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: n is offset_plus_k, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);
+      }
+    #endif
     return true;
   }
+  
+  #ifndef PRODUCT
+    if(_slp->is_trace_alignment()) {
+      print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: is NOT offset_plus_k", n->_idx);
+    }
+  #endif
   return false;
 }
 
 //----------------------------print------------------------
 void SWPointer::print() {

@@ -2743,10 +3093,16 @@
              _negate_invar?'-':'+',
              _invar != NULL ? _invar->_idx : 0);
 #endif
 }
 
+#ifndef PRODUCT
+// Indents the trace output: two spaces for every level recorded in _depth.
+void SWPointer::print_depth() {
+  int remaining = _depth;
+  while (remaining > 0) {
+    tty->print("  ");
+    --remaining;
+  }
+}
+#endif
+
 // ========================= OrderedPair =====================
 
 const OrderedPair OrderedPair::initial;
 
 // ========================= SWNodeInfo =====================

@@ -2889,10 +3245,17 @@
 }
 
 //
 // --------------------------------- vectorization/simd -----------------------------------
 //
+// True when both nodes are non-NULL and _clone_map.same_idx() holds for their
+// node indices; a NULL argument yields false.
+bool SuperWord::same_origin_idx(Node* a, Node* b) const {
+  if (a == NULL || b == NULL) {
+    return false;
+  }
+  return _clone_map.same_idx(a->_idx, b->_idx);
+}
+// True when both nodes are non-NULL and _clone_map.same_gen() holds for their
+// node indices; a NULL argument yields false.
+bool SuperWord::same_generation(Node* a, Node* b) const {
+  if (a == NULL || b == NULL) {
+    return false;
+  }
+  return _clone_map.same_gen(a->_idx, b->_idx);
+}
+
 Node*  SuperWord::find_phi_for_mem_dep(LoadNode* ld) {
   assert(in_bb(ld), "must be in block");
   if (_clone_map.gen(ld->_idx) == _ii_first) {
 #ifndef PRODUCT
     if (_vector_loop_debug) {

@@ -2914,11 +3277,11 @@
       mem->dump();
     }
 #endif
     return NULL;
   }
-  if (!in_bb(mem) || _clone_map.gen(mem->_idx) == _clone_map.gen(ld->_idx)) {
+  if (!in_bb(mem) || same_generation(mem, ld)) {
 #ifndef PRODUCT
     if (_vector_loop_debug) {
       tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(mem->_idx)=%d",
                     _clone_map.gen(mem->_idx));
     }

@@ -2964,11 +3327,11 @@
 }
 
 Node* SuperWord::first_node(Node* nd) {
   for (int ii = 0; ii < _iteration_first.length(); ii++) {
     Node* nnn = _iteration_first.at(ii);
-    if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) {
+    if (same_origin_idx(nnn, nd)) {
 #ifndef PRODUCT
       if (_vector_loop_debug) {
         tty->print_cr("SuperWord::first_node: %d is the first iteration node for %d (_clone_map.idx(nnn->_idx) = %d)",
                       nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx));
       }

@@ -2987,11 +3350,11 @@
 }
 
 Node* SuperWord::last_node(Node* nd) {
   for (int ii = 0; ii < _iteration_last.length(); ii++) {
     Node* nnn = _iteration_last.at(ii);
-    if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) {
+    if (same_origin_idx(nnn, nd)) {
 #ifndef PRODUCT
       if (_vector_loop_debug) {
         tty->print_cr("SuperWord::last_node _clone_map.idx(nnn->_idx)=%d, _clone_map.idx(nd->_idx)=%d",
                       _clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx));
       }

@@ -3032,14 +3395,16 @@
           ii_err = ii;
           _ii_first = _clone_map.gen(ii->_idx);
         } else if (_ii_first != _clone_map.gen(ii->_idx)) {
 #ifndef PRODUCT
           if (TraceSuperWord && Verbose) {
-            tty->print_cr("SuperWord::mark_generations _ii_first error - found different generations in two nodes ");
+            tty->print_cr("SuperWord::mark_generations: _ii_first was found before and not equal to one in this node (%d)", _ii_first);
             ii->dump();
+            if (ii_err!= 0) {
             ii_err->dump();
           }
+          }
 #endif
           return -1; // this phi has Stores from different generations of unroll and cannot be simd/vectorized
         }
       }
     }//for (DUIterator_Fast imax,

@@ -3065,20 +3430,19 @@
       _iteration_last.push(n);
     }
   }
 
   // building order of iterations
-  assert(_ii_order.length() == 0, "should be empty");
-  if (ii_err != 0) {
+  if (_ii_order.length() == 0 && ii_err != 0) {
     assert(in_bb(ii_err) && ii_err->is_Store(), "should be Store in bb");
     Node* nd = ii_err;
     while(_clone_map.gen(nd->_idx) != _ii_last) {
       _ii_order.push(_clone_map.gen(nd->_idx));
       bool found = false;
       for (DUIterator_Fast imax, i = nd->fast_outs(imax); i < imax; i++) {
         Node* use = nd->fast_out(i);
-        if (_clone_map.idx(use->_idx) == _clone_map.idx(nd->_idx) && use->as_Store()->in(MemNode::Memory) == nd) {
+        if (same_origin_idx(use, nd) && use->as_Store()->in(MemNode::Memory) == nd) {
           found = true;
           nd = use;
           break;
         }
       }//for

@@ -3116,24 +3480,24 @@
   return _ii_first;
 }
 
 bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) {
   assert(gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul(), "should be only Add or Mul nodes");
-  assert(_clone_map.idx(gold->_idx) == _clone_map.idx(fix->_idx), "should be clones of the same node");
+  assert(same_origin_idx(gold, fix), "should be clones of the same node");
   Node* gin1 = gold->in(1);
   Node* gin2 = gold->in(2);
   Node* fin1 = fix->in(1);
   Node* fin2 = fix->in(2);
   bool swapped = false;
 
   if (in_bb(gin1) && in_bb(gin2) && in_bb(fin1) && in_bb(fin1)) {
-    if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin1->_idx) &&
-        _clone_map.idx(gin2->_idx) == _clone_map.idx(fin2->_idx)) {
+    if (same_origin_idx(gin1, fin1) &&
+        same_origin_idx(gin2, fin2)) {
       return true; // nothing to fix
     }
-    if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin2->_idx) &&
-        _clone_map.idx(gin2->_idx) == _clone_map.idx(fin1->_idx)) {
+    if (same_origin_idx(gin1, fin2) &&
+        same_origin_idx(gin2, fin1)) {
       fix->swap_edges(1, 2);
       swapped = true;
     }
   }
   // at least one input comes from outside of bb

@@ -3177,11 +3541,11 @@
       Node_List* pk = new Node_List();
       pk->push(nd);
       for (int gen = 1; gen < _ii_order.length(); ++gen) {
         for (int kk = 0; kk < _block.length(); kk++) {
           Node* clone = _block.at(kk);
-          if (_clone_map.idx(clone->_idx) == _clone_map.idx(nd->_idx) &&
+          if (same_origin_idx(clone, nd) &&
               _clone_map.gen(clone->_idx) == _ii_order.at(gen)) {
             if (nd->is_Add() || nd->is_Mul()) {
               fix_commutative_inputs(nd, clone);
             }
             pk->push(clone);

@@ -3242,13 +3606,12 @@
     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
       Node* ld = n->fast_out(i);
       if (ld->is_Load() && ld->as_Load()->in(MemNode::Memory) == n && in_bb(ld)) {
         for (int i = 0; i < _block.length(); i++) {
           Node* ld2 = _block.at(i);
-          if (ld2->is_Load() &&
-              _clone_map.idx(ld->_idx) == _clone_map.idx(ld2->_idx) &&
-              _clone_map.gen(ld->_idx) != _clone_map.gen(ld2->_idx)) { // <= do not collect the first generation ld
+          if (ld2->is_Load() && same_origin_idx(ld, ld2) &&
+              !same_generation(ld, ld2)) { // <= do not collect the first generation ld
 #ifndef PRODUCT
             if (_vector_loop_debug) {
               tty->print_cr("SuperWord::hoist_loads_in_graph: will try to hoist load ld2->_idx=%d, cloned from %d (ld->_idx=%d)",
                             ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx);
             }
< prev index next >