< prev index next >

src/share/vm/opto/superword.cpp

Print this page




  49   _arena(phase->C->comp_arena()),
  50   _packset(arena(), 8,  0, NULL),         // packs for the current block
  51   _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
  52   _block(arena(), 8,  0, NULL),           // nodes in current block
  53   _data_entry(arena(), 8,  0, NULL),      // nodes with all inputs from outside
  54   _mem_slice_head(arena(), 8,  0, NULL),  // memory slice heads
  55   _mem_slice_tail(arena(), 8,  0, NULL),  // memory slice tails
  56   _node_info(arena(), 8,  0, SWNodeInfo::initial), // info needed per node
  57   _align_to_ref(NULL),                    // memory reference to align vectors to
  58   _disjoint_ptrs(arena(), 8,  0, OrderedPair::initial), // runtime disambiguated pointer pairs
  59   _dg(_arena),                            // dependence graph
  60   _visited(arena()),                      // visited node set
  61   _post_visited(arena()),                 // post visited node set
  62   _n_idx_list(arena(), 8),                // scratch list of (node,index) pairs
  63   _stk(arena(), 8, 0, NULL),              // scratch stack of nodes
  64   _nlist(arena(), 8, 0, NULL),            // scratch list of nodes
  65   _lpt(NULL),                             // loop tree node
  66   _lp(NULL),                              // LoopNode
  67   _bb(NULL),                              // basic block
  68   _iv(NULL),                              // induction var
  69   _race_possible(false)                   // cases where SDMU is true

  70 {}
  71 
  72 //------------------------------transform_loop---------------------------
  73 void SuperWord::transform_loop(IdealLoopTree* lpt) {
  74   assert(UseSuperWord, "should be");
  75   // Do vectors exist on this architecture?
  76   if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
  77 
  78   assert(lpt->_head->is_CountedLoop(), "must be");
  79   CountedLoopNode *cl = lpt->_head->as_CountedLoop();
  80 
  81   if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
  82 
  83   if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
  84 
  85   // Check for no control flow in body (other than exit)
  86   Node *cl_exit = cl->loopexit();
  87   if (cl_exit->in(0) != lpt->_head) return;
  88 
  89   // Make sure there are no extra control users of the loop backedge
  90   if (cl->back_control()->outcnt() != 1) {
  91     return;
  92   }
  93 
  94   // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
  95   CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
  96   if (pre_end == NULL) return;
  97   Node *pre_opaq1 = pre_end->limit();
  98   if (pre_opaq1->Opcode() != Op_Opaque1) return;
  99 
 100   init(); // initialize data structures
 101 
 102   set_lpt(lpt);
 103   set_lp(cl);
 104 
 105   // For now, define one block which is the entire loop body
 106   set_bb(cl);
 107 

     // Run the superword-level-parallelism extraction on the (freshly
     // initialized) packset.
 108   assert(_packset.length() == 0, "packset must be empty");
 109   SLP_extract();

 110 }
 111 
 112 //------------------------------SLP_extract---------------------------
 113 // Extract the superword level parallelism
 114 //
 115 // 1) A reverse post-order of nodes in the block is constructed.  By scanning
 116 //    this list from first to last, all definitions are visited before their uses.
 117 //
 118 // 2) A point-to-point dependence graph is constructed between memory references.
 119 //    This simplifies the upcoming "independence" checker.
 120 //
 121 // 3) The maximum depth in the node graph from the beginning of the block
 122 //    to each node is computed.  This is used to prune the graph search
 123 //    in the independence checker.
 124 //
 125 // 4) For integer types, the necessary bit width is propagated backwards
 126 //    from stores to allow packed operations on byte, char, and short
 127 //    integers.  This reverses the promotion to type "int" that javac
 128 //    did for operations like: char c1,c2,c3;  c1 = c2 + c3.
 129 //


 193 
 194   Node_List align_to_refs;
 195   int best_iv_adjustment = 0;
 196   MemNode* best_align_to_mem_ref = NULL;
 197 
 198   while (memops.size() != 0) {
 199     // Find a memory reference to align to.
 200     MemNode* mem_ref = find_align_to_ref(memops);
 201     if (mem_ref == NULL) break;
 202     align_to_refs.push(mem_ref);
 203     int iv_adjustment = get_iv_adjustment(mem_ref);
 204 
 205     if (best_align_to_mem_ref == NULL) {
 206       // Set memory reference which is the best from all memory operations
 207       // to be used for alignment. The pre-loop trip count is modified to align
 208       // this reference to a vector-aligned address.
 209       best_align_to_mem_ref = mem_ref;
 210       best_iv_adjustment = iv_adjustment;
 211     }
 212 
 213     SWPointer align_to_ref_p(mem_ref, this);
 214     // Set alignment relative to "align_to_ref" for all related memory operations.
 215     for (int i = memops.size() - 1; i >= 0; i--) {
 216       MemNode* s = memops.at(i)->as_Mem();
 217       if (isomorphic(s, mem_ref)) {
 218         SWPointer p2(s, this);
 219         if (p2.comparable(align_to_ref_p)) {
 220           int align = memory_alignment(s, iv_adjustment);
 221           set_alignment(s, align);
 222         }
 223       }
 224     }
 225 
 226     // Create initial pack pairs of memory operations for which
 227     // alignment is set and vectors will be aligned.
 228     bool create_pack = true;
 229     if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
 230       if (!Matcher::misaligned_vectors_ok()) {
 231         int vw = vector_width(mem_ref);
 232         int vw_best = vector_width(best_align_to_mem_ref);
 233         if (vw > vw_best) {
 234           // Do not vectorize a memory access with more elements per vector
 235           // if unaligned memory access is not allowed because number of
 236           // iterations in pre-loop will be not enough to align it.
 237           create_pack = false;
 238         }


 327   set_align_to_ref(best_align_to_mem_ref);
 328 
 329 #ifndef PRODUCT
 330   if (TraceSuperWord) {
 331     tty->print_cr("\nAfter find_adjacent_refs");
 332     print_packset();
 333   }
 334 #endif
 335 }
 336 
 337 //------------------------------find_align_to_ref---------------------------
 338 // Find a memory reference to align the loop induction variable to.
 339 // Looks first at stores then at loads, looking for a memory reference
 340 // with the largest number of references similar to it.
 341 MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
 342   GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
 343 
 344   // Count number of comparable memory ops
 345   for (uint i = 0; i < memops.size(); i++) {
 346     MemNode* s1 = memops.at(i)->as_Mem();
 347     SWPointer p1(s1, this);
 348     // Discard if pre loop can't align this reference
 349     if (!ref_is_alignable(p1)) {
 350       *cmp_ct.adr_at(i) = 0;
 351       continue;
 352     }
 353     for (uint j = i+1; j < memops.size(); j++) {
 354       MemNode* s2 = memops.at(j)->as_Mem();
 355       if (isomorphic(s1, s2)) {
 356         SWPointer p2(s2, this);
 357         if (p1.comparable(p2)) {
 358           (*cmp_ct.adr_at(i))++;
 359           (*cmp_ct.adr_at(j))++;
 360         }
 361       }
 362     }
 363   }
 364 
 365   // Find Store (or Load) with the greatest number of "comparable" references,
 366   // biggest vector size, smallest data size and smallest iv offset.
 367   int max_ct        = 0;
 368   int max_vw        = 0;
 369   int max_idx       = -1;
 370   int min_size      = max_jint;
 371   int min_iv_offset = max_jint;
 372   for (uint j = 0; j < memops.size(); j++) {
 373     MemNode* s = memops.at(j)->as_Mem();
 374     if (s->is_Store()) {
 375       int vw = vector_width_in_bytes(s);
 376       assert(vw > 1, "sanity");
 377       SWPointer p(s, this);
 378       if (cmp_ct.at(j) >  max_ct ||
 379           cmp_ct.at(j) == max_ct &&
 380             (vw >  max_vw ||
 381              vw == max_vw &&
 382               (data_size(s) <  min_size ||
 383                data_size(s) == min_size &&
 384                  (p.offset_in_bytes() < min_iv_offset)))) {
 385         max_ct = cmp_ct.at(j);
 386         max_vw = vw;
 387         max_idx = j;
 388         min_size = data_size(s);
 389         min_iv_offset = p.offset_in_bytes();
 390       }
 391     }
 392   }
 393   // If no stores, look at loads
 394   if (max_ct == 0) {
 395     for (uint j = 0; j < memops.size(); j++) {
 396       MemNode* s = memops.at(j)->as_Mem();
 397       if (s->is_Load()) {
 398         int vw = vector_width_in_bytes(s);
 399         assert(vw > 1, "sanity");
 400         SWPointer p(s, this);
 401         if (cmp_ct.at(j) >  max_ct ||
 402             cmp_ct.at(j) == max_ct &&
 403               (vw >  max_vw ||
 404                vw == max_vw &&
 405                 (data_size(s) <  min_size ||
 406                  data_size(s) == min_size &&
 407                    (p.offset_in_bytes() < min_iv_offset)))) {
 408           max_ct = cmp_ct.at(j);
 409           max_vw = vw;
 410           max_idx = j;
 411           min_size = data_size(s);
 412           min_iv_offset = p.offset_in_bytes();
 413         }
 414       }
 415     }
 416   }
 417 
 418 #ifdef ASSERT
 419   if (TraceSuperWord && Verbose) {
 420     tty->print_cr("\nVector memops after find_align_to_refs");


 465     if (init_nd->is_Con() && p.invar() == NULL) {
 466       int init = init_nd->bottom_type()->is_int()->get_con();
 467 
 468       int init_offset = init * p.scale_in_bytes() + offset;
 469       assert(init_offset >= 0, "positive offset from object start");
 470 
 471       if (span > 0) {
 472         return (vw - (init_offset % vw)) % span == 0;
 473       } else {
 474         assert(span < 0, "nonzero stride * scale");
 475         return (init_offset % vw) % -span == 0;
 476       }
 477     }
 478   }
 479   return false;
 480 }
 481 
 482 //---------------------------get_iv_adjustment---------------------------
 483 // Calculate loop's iv adjustment for this memory op.
 484 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
 485   SWPointer align_to_ref_p(mem_ref, this);
 486   int offset = align_to_ref_p.offset_in_bytes();
 487   int scale  = align_to_ref_p.scale_in_bytes();
 488   int vw       = vector_width_in_bytes(mem_ref);
 489   assert(vw > 1, "sanity");
 490   int stride_sign   = (scale * iv_stride()) > 0 ? 1 : -1;
 491   // At least one iteration is executed in pre-loop by default. As result
 492   // several iterations are needed to align memory operations in main-loop even
 493   // if offset is 0.
 494   int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
 495   int elt_size = align_to_ref_p.memory_size();
 496   assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
 497          err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
 498   int iv_adjustment = iv_adjustment_in_bytes/elt_size;
 499 
 500 #ifndef PRODUCT
 501   if (TraceSuperWord)
 502     tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
 503                   offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
 504 #endif
 505   return iv_adjustment;


 525 
 526     // Get slice in predecessor order (last is first)
 527     mem_slice_preds(n_tail, n, _nlist);
 528 
 529     // Make the slice dependent on the root
 530     DepMem* slice = _dg.dep(n);
 531     _dg.make_edge(_dg.root(), slice);
 532 
 533     // Create a sink for the slice
 534     DepMem* slice_sink = _dg.make_node(NULL);
 535     _dg.make_edge(slice_sink, _dg.tail());
 536 
 537     // Now visit each pair of memory ops, creating the edges
 538     for (int j = _nlist.length() - 1; j >= 0 ; j--) {
 539       Node* s1 = _nlist.at(j);
 540 
 541       // If no dependency yet, use slice
 542       if (_dg.dep(s1)->in_cnt() == 0) {
 543         _dg.make_edge(slice, s1);
 544       }
 545       SWPointer p1(s1->as_Mem(), this);
 546       bool sink_dependent = true;
 547       for (int k = j - 1; k >= 0; k--) {
 548         Node* s2 = _nlist.at(k);
 549         if (s1->is_Load() && s2->is_Load())
 550           continue;
 551         SWPointer p2(s2->as_Mem(), this);
 552 
 553         int cmp = p1.cmp(p2);
 554         if (SuperWordRTDepCheck &&
 555             p1.base() != p2.base() && p1.valid() && p2.valid()) {
 556           // Create a runtime check to disambiguate
 557           OrderedPair pp(p1.base(), p2.base());
 558           _disjoint_ptrs.append_if_missing(pp);
 559         } else if (!SWPointer::not_equal(cmp)) {
 560           // Possibly same address
 561           _dg.make_edge(s1, s2);
 562           sink_dependent = false;
 563         }
 564       }
 565       if (sink_dependent) {
 566         _dg.make_edge(s1, slice_sink);
 567       }
 568     }
 569 #ifndef PRODUCT
 570     if (TraceSuperWord) {
 571       tty->print_cr("\nDependence graph for slice: %d", n->_idx);


 671   return false;
 672 }
 673 
 674 //------------------------------are_adjacent_refs---------------------------
 675 // Is s1 immediately before s2 in memory?
 676 bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
 677   if (!s1->is_Mem() || !s2->is_Mem()) return false;
 678   if (!in_bb(s1)    || !in_bb(s2))    return false;
 679 
 680   // Do not use superword for non-primitives
 681   if (!is_java_primitive(s1->as_Mem()->memory_type()) ||
 682       !is_java_primitive(s2->as_Mem()->memory_type())) {
 683     return false;
 684   }
 685 
 686   // FIXME - co_locate_pack fails on Stores in different mem-slices, so
 687   // only pack memops that are in the same alias set until that's fixed.
 688   if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
 689       _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
 690     return false;
     // Both addresses must decompose against the same base and be comparable
     // before a byte-offset difference is meaningful.
 691   SWPointer p1(s1->as_Mem(), this);
 692   SWPointer p2(s2->as_Mem(), this);
 693   if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
     // Adjacent iff s2 starts exactly where s1 ends (s1's offset + s1's size).
 694   int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
 695   return diff == data_size(s1);
 696 }
 697 
 698 //------------------------------isomorphic---------------------------
 699 // Are s1 and s2 similar?
 700 bool SuperWord::isomorphic(Node* s1, Node* s2) {
 701   if (s1->Opcode() != s2->Opcode()) return false;  // same operation
 702   if (s1->req() != s2->req()) return false;        // same number of inputs
 703   if (s1->in(0) != s2->in(0)) return false;        // same control input
 704   if (!same_velt_type(s1, s2)) return false;       // same vector element type
 705   return true;
 706 }
 707 
 708 //------------------------------independent---------------------------
 709 // Is there no data path from s1 to s2 or s2 to s1?
 710 bool SuperWord::independent(Node* s1, Node* s2) {
 711   //  assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
 712   int d1 = depth(s1);


1480   // Insert extract (unpack) operations for scalar uses
1481   for (int i = 0; i < _packset.length(); i++) {
1482     insert_extracts(_packset.at(i));
1483   }
1484 
1485   Compile* C = _phase->C;
1486   uint max_vlen_in_bytes = 0;
1487   for (int i = 0; i < _block.length(); i++) {
1488     Node* n = _block.at(i);
1489     Node_List* p = my_pack(n);
1490     if (p && n == executed_last(p)) {
1491       uint vlen = p->size();
1492       uint vlen_in_bytes = 0;
1493       Node* vn = NULL;
1494       Node* low_adr = p->at(0);
1495       Node* first   = executed_first(p);
1496       int   opc = n->Opcode();
1497       if (n->is_Load()) {
1498         Node* ctl = n->in(MemNode::Control);
1499         Node* mem = first->in(MemNode::Memory);
1500         SWPointer p1(n->as_Mem(), this);
1501         // Identify the memory dependency for the new loadVector node by
1502         // walking up through memory chain.
1503         // This is done to give flexibility to the new loadVector node so that
1504         // it can move above independent storeVector nodes.
1505         while (mem->is_StoreVector()) {
1506           SWPointer p2(mem->as_Mem(), this);
1507           int cmp = p1.cmp(p2);
1508           if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
1509             mem = mem->in(MemNode::Memory);
1510           } else {
1511             break; // dependent memory
1512           }
1513         }
1514         Node* adr = low_adr->in(MemNode::Address);
1515         const TypePtr* atyp = n->adr_type();
1516         vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
1517         vlen_in_bytes = vn->as_LoadVector()->memory_size();
1518       } else if (n->is_Store()) {
1519         // Promote value to be stored to vector
1520         Node* val = vector_opd(p, MemNode::ValueIn);
1521         Node* ctl = n->in(MemNode::Control);
1522         Node* mem = first->in(MemNode::Memory);
1523         Node* adr = low_adr->in(MemNode::Address);
1524         const TypePtr* atyp = n->adr_type();
1525         vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
1526         vlen_in_bytes = vn->as_StoreVector()->memory_size();


2003           }
2004         }
2005       }
2006     }
2007   }
2008 #ifndef PRODUCT
2009   if (TraceSuperWord && Verbose) {
2010     for (int i = 0; i < _block.length(); i++) {
2011       Node* n = _block.at(i);
2012       velt_type(n)->dump();
2013       tty->print("\t");
2014       n->dump();
2015     }
2016   }
2017 #endif
2018 }
2019 
2020 //------------------------------memory_alignment---------------------------
2021 // Alignment within a vector memory reference
2022 int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
2023   SWPointer p(s, this);
2024   if (!p.valid()) {
       // Address did not match the base + k*iv + offset pattern.
2025     return bottom_align;
2026   }
2027   int vw = vector_width_in_bytes(s);
2028   if (vw < 2) {
2029     return bottom_align; // No vectors for this type
2030   }
2031   int offset  = p.offset_in_bytes();
2032   offset     += iv_adjust*p.memory_size();
     // Normalize the remainder into [0, vw): C's % can yield a negative
     // result when offset is negative.
2033   int off_rem = offset % vw;
2034   int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
2035   return off_mod;
2036 }
2037 
2038 //---------------------------container_type---------------------------
2039 // Smallest type containing range of values
2040 const Type* SuperWord::container_type(Node* n) {
2041   if (n->is_Mem()) {
2042     BasicType bt = n->as_Mem()->memory_type();
2043     if (n->is_Store() && (bt == T_CHAR)) {


2167 // to align_to_ref will be a position zero in the vector.
2168 //   (iv + k) mod vector_align == 0
2169 void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
2170   CountedLoopNode *main_head = lp()->as_CountedLoop();
2171   assert(main_head->is_main_loop(), "");
2172   CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);
2173   assert(pre_end != NULL, "we must have a correct pre-loop");
2174   Node *pre_opaq1 = pre_end->limit();
2175   assert(pre_opaq1->Opcode() == Op_Opaque1, "");
2176   Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2177   Node *lim0 = pre_opaq->in(1);
2178 
2179   // Where we put new limit calculations
2180   Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2181 
2182   // Ensure the original loop limit is available from the
2183   // pre-loop Opaque1 node.
2184   Node *orig_limit = pre_opaq->original_loop_limit();
2185   assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
2186 
2187   SWPointer align_to_ref_p(align_to_ref, this);
2188   assert(align_to_ref_p.valid(), "sanity");
2189 
2190   // Given:
2191   //     lim0 == original pre loop limit
2192   //     V == v_align (power of 2)
2193   //     invar == extra invariant piece of the address expression
2194   //     e == offset [ +/- invar ]
2195   //
2196   // When reassociating expressions involving '%' the basic rules are:
2197   //     (a - b) % k == 0   =>  a % k == b % k
2198   // and:
2199   //     (a + b) % k == 0   =>  a % k == (k - b) % k
2200   //
2201   // For stride > 0 && scale > 0,
2202   //   Derive the new pre-loop limit "lim" such that the two constraints:
2203   //     (1) lim = lim0 + N           (where N is some positive integer < V)
2204   //     (2) (e + lim) % V == 0
2205   //   are true.
2206   //
2207   //   Substituting (1) into (2),


2338   if (loop_node == NULL || !loop_node->is_pre_loop()) return NULL;
2339   return pre_end;
2340 }
2341 
2342 
2343 //------------------------------init---------------------------
     // Reset per-loop state so a single SuperWord instance can be reused
     // across transform_loop() invocations.
2344 void SuperWord::init() {
2345   _dg.init();
2346   _packset.clear();
2347   _disjoint_ptrs.clear();
2348   _block.clear();
2349   _data_entry.clear();
2350   _mem_slice_head.clear();
2351   _mem_slice_tail.clear();
2352   _node_info.clear();
2353   _align_to_ref = NULL;
2354   _lpt = NULL;
2355   _lp = NULL;
2356   _bb = NULL;
2357   _iv = NULL;
     // NOTE(review): _race_possible is not reset here although the
     // constructor initializes it to false — confirm it is reinitialized
     // elsewhere before this instance is reused for another loop.

2358 }
2359 
2360 //------------------------------print_packset---------------------------
     // Debug-only: dump every pack in _packset, one "Pack: <i>" header per pack.
2361 void SuperWord::print_packset() {
2362 #ifndef PRODUCT
2363   tty->print_cr("packset");
2364   for (int i = 0; i < _packset.length(); i++) {
2365     tty->print_cr("Pack: %d", i);
2366     Node_List* p = _packset.at(i);
2367     print_pack(p);
2368   }
2369 #endif
2370 }
2371 
2372 //------------------------------print_pack---------------------------
     // Debug helper: print each statement (node) belonging to pack p.
2373 void SuperWord::print_pack(Node_List* p) {
2374   for (uint i = 0; i < p->size(); i++) {
2375     print_stmt(p->at(i));
2376   }
2377 }


     // Debug-only: print a node's alignment followed by its standard dump.
2394 void SuperWord::print_stmt(Node* s) {
2395 #ifndef PRODUCT
2396   tty->print(" align: %d \t", alignment(s));
2397   s->dump();
2398 #endif
2399 }
2400 
2401 //------------------------------blank---------------------------
     // Return a string of 'depth' spaces for indented tracing output.
     // Points into a shared static buffer: each call overwrites the
     // previous result, and the buffer is not thread-safe.
2402 char* SuperWord::blank(uint depth) {
2403   static char blanks[101];
2404   assert(depth < 101, "too deep");
2405   for (uint i = 0; i < depth; i++) blanks[i] = ' ';
2406   blanks[depth] = '\0';
2407   return blanks;
2408 }
2409 
2410 
2411 //==============================SWPointer===========================
2412 
2413 //----------------------------SWPointer------------------------
     // Decompose a memory node's address into base + k*iv + offset
     // [+/- invariant]. On any pattern-match failure _base/_adr stay NULL,
     // leaving the pointer !valid().
2414 SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
2415   _mem(mem), _slp(slp),  _base(NULL),  _adr(NULL),
2416   _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {


2417 
2418   Node* adr = mem->in(MemNode::Address);
2419   if (!adr->is_AddP()) {
2420     assert(!valid(), "too complex");
2421     return;
2422   }
2423   // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
2424   Node* base = adr->in(AddPNode::Base);
2425   //unsafe reference could not be aligned appropriately without runtime checking
2426   if (base == NULL || base->bottom_type() == Type::TOP) {
2427     assert(!valid(), "unsafe access");
2428     return;
2429   }
     // Walk at most 3 levels of chained AddP nodes, accumulating the
     // scaled-iv and constant/invariant pieces from each Offset input.
2430   for (int i = 0; i < 3; i++) {
2431     if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
2432       assert(!valid(), "too complex");
2433       return;
2434     }
2435     adr = adr->in(AddPNode::Address);
2436     if (base == adr || !adr->is_AddP()) {
2437       break; // stop looking at addp's
2438     }
2439   }
2440   _base = base;
2441   _adr  = adr;
2442   assert(valid(), "Usable");
2443 }
2444 
2445 // Following is used to create a temporary object during
2446 // the pattern match of an address expression.
     // Copies only the node/context handles; _base, _adr, _scale, _offset and
     // _invar are deliberately reset so the temporary starts a fresh match.
2447 SWPointer::SWPointer(SWPointer* p) :
2448   _mem(p->_mem), _slp(p->_slp),  _base(NULL),  _adr(NULL),
2449   _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {}


2450 
2451 //------------------------scaled_iv_plus_offset--------------------
2452 // Match: k*iv + offset
2453 // where: k is a constant that maybe zero, and
2454 //        offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional
2455 bool SWPointer::scaled_iv_plus_offset(Node* n) {
2456   if (scaled_iv(n)) {
2457     return true;
2458   }
2459   if (offset_plus_k(n)) {
2460     return true;
2461   }
2462   int opc = n->Opcode();
2463   if (opc == Op_AddI) {
2464     if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {
2465       return true;
2466     }
2467     if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
2468       return true;
2469     }


2472       return true;
2473     }
2474     if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
2475       _scale *= -1;
2476       return true;
2477     }
2478   }
2479   return false;
2480 }
2481 
2482 //----------------------------scaled_iv------------------------
2483 // Match: k*iv where k is a constant that's not zero
2484 bool SWPointer::scaled_iv(Node* n) {
2485   if (_scale != 0) {
2486     return false;  // already found a scale
2487   }
2488   if (n == iv()) {
2489     _scale = 1;
2490     return true;
2491   }



2492   int opc = n->Opcode();
2493   if (opc == Op_MulI) {
2494     if (n->in(1) == iv() && n->in(2)->is_Con()) {
2495       _scale = n->in(2)->get_int();
2496       return true;
2497     } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
2498       _scale = n->in(1)->get_int();
2499       return true;
2500     }
2501   } else if (opc == Op_LShiftI) {
2502     if (n->in(1) == iv() && n->in(2)->is_Con()) {
2503       _scale = 1 << n->in(2)->get_int();
2504       return true;
2505     }
2506   } else if (opc == Op_ConvI2L) {
2507     if (scaled_iv_plus_offset(n->in(1))) {
2508       return true;
2509     }
2510   } else if (opc == Op_LShiftL) {
2511     if (!has_iv() && _invar == NULL) {


2529 //----------------------------offset_plus_k------------------------
2530 // Match: offset is (k [+/- invariant])
2531 // where k maybe zero and invariant is optional, but not both.
2532 bool SWPointer::offset_plus_k(Node* n, bool negate) {
2533   int opc = n->Opcode();
2534   if (opc == Op_ConI) {
2535     _offset += negate ? -(n->get_int()) : n->get_int();
2536     return true;
2537   } else if (opc == Op_ConL) {
2538     // Okay if value fits into an int
2539     const TypeLong* t = n->find_long_type();
2540     if (t->higher_equal(TypeLong::INT)) {
2541       jlong loff = n->get_long();
2542       jint  off  = (jint)loff;
2543       _offset += negate ? -off : loff;
2544       return true;
2545     }
2546     return false;
2547   }
2548   if (_invar != NULL) return false; // already have an invariant



2549   if (opc == Op_AddI) {
2550     if (n->in(2)->is_Con() && invariant(n->in(1))) {
2551       _negate_invar = negate;
2552       _invar = n->in(1);
2553       _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
2554       return true;
2555     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
2556       _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
2557       _negate_invar = negate;
2558       _invar = n->in(2);
2559       return true;
2560     }
2561   }
2562   if (opc == Op_SubI) {
2563     if (n->in(2)->is_Con() && invariant(n->in(1))) {
2564       _negate_invar = negate;
2565       _invar = n->in(1);
2566       _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
2567       return true;
2568     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {




  49   _arena(phase->C->comp_arena()),
  50   _packset(arena(), 8,  0, NULL),         // packs for the current block
  51   _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
  52   _block(arena(), 8,  0, NULL),           // nodes in current block
  53   _data_entry(arena(), 8,  0, NULL),      // nodes with all inputs from outside
  54   _mem_slice_head(arena(), 8,  0, NULL),  // memory slice heads
  55   _mem_slice_tail(arena(), 8,  0, NULL),  // memory slice tails
  56   _node_info(arena(), 8,  0, SWNodeInfo::initial), // info needed per node
  57   _align_to_ref(NULL),                    // memory reference to align vectors to
  58   _disjoint_ptrs(arena(), 8,  0, OrderedPair::initial), // runtime disambiguated pointer pairs
  59   _dg(_arena),                            // dependence graph
  60   _visited(arena()),                      // visited node set
  61   _post_visited(arena()),                 // post visited node set
  62   _n_idx_list(arena(), 8),                // scratch list of (node,index) pairs
  63   _stk(arena(), 8, 0, NULL),              // scratch stack of nodes
  64   _nlist(arena(), 8, 0, NULL),            // scratch list of nodes
  65   _lpt(NULL),                             // loop tree node
  66   _lp(NULL),                              // LoopNode
  67   _bb(NULL),                              // basic block
  68   _iv(NULL),                              // induction var
  69   _race_possible(false),                  // cases where SDMU is true
  70   _early_return(true)
  71 {}
  72 
  73 //------------------------------transform_loop---------------------------
  74 void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
  75   assert(UseSuperWord, "should be");
  76   // Do vectors exist on this architecture?
  77   if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
  78 
  79   assert(lpt->_head->is_CountedLoop(), "must be");
  80   CountedLoopNode *cl = lpt->_head->as_CountedLoop();
  81 
  82   if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
  83 
  84   if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
  85 
  86   // Check for no control flow in body (other than exit)
  87   Node *cl_exit = cl->loopexit();
  88   if (cl_exit->in(0) != lpt->_head) return;
  89 
  90   // Make sure there are no extra control users of the loop backedge
  91   if (cl->back_control()->outcnt() != 1) {
  92     return;
  93   }
  94 
  95   // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
  96   CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
  97   if (pre_end == NULL) return;
  98   Node *pre_opaq1 = pre_end->limit();
  99   if (pre_opaq1->Opcode() != Op_Opaque1) return;
 100 
 101   init(); // initialize data structures
 102 
 103   set_lpt(lpt);
 104   set_lp(cl);
 105 
 106   // For now, define one block which is the entire loop body
 107   set_bb(cl);
 108 
     // When do_optimization is false the caller only wants the analysis
     // state set up; skip the actual SLP extraction.
 109   if (do_optimization) {
 110     assert(_packset.length() == 0, "packset must be empty");
 111     SLP_extract();
 112   }
 113 }
 114 
 115 //------------------------------SLP_extract---------------------------
 116 // Extract the superword level parallelism
 117 //
 118 // 1) A reverse post-order of nodes in the block is constructed.  By scanning
 119 //    this list from first to last, all definitions are visited before their uses.
 120 //
 121 // 2) A point-to-point dependence graph is constructed between memory references.
 122 //    This simplifies the upcoming "independence" checker.
 123 //
 124 // 3) The maximum depth in the node graph from the beginning of the block
 125 //    to each node is computed.  This is used to prune the graph search
 126 //    in the independence checker.
 127 //
 128 // 4) For integer types, the necessary bit width is propagated backwards
 129 //    from stores to allow packed operations on byte, char, and short
 130 //    integers.  This reverses the promotion to type "int" that javac
 131 //    did for operations like: char c1,c2,c3;  c1 = c2 + c3.
 132 //


 196 
 197   Node_List align_to_refs;
 198   int best_iv_adjustment = 0;
 199   MemNode* best_align_to_mem_ref = NULL;
 200 
 201   while (memops.size() != 0) {
 202     // Find a memory reference to align to.
 203     MemNode* mem_ref = find_align_to_ref(memops);
 204     if (mem_ref == NULL) break;
 205     align_to_refs.push(mem_ref);
 206     int iv_adjustment = get_iv_adjustment(mem_ref);
 207 
 208     if (best_align_to_mem_ref == NULL) {
 209       // Set memory reference which is the best from all memory operations
 210       // to be used for alignment. The pre-loop trip count is modified to align
 211       // this reference to a vector-aligned address.
 212       best_align_to_mem_ref = mem_ref;
 213       best_iv_adjustment = iv_adjustment;
 214     }
 215 
 216     SWPointer align_to_ref_p(mem_ref, this, NULL, false);
 217     // Set alignment relative to "align_to_ref" for all related memory operations.
 218     for (int i = memops.size() - 1; i >= 0; i--) {
 219       MemNode* s = memops.at(i)->as_Mem();
 220       if (isomorphic(s, mem_ref)) {
 221         SWPointer p2(s, this, NULL, false);
 222         if (p2.comparable(align_to_ref_p)) {
 223           int align = memory_alignment(s, iv_adjustment);
 224           set_alignment(s, align);
 225         }
 226       }
 227     }
 228 
 229     // Create initial pack pairs of memory operations for which
 230     // alignment is set and vectors will be aligned.
 231     bool create_pack = true;
 232     if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
 233       if (!Matcher::misaligned_vectors_ok()) {
 234         int vw = vector_width(mem_ref);
 235         int vw_best = vector_width(best_align_to_mem_ref);
 236         if (vw > vw_best) {
 237           // Do not vectorize a memory access with more elements per vector
 238           // if unaligned memory access is not allowed because number of
 239           // iterations in pre-loop will be not enough to align it.
 240           create_pack = false;
 241         }


 330   set_align_to_ref(best_align_to_mem_ref);
 331 
 332 #ifndef PRODUCT
 333   if (TraceSuperWord) {
 334     tty->print_cr("\nAfter find_adjacent_refs");
 335     print_packset();
 336   }
 337 #endif
 338 }
 339 
 340 //------------------------------find_align_to_ref---------------------------
 341 // Find a memory reference to align the loop induction variable to.
 342 // Looks first at stores then at loads, looking for a memory reference
 343 // with the largest number of references similar to it.
 344 MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
 345   GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
 346 
 347   // Count number of comparable memory ops
 348   for (uint i = 0; i < memops.size(); i++) {
 349     MemNode* s1 = memops.at(i)->as_Mem();
 350     SWPointer p1(s1, this, NULL, false);
 351     // Discard if pre loop can't align this reference
 352     if (!ref_is_alignable(p1)) {
 353       *cmp_ct.adr_at(i) = 0;
 354       continue;
 355     }
 356     for (uint j = i+1; j < memops.size(); j++) {
 357       MemNode* s2 = memops.at(j)->as_Mem();
 358       if (isomorphic(s1, s2)) {
 359         SWPointer p2(s2, this, NULL, false);
 360         if (p1.comparable(p2)) {
 361           (*cmp_ct.adr_at(i))++;
 362           (*cmp_ct.adr_at(j))++;
 363         }
 364       }
 365     }
 366   }
 367 
 368   // Find Store (or Load) with the greatest number of "comparable" references,
 369   // biggest vector size, smallest data size and smallest iv offset.
 370   int max_ct        = 0;
 371   int max_vw        = 0;
 372   int max_idx       = -1;
 373   int min_size      = max_jint;
 374   int min_iv_offset = max_jint;
 375   for (uint j = 0; j < memops.size(); j++) {
 376     MemNode* s = memops.at(j)->as_Mem();
 377     if (s->is_Store()) {
 378       int vw = vector_width_in_bytes(s);
 379       assert(vw > 1, "sanity");
 380       SWPointer p(s, this, NULL, false);
 381       if (cmp_ct.at(j) >  max_ct ||
 382           cmp_ct.at(j) == max_ct &&
 383             (vw >  max_vw ||
 384              vw == max_vw &&
 385               (data_size(s) <  min_size ||
 386                data_size(s) == min_size &&
 387                  (p.offset_in_bytes() < min_iv_offset)))) {
 388         max_ct = cmp_ct.at(j);
 389         max_vw = vw;
 390         max_idx = j;
 391         min_size = data_size(s);
 392         min_iv_offset = p.offset_in_bytes();
 393       }
 394     }
 395   }
 396   // If no stores, look at loads
 397   if (max_ct == 0) {
 398     for (uint j = 0; j < memops.size(); j++) {
 399       MemNode* s = memops.at(j)->as_Mem();
 400       if (s->is_Load()) {
 401         int vw = vector_width_in_bytes(s);
 402         assert(vw > 1, "sanity");
 403         SWPointer p(s, this, NULL, false);
 404         if (cmp_ct.at(j) >  max_ct ||
 405             cmp_ct.at(j) == max_ct &&
 406               (vw >  max_vw ||
 407                vw == max_vw &&
 408                 (data_size(s) <  min_size ||
 409                  data_size(s) == min_size &&
 410                    (p.offset_in_bytes() < min_iv_offset)))) {
 411           max_ct = cmp_ct.at(j);
 412           max_vw = vw;
 413           max_idx = j;
 414           min_size = data_size(s);
 415           min_iv_offset = p.offset_in_bytes();
 416         }
 417       }
 418     }
 419   }
 420 
 421 #ifdef ASSERT
 422   if (TraceSuperWord && Verbose) {
 423     tty->print_cr("\nVector memops after find_align_to_refs");


 468     if (init_nd->is_Con() && p.invar() == NULL) {
 469       int init = init_nd->bottom_type()->is_int()->get_con();
 470 
 471       int init_offset = init * p.scale_in_bytes() + offset;
 472       assert(init_offset >= 0, "positive offset from object start");
 473 
 474       if (span > 0) {
 475         return (vw - (init_offset % vw)) % span == 0;
 476       } else {
 477         assert(span < 0, "nonzero stride * scale");
 478         return (init_offset % vw) % -span == 0;
 479       }
 480     }
 481   }
 482   return false;
 483 }
 484 
 485 //---------------------------get_iv_adjustment---------------------------
 486 // Calculate the loop's iv adjustment for this memory op.
 487 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
 488   SWPointer align_to_ref_p(mem_ref, this, NULL, false);
 489   int offset = align_to_ref_p.offset_in_bytes();
 490   int scale  = align_to_ref_p.scale_in_bytes();
 491   int vw       = vector_width_in_bytes(mem_ref);
 492   assert(vw > 1, "sanity");
 493   int stride_sign   = (scale * iv_stride()) > 0 ? 1 : -1;
 494   // At least one iteration is executed in pre-loop by default. As result
 495   // several iterations are needed to align memory operations in main-loop even
 496   // if offset is 0.
 497   int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
 498   int elt_size = align_to_ref_p.memory_size();
 499   assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
 500          err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
 501   int iv_adjustment = iv_adjustment_in_bytes/elt_size;
 502 
 503 #ifndef PRODUCT
 504   if (TraceSuperWord)
 505     tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
 506                   offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
 507 #endif
 508   return iv_adjustment;


 528 
 529     // Get slice in predecessor order (last is first)
 530     mem_slice_preds(n_tail, n, _nlist);
 531 
 532     // Make the slice dependent on the root
 533     DepMem* slice = _dg.dep(n);
 534     _dg.make_edge(_dg.root(), slice);
 535 
 536     // Create a sink for the slice
 537     DepMem* slice_sink = _dg.make_node(NULL);
 538     _dg.make_edge(slice_sink, _dg.tail());
 539 
 540     // Now visit each pair of memory ops, creating the edges
 541     for (int j = _nlist.length() - 1; j >= 0 ; j--) {
 542       Node* s1 = _nlist.at(j);
 543 
 544       // If no dependency yet, use slice
 545       if (_dg.dep(s1)->in_cnt() == 0) {
 546         _dg.make_edge(slice, s1);
 547       }
 548       SWPointer p1(s1->as_Mem(), this, NULL, false);
 549       bool sink_dependent = true;
 550       for (int k = j - 1; k >= 0; k--) {
 551         Node* s2 = _nlist.at(k);
 552         if (s1->is_Load() && s2->is_Load())
 553           continue;
 554         SWPointer p2(s2->as_Mem(), this, NULL, false);
 555 
 556         int cmp = p1.cmp(p2);
 557         if (SuperWordRTDepCheck &&
 558             p1.base() != p2.base() && p1.valid() && p2.valid()) {
 559           // Create a runtime check to disambiguate
 560           OrderedPair pp(p1.base(), p2.base());
 561           _disjoint_ptrs.append_if_missing(pp);
 562         } else if (!SWPointer::not_equal(cmp)) {
 563           // Possibly same address
 564           _dg.make_edge(s1, s2);
 565           sink_dependent = false;
 566         }
 567       }
 568       if (sink_dependent) {
 569         _dg.make_edge(s1, slice_sink);
 570       }
 571     }
 572 #ifndef PRODUCT
 573     if (TraceSuperWord) {
 574       tty->print_cr("\nDependence graph for slice: %d", n->_idx);


 674   return false;
 675 }
 676 
 677 //------------------------------are_adjacent_refs---------------------------
 678 // Is s1 immediately before s2 in memory?
 679 bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
 680   if (!s1->is_Mem() || !s2->is_Mem()) return false;
 681   if (!in_bb(s1)    || !in_bb(s2))    return false;
 682 
 683   // Do not use superword for non-primitives
 684   if (!is_java_primitive(s1->as_Mem()->memory_type()) ||
 685       !is_java_primitive(s2->as_Mem()->memory_type())) {
 686     return false;
 687   }
 688 
 689   // FIXME - co_locate_pack fails on Stores in different mem-slices, so
 690   // only pack memops that are in the same alias set until that's fixed.
 691   if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
 692       _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
 693     return false;
 694   SWPointer p1(s1->as_Mem(), this, NULL, false);
 695   SWPointer p2(s2->as_Mem(), this, NULL, false);
 696   if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
 697   int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
 698   return diff == data_size(s1);
 699 }
 700 
 701 //------------------------------isomorphic---------------------------
 702 // Are s1 and s2 similar?
 703 bool SuperWord::isomorphic(Node* s1, Node* s2) {
 704   if (s1->Opcode() != s2->Opcode()) return false;
 705   if (s1->req() != s2->req()) return false;
 706   if (s1->in(0) != s2->in(0)) return false;
 707   if (!same_velt_type(s1, s2)) return false;
 708   return true;
 709 }
 710 
 711 //------------------------------independent---------------------------
 712 // Is there no data path from s1 to s2 or s2 to s1?
 713 bool SuperWord::independent(Node* s1, Node* s2) {
 714   //  assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
 715   int d1 = depth(s1);


1483   // Insert extract (unpack) operations for scalar uses
1484   for (int i = 0; i < _packset.length(); i++) {
1485     insert_extracts(_packset.at(i));
1486   }
1487 
1488   Compile* C = _phase->C;
1489   uint max_vlen_in_bytes = 0;
1490   for (int i = 0; i < _block.length(); i++) {
1491     Node* n = _block.at(i);
1492     Node_List* p = my_pack(n);
1493     if (p && n == executed_last(p)) {
1494       uint vlen = p->size();
1495       uint vlen_in_bytes = 0;
1496       Node* vn = NULL;
1497       Node* low_adr = p->at(0);
1498       Node* first   = executed_first(p);
1499       int   opc = n->Opcode();
1500       if (n->is_Load()) {
1501         Node* ctl = n->in(MemNode::Control);
1502         Node* mem = first->in(MemNode::Memory);
1503         SWPointer p1(n->as_Mem(), this, NULL, false);
1504         // Identify the memory dependency for the new loadVector node by
1505         // walking up through memory chain.
1506         // This is done to give flexibility to the new loadVector node so that
1507         // it can move above independent storeVector nodes.
1508         while (mem->is_StoreVector()) {
1509           SWPointer p2(mem->as_Mem(), this, NULL, false);
1510           int cmp = p1.cmp(p2);
1511           if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
1512             mem = mem->in(MemNode::Memory);
1513           } else {
1514             break; // dependent memory
1515           }
1516         }
1517         Node* adr = low_adr->in(MemNode::Address);
1518         const TypePtr* atyp = n->adr_type();
1519         vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
1520         vlen_in_bytes = vn->as_LoadVector()->memory_size();
1521       } else if (n->is_Store()) {
1522         // Promote value to be stored to vector
1523         Node* val = vector_opd(p, MemNode::ValueIn);
1524         Node* ctl = n->in(MemNode::Control);
1525         Node* mem = first->in(MemNode::Memory);
1526         Node* adr = low_adr->in(MemNode::Address);
1527         const TypePtr* atyp = n->adr_type();
1528         vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
1529         vlen_in_bytes = vn->as_StoreVector()->memory_size();


2006           }
2007         }
2008       }
2009     }
2010   }
2011 #ifndef PRODUCT
2012   if (TraceSuperWord && Verbose) {
2013     for (int i = 0; i < _block.length(); i++) {
2014       Node* n = _block.at(i);
2015       velt_type(n)->dump();
2016       tty->print("\t");
2017       n->dump();
2018     }
2019   }
2020 #endif
2021 }
2022 
2023 //------------------------------memory_alignment---------------------------
2024 // Alignment within a vector memory reference
2025 int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
2026   SWPointer p(s, this, NULL, false);
2027   if (!p.valid()) {
2028     return bottom_align;
2029   }
2030   int vw = vector_width_in_bytes(s);
2031   if (vw < 2) {
2032     return bottom_align; // No vectors for this type
2033   }
2034   int offset  = p.offset_in_bytes();
2035   offset     += iv_adjust*p.memory_size();
2036   int off_rem = offset % vw;
2037   int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
2038   return off_mod;
2039 }
2040 
2041 //---------------------------container_type---------------------------
2042 // Smallest type containing range of values
2043 const Type* SuperWord::container_type(Node* n) {
2044   if (n->is_Mem()) {
2045     BasicType bt = n->as_Mem()->memory_type();
2046     if (n->is_Store() && (bt == T_CHAR)) {


2170 // to align_to_ref will be a position zero in the vector.
2171 //   (iv + k) mod vector_align == 0
2172 void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
2173   CountedLoopNode *main_head = lp()->as_CountedLoop();
2174   assert(main_head->is_main_loop(), "");
2175   CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);
2176   assert(pre_end != NULL, "we must have a correct pre-loop");
2177   Node *pre_opaq1 = pre_end->limit();
2178   assert(pre_opaq1->Opcode() == Op_Opaque1, "");
2179   Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2180   Node *lim0 = pre_opaq->in(1);
2181 
2182   // Where we put new limit calculations
2183   Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2184 
2185   // Ensure the original loop limit is available from the
2186   // pre-loop Opaque1 node.
2187   Node *orig_limit = pre_opaq->original_loop_limit();
2188   assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
2189 
2190   SWPointer align_to_ref_p(align_to_ref, this, NULL, false);
2191   assert(align_to_ref_p.valid(), "sanity");
2192 
2193   // Given:
2194   //     lim0 == original pre loop limit
2195   //     V == v_align (power of 2)
2196   //     invar == extra invariant piece of the address expression
2197   //     e == offset [ +/- invar ]
2198   //
2199   // When reassociating expressions involving '%' the basic rules are:
2200   //     (a - b) % k == 0   =>  a % k == b % k
2201   // and:
2202   //     (a + b) % k == 0   =>  a % k == (k - b) % k
2203   //
2204   // For stride > 0 && scale > 0,
2205   //   Derive the new pre-loop limit "lim" such that the two constraints:
2206   //     (1) lim = lim0 + N           (where N is some positive integer < V)
2207   //     (2) (e + lim) % V == 0
2208   //   are true.
2209   //
2210   //   Substituting (1) into (2),


2341   if (loop_node == NULL || !loop_node->is_pre_loop()) return NULL;
2342   return pre_end;
2343 }
2344 
2345 
//------------------------------init---------------------------
// Reset all per-loop state so this SuperWord instance can be reused
// to analyze another loop.
void SuperWord::init() {
  _dg.init();                 // dependence graph
  _packset.clear();           // packs for the current block
  _disjoint_ptrs.clear();     // runtime disambiguated pointer pairs
  _block.clear();             // nodes in current block
  _data_entry.clear();        // nodes with all inputs from outside
  _mem_slice_head.clear();    // memory slice heads
  _mem_slice_tail.clear();    // memory slice tails
  _node_info.clear();         // per-node info
  _align_to_ref = NULL;       // memory reference to align vectors to
  _lpt = NULL;                // loop tree node
  _lp = NULL;                 // LoopNode
  _bb = NULL;                 // basic block
  _iv = NULL;                 // induction var
  _early_return = false;
}
2363 
2364 //------------------------------print_packset---------------------------
2365 void SuperWord::print_packset() {
2366 #ifndef PRODUCT
2367   tty->print_cr("packset");
2368   for (int i = 0; i < _packset.length(); i++) {
2369     tty->print_cr("Pack: %d", i);
2370     Node_List* p = _packset.at(i);
2371     print_pack(p);
2372   }
2373 #endif
2374 }
2375 
2376 //------------------------------print_pack---------------------------
2377 void SuperWord::print_pack(Node_List* p) {
2378   for (uint i = 0; i < p->size(); i++) {
2379     print_stmt(p->at(i));
2380   }
2381 }


2398 void SuperWord::print_stmt(Node* s) {
2399 #ifndef PRODUCT
2400   tty->print(" align: %d \t", alignment(s));
2401   s->dump();
2402 #endif
2403 }
2404 
2405 //------------------------------blank---------------------------
2406 char* SuperWord::blank(uint depth) {
2407   static char blanks[101];
2408   assert(depth < 101, "too deep");
2409   for (uint i = 0; i < depth; i++) blanks[i] = ' ';
2410   blanks[depth] = '\0';
2411   return blanks;
2412 }
2413 
2414 
2415 //==============================SWPointer===========================
2416 
//----------------------------SWPointer------------------------
// Analyze the address expression of memory op 'mem', decomposing it
// into the linear form: base + [invar] + scale * iv + offset.
// On failure the match fields stay NULL/0 and valid() is false.
// When 'analyze_only' is set, non-invariant subexpressions are pushed
// onto 'nstack' (see scaled_iv/offset_plus_k) instead of only matched.
SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) :
  _mem(mem), _slp(slp),  _base(NULL),  _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
  _nstack(nstack), _analyze_only(analyze_only),
  _stack_idx(0) {

  Node* adr = mem->in(MemNode::Address);
  if (!adr->is_AddP()) {
    assert(!valid(), "too complex");
    return;
  }
  // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
  Node* base = adr->in(AddPNode::Base);
  // An unsafe reference could not be aligned appropriately without
  // runtime checking, so reject it here.
  if (base == NULL || base->bottom_type() == Type::TOP) {
    assert(!valid(), "unsafe access");
    return;
  }
  // Walk up to three levels of AddP, folding each Offset input into
  // the _scale/_offset/_invar decomposition as it is matched.
  for (int i = 0; i < 3; i++) {
    if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
      assert(!valid(), "too complex");
      return;
    }
    adr = adr->in(AddPNode::Address);
    if (base == adr || !adr->is_AddP()) {
      break; // stop looking at addp's
    }
  }
  // Record the match; _base != NULL is what makes valid() true.
  _base = base;
  _adr  = adr;
  assert(valid(), "Usable");
}
2450 
// Following is used to create a temporary object during
// the pattern match of an address expression.
// It copies the analysis context (mem, slp, nstack, analyze_only,
// stack index) from 'p' but resets the match results
// (_base, _adr, _scale, _offset, _invar) to their initial state.
SWPointer::SWPointer(SWPointer* p) :
  _mem(p->_mem), _slp(p->_slp),  _base(NULL),  _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
  _nstack(p->_nstack), _analyze_only(p->_analyze_only),
  _stack_idx(p->_stack_idx) {}
2458 
2459 //------------------------scaled_iv_plus_offset--------------------
2460 // Match: k*iv + offset
2461 // where: k is a constant that maybe zero, and
2462 //        offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional
2463 bool SWPointer::scaled_iv_plus_offset(Node* n) {
2464   if (scaled_iv(n)) {
2465     return true;
2466   }
2467   if (offset_plus_k(n)) {
2468     return true;
2469   }
2470   int opc = n->Opcode();
2471   if (opc == Op_AddI) {
2472     if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {
2473       return true;
2474     }
2475     if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
2476       return true;
2477     }


2480       return true;
2481     }
2482     if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
2483       _scale *= -1;
2484       return true;
2485     }
2486   }
2487   return false;
2488 }
2489 
2490 //----------------------------scaled_iv------------------------
2491 // Match: k*iv where k is a constant that's not zero
2492 bool SWPointer::scaled_iv(Node* n) {
2493   if (_scale != 0) {
2494     return false;  // already found a scale
2495   }
2496   if (n == iv()) {
2497     _scale = 1;
2498     return true;
2499   }
2500   if (_analyze_only && (invariant(n) == false)) {
2501     _nstack->push(n, _stack_idx++);
2502   }
2503   int opc = n->Opcode();
2504   if (opc == Op_MulI) {
2505     if (n->in(1) == iv() && n->in(2)->is_Con()) {
2506       _scale = n->in(2)->get_int();
2507       return true;
2508     } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
2509       _scale = n->in(1)->get_int();
2510       return true;
2511     }
2512   } else if (opc == Op_LShiftI) {
2513     if (n->in(1) == iv() && n->in(2)->is_Con()) {
2514       _scale = 1 << n->in(2)->get_int();
2515       return true;
2516     }
2517   } else if (opc == Op_ConvI2L) {
2518     if (scaled_iv_plus_offset(n->in(1))) {
2519       return true;
2520     }
2521   } else if (opc == Op_LShiftL) {
2522     if (!has_iv() && _invar == NULL) {


2540 //----------------------------offset_plus_k------------------------
2541 // Match: offset is (k [+/- invariant])
2542 // where k maybe zero and invariant is optional, but not both.
2543 bool SWPointer::offset_plus_k(Node* n, bool negate) {
2544   int opc = n->Opcode();
2545   if (opc == Op_ConI) {
2546     _offset += negate ? -(n->get_int()) : n->get_int();
2547     return true;
2548   } else if (opc == Op_ConL) {
2549     // Okay if value fits into an int
2550     const TypeLong* t = n->find_long_type();
2551     if (t->higher_equal(TypeLong::INT)) {
2552       jlong loff = n->get_long();
2553       jint  off  = (jint)loff;
2554       _offset += negate ? -off : loff;
2555       return true;
2556     }
2557     return false;
2558   }
2559   if (_invar != NULL) return false; // already have an invariant
2560   if (_analyze_only && (invariant(n) == false)) {
2561     _nstack->push(n, _stack_idx++);
2562   }
2563   if (opc == Op_AddI) {
2564     if (n->in(2)->is_Con() && invariant(n->in(1))) {
2565       _negate_invar = negate;
2566       _invar = n->in(1);
2567       _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
2568       return true;
2569     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
2570       _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
2571       _negate_invar = negate;
2572       _invar = n->in(2);
2573       return true;
2574     }
2575   }
2576   if (opc == Op_SubI) {
2577     if (n->in(2)->is_Con() && invariant(n->in(1))) {
2578       _negate_invar = negate;
2579       _invar = n->in(1);
2580       _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
2581       return true;
2582     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {


< prev index next >