
hotspot/src/share/vm/opto/superword.cpp

rev 7350 : 8078497: C2's superword optimization causes unaligned memory accesses
Summary: Prevent vectorization of memory operations with different invariant offsets if unaligned memory accesses are not allowed.
Reviewed-by: kvn
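The scenario behind this change, sketched as an illustration only (the function and
names below are invented, not taken from the changeset): a counted loop with two
accesses whose addresses differ by a loop-invariant amount cannot have both accesses
aligned by the same pre-loop, because the invariant is unknown at compile time. On
targets where Matcher::misaligned_vectors_ok() is false, packing both against a single
alignment decision could therefore produce misaligned vector accesses.

    // Illustrative only: the constant parts of a[i] and a[i + k] differ by the
    // loop-invariant 'k', so the pre-loop can align one of them, but not both.
    void add_offset(int* a, int n, int k) {
      for (int i = 0; i < n; i++) {
        a[i] += a[i + k];
      }
    }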


 215         SWPointer p2(s, this);
 216         if (p2.comparable(align_to_ref_p)) {
 217           int align = memory_alignment(s, iv_adjustment);
 218           set_alignment(s, align);
 219         }
 220       }
 221     }
 222 
 223     // Create initial pack pairs of memory operations for which
 224     // alignment is set and vectors will be aligned.
 225     bool create_pack = true;
 226     if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
 227       if (!Matcher::misaligned_vectors_ok()) {
 228         int vw = vector_width(mem_ref);
 229         int vw_best = vector_width(best_align_to_mem_ref);
 230         if (vw > vw_best) {
 231           // Do not vectorize a memory access with more elements per vector
 232           // if unaligned memory access is not allowed because the number of
 233           // iterations in the pre-loop will not be enough to align it.
 234           create_pack = false;
 235         }
 236       }
 237     } else {
 238       if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
 239         // Can't allow vectorization of unaligned memory accesses with the
 240         // same type since they could be overlapping accesses to the same array.
 241         create_pack = false;
 242       } else {
 243         // Allow independent (different type) unaligned memory operations
 244         // if HW supports them.
 245         if (!Matcher::misaligned_vectors_ok()) {
 246           create_pack = false;
 247         } else {
 248           // Check if packs of the same memory type but
 249           // with a different alignment were created before.
 250           for (uint i = 0; i < align_to_refs.size(); i++) {
 251             MemNode* mr = align_to_refs.at(i)->as_Mem();
 252             if (same_velt_type(mr, mem_ref) &&
 253                 memory_alignment(mr, iv_adjustment) != 0)
 254               create_pack = false;


 428       tty->print("\nVector align to node: ");
 429       memops.at(max_idx)->as_Mem()->dump();
 430     }
 431 #endif
 432     return memops.at(max_idx)->as_Mem();
 433   }
 434   return NULL;
 435 }
 436 
 437 //------------------------------ref_is_alignable---------------------------
 438 // Can the preloop align the reference to position zero in the vector?
 439 bool SuperWord::ref_is_alignable(SWPointer& p) {
 440   if (!p.has_iv()) {
 441     return true;   // no induction variable
 442   }
 443   CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
 444   assert(pre_end->stride_is_con(), "pre loop stride is constant");
 445   int preloop_stride = pre_end->stride_con();
 446 
 447   int span = preloop_stride * p.scale_in_bytes();
 448 
 449   // Stride one accesses are alignable.
 450   if (ABS(span) == p.memory_size())


 451     return true;
 452 
 453   // If initial offset from start of object is computable,
 454   // compute alignment within the vector.
 455   int vw = vector_width_in_bytes(p.mem());
 456   assert(vw > 1, "sanity");
 457   if (vw % span == 0) {
 458     Node* init_nd = pre_end->init_trip();
 459     if (init_nd->is_Con() && p.invar() == NULL) {
 460       int init = init_nd->bottom_type()->is_int()->get_con();
 461 
 462       int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes();
 463       assert(init_offset >= 0, "positive offset from object start");
 464 

 465       if (span > 0) {
 466         return (vw - (init_offset % vw)) % span == 0;
 467       } else {
 468         assert(span < 0, "nonzero stride * scale");
 469         return (init_offset % vw) % -span == 0;
 470       }
 471     }
 472   }
 473   return false;
 474 }
 475 
 476 //---------------------------get_iv_adjustment---------------------------
 477 // Calculate the loop's iv adjustment for this memory op.
 478 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
 479   SWPointer align_to_ref_p(mem_ref, this);
 480   int offset = align_to_ref_p.offset_in_bytes();
 481   int scale  = align_to_ref_p.scale_in_bytes();

 482   int vw       = vector_width_in_bytes(mem_ref);
 483   assert(vw > 1, "sanity");


 484   int stride_sign   = (scale * iv_stride()) > 0 ? 1 : -1;
 485   // At least one iteration is executed in the pre-loop by default. As a result,
 486   // several iterations are needed to align memory operations in the main loop even
 487   // if the offset is 0.
 488   int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
 489   int elt_size = align_to_ref_p.memory_size();
 490   assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
 491          err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
 492   int iv_adjustment = iv_adjustment_in_bytes/elt_size;




 493 
 494 #ifndef PRODUCT
 495   if (TraceSuperWord)
 496     tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
 497                   offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
 498 #endif
 499   return iv_adjustment;
 500 }
 501 
 502 //---------------------------dependence_graph---------------------------
 503 // Construct dependency graph.
 504 // Add dependence edges to load/store nodes for memory dependence
 505 //    A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
 506 void SuperWord::dependence_graph() {
 507   // First, assign a dependence node to each memory node
 508   for (int i = 0; i < _block.length(); i++ ) {
 509     Node *n = _block.at(i);
 510     if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
 511       _dg.make_node(n);
 512     }


2230   for (uint i = 0; i < depth; i++) blanks[i] = ' ';
2231   blanks[depth] = '\0';
2232   return blanks;
2233 }
2234 
2235 
2236 //==============================SWPointer===========================
2237 
2238 //----------------------------SWPointer------------------------
2239 SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
2240   _mem(mem), _slp(slp),  _base(NULL),  _adr(NULL),
2241   _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {
2242 
2243   Node* adr = mem->in(MemNode::Address);
2244   if (!adr->is_AddP()) {
2245     assert(!valid(), "too complex");
2246     return;
2247   }
2248   // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
2249   Node* base = adr->in(AddPNode::Base);
2250   // Unsafe references cannot be aligned appropriately without runtime checking
2251   if (base == NULL || base->bottom_type() == Type::TOP) {
2252     assert(!valid(), "unsafe access");
2253     return;
2254   }
2255   for (int i = 0; i < 3; i++) {
2256     if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
2257       assert(!valid(), "too complex");
2258       return;
2259     }
2260     adr = adr->in(AddPNode::Address);
2261     if (base == adr || !adr->is_AddP()) {
2262       break; // stop looking at addp's
2263     }
2264   }
2265   _base = base;
2266   _adr  = adr;
2267   assert(valid(), "Usable");
2268 }
2269 




 215         SWPointer p2(s, this);
 216         if (p2.comparable(align_to_ref_p)) {
 217           int align = memory_alignment(s, iv_adjustment);
 218           set_alignment(s, align);
 219         }
 220       }
 221     }
 222 
 223     // Create initial pack pairs of memory operations for which
 224     // alignment is set and vectors will be aligned.
 225     bool create_pack = true;
 226     if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
 227       if (!Matcher::misaligned_vectors_ok()) {
 228         int vw = vector_width(mem_ref);
 229         int vw_best = vector_width(best_align_to_mem_ref);
 230         if (vw > vw_best) {
 231           // Do not vectorize a memory access with more elements per vector
 232           // if unaligned memory access is not allowed because the number of
 233           // iterations in the pre-loop will not be enough to align it.
 234           create_pack = false;
 235         } else {
 236           SWPointer p2(best_align_to_mem_ref, this);
 237           if (align_to_ref_p.invar() != p2.invar()) {
 238             // Do not vectorize memory accesses with different invariants
 239             // if unaligned memory accesses are not allowed.
 240             create_pack = false;
 241           }
 242         }
 243       }
 244     } else {
 245       if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
 246         // Can't allow vectorization of unaligned memory accesses with the
 247         // same type since they could be overlapping accesses to the same array.
 248         create_pack = false;
 249       } else {
 250         // Allow independent (different type) unaligned memory operations
 251         // if HW supports them.
 252         if (!Matcher::misaligned_vectors_ok()) {
 253           create_pack = false;
 254         } else {
 255           // Check if packs of the same memory type but
 256           // with a different alignment were created before.
 257           for (uint i = 0; i < align_to_refs.size(); i++) {
 258             MemNode* mr = align_to_refs.at(i)->as_Mem();
 259             if (same_velt_type(mr, mem_ref) &&
 260                 memory_alignment(mr, iv_adjustment) != 0)
 261               create_pack = false;


 435       tty->print("\nVector align to node: ");
 436       memops.at(max_idx)->as_Mem()->dump();
 437     }
 438 #endif
 439     return memops.at(max_idx)->as_Mem();
 440   }
 441   return NULL;
 442 }
 443 
 444 //------------------------------ref_is_alignable---------------------------
 445 // Can the preloop align the reference to position zero in the vector?
 446 bool SuperWord::ref_is_alignable(SWPointer& p) {
 447   if (!p.has_iv()) {
 448     return true;   // no induction variable
 449   }
 450   CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
 451   assert(pre_end->stride_is_con(), "pre loop stride is constant");
 452   int preloop_stride = pre_end->stride_con();
 453 
 454   int span = preloop_stride * p.scale_in_bytes();
 455   int mem_size = p.memory_size();
 456   int offset   = p.offset_in_bytes();
 457   // Stride one accesses are alignable if the offset is aligned to the memory operation size.
 458   // Offset can be unaligned when UseUnalignedAccesses is used.
 459   if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
 460     return true;
 461   }
 462   // If the initial offset from start of the object is computable,
 463   // check if the pre-loop can align the final offset accordingly.
 464   //
 465   // In other words: Can we find an i such that the offset
 466   // after i pre-loop iterations is aligned to vw?
 467   //   (init_offset + pre_loop) % vw == 0              (1)
 468   // where
 469   //   pre_loop = i * span
 470   // is the number of bytes added to the offset by i pre-loop iterations.
 471   //
 472   // For this to hold we need pre_loop to increase init_offset by
 473   //   pre_loop = vw - (init_offset % vw)
 474   //
 475   // This is only possible if pre_loop is divisible by span because each
 476   // pre-loop iteration increases the initial offset by 'span' bytes:
 477   //   (vw - (init_offset % vw)) % span == 0
 478   //
 479   int vw = vector_width_in_bytes(p.mem());
 480   assert(vw > 1, "sanity");

 481   Node* init_nd = pre_end->init_trip();
 482   if (init_nd->is_Con() && p.invar() == NULL) {
 483     int init = init_nd->bottom_type()->is_int()->get_con();
 484     int init_offset = init * p.scale_in_bytes() + offset;

 485     assert(init_offset >= 0, "positive offset from object start");
 486     if (vw % span == 0) {
 487       // If vw is a multiple of span, we use formula (1).
 488       if (span > 0) {
 489         return (vw - (init_offset % vw)) % span == 0;
 490       } else {
 491         assert(span < 0, "nonzero stride * scale");
 492         return (init_offset % vw) % -span == 0;
 493       }
 494     } else if (span % vw == 0) {
 495       // If span is a multiple of vw, we can simplify formula (1) to:
 496       //   (init_offset + i * span) % vw == 0
 497       //     =>
 498       //   (init_offset % vw) + ((i * span) % vw) == 0
 499       //     =>
 500       //   init_offset % vw == 0
 501       //
 502       // Because we add a multiple of vw to the initial offset, the final
 503       // offset is a multiple of vw if and only if init_offset is a multiple.
 504       //
 505       return (init_offset % vw) == 0;
 506     }
 507   }
 508   return false;
 509 }
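A standalone sketch of the arithmetic above, under assumed values (this is not HotSpot
code; can_prealign and the example numbers are made up for illustration):

    #include <cstdio>

    // Can some number of pre-loop iterations, each adding 'span' bytes to the
    // offset, make the offset a multiple of the vector width 'vw'?
    // Mirrors formula (1), for positive span only.
    static bool can_prealign(int init_offset, int span, int vw) {
      if (vw % span == 0) {
        return (vw - (init_offset % vw)) % span == 0;  // formula (1)
      }
      if (span % vw == 0) {
        return (init_offset % vw) == 0;                // offset % vw never changes
      }
      return false;
    }

    int main() {
      // 4-byte accesses, pre-loop stride 1 => span = 4; vector width vw = 16.
      printf("%d\n", can_prealign(8, 4, 16));  // 1: two pre-loop iterations reach offset 16
      printf("%d\n", can_prealign(6, 4, 16));  // 0: 6 + 4*i is never a multiple of 16
      return 0;
    }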
 510 
 511 //---------------------------get_iv_adjustment---------------------------
 512 // Calculate the loop's iv adjustment for this memory op.
 513 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
 514   SWPointer align_to_ref_p(mem_ref, this);
 515   int offset = align_to_ref_p.offset_in_bytes();
 516   int scale  = align_to_ref_p.scale_in_bytes();
 517   int elt_size = align_to_ref_p.memory_size();
 518   int vw       = vector_width_in_bytes(mem_ref);
 519   assert(vw > 1, "sanity");
 520   int iv_adjustment;
 521   if (scale != 0) {
 522     int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
 523     // At least one iteration is executed in the pre-loop by default. As a result,
 524     // several iterations are needed to align memory operations in the main loop even
 525     // if the offset is 0.
 526     int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));

 527     assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
 528            err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
 529     iv_adjustment = iv_adjustment_in_bytes/elt_size;
 530   } else {
 531     // This memory op is not dependent on iv (scale == 0)
 532     iv_adjustment = 0;
 533   }
 534 
 535 #ifndef PRODUCT
 536   if (TraceSuperWord)
 537     tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
 538                   offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
 539 #endif
 540   return iv_adjustment;
 541 }
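A quick worked example of the adjustment computed above, with assumed values rather
than values taken from the changeset:

    #include <cstdio>

    int main() {
      // Assumed: 4-byte elements, 16-byte vectors, positive scale * iv_stride,
      // and a constant offset of 4 bytes from the aligned base.
      int vw = 16, offset = 4, elt_size = 4, stride_sign = +1;

      int iv_adjustment_in_bytes = stride_sign * vw - (offset % vw);  // 16 - 4 = 12
      int iv_adjustment = iv_adjustment_in_bytes / elt_size;          // 12 / 4 = 3

      // The access needs 3 iv steps (12 bytes) to reach a vw-aligned address.
      printf("iv_adjustment = %d\n", iv_adjustment);
      return 0;
    }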
 542 
 543 //---------------------------dependence_graph---------------------------
 544 // Construct dependency graph.
 545 // Add dependence edges to load/store nodes for memory dependence
 546 //    A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
 547 void SuperWord::dependence_graph() {
 548   // First, assign a dependence node to each memory node
 549   for (int i = 0; i < _block.length(); i++ ) {
 550     Node *n = _block.at(i);
 551     if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
 552       _dg.make_node(n);
 553     }


2271   for (uint i = 0; i < depth; i++) blanks[i] = ' ';
2272   blanks[depth] = '\0';
2273   return blanks;
2274 }
2275 
2276 
2277 //==============================SWPointer===========================
2278 
2279 //----------------------------SWPointer------------------------
2280 SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
2281   _mem(mem), _slp(slp),  _base(NULL),  _adr(NULL),
2282   _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {
2283 
2284   Node* adr = mem->in(MemNode::Address);
2285   if (!adr->is_AddP()) {
2286     assert(!valid(), "too complex");
2287     return;
2288   }
2289   // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
2290   Node* base = adr->in(AddPNode::Base);
2291   // The base address should be loop invariant
2292   if (!invariant(base)) {
2293     assert(!valid(), "base address is loop variant");
2294     return;
2295   }
2296   // Unsafe references cannot be aligned appropriately without runtime checking
2297   if (base == NULL || base->bottom_type() == Type::TOP) {
2298     assert(!valid(), "unsafe access");
2299     return;
2300   }
2301   for (int i = 0; i < 3; i++) {
2302     if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
2303       assert(!valid(), "too complex");
2304       return;
2305     }
2306     adr = adr->in(AddPNode::Address);
2307     if (base == adr || !adr->is_AddP()) {
2308       break; // stop looking at addp's
2309     }
2310   }
2311   _base = base;
2312   _adr  = adr;
2313   assert(valid(), "Usable");
2314 }
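A hedged illustration of the decomposition this constructor performs, i.e.
address = base + invar + scale * iv + offset; ARRAY_HEADER and the access pattern
are assumptions made for the example, not values from HotSpot:

    #include <cstdio>

    int main() {
      // Assumed layout: an int array whose elements start ARRAY_HEADER bytes past
      // the base oop. For an access like a[i + 3]:
      const int ARRAY_HEADER = 16;        // made-up constant, platform dependent in reality
      int scale  = 4;                     // bytes added per iv step (element size)
      int offset = ARRAY_HEADER + 4 * 3;  // constant part: header plus element offset
      // (invar would be non-NULL only if the index also had a loop-invariant term)

      for (int iv = 0; iv < 4; iv++) {
        printf("a[%d + 3] -> base + %d\n", iv, scale * iv + offset);
      }
      return 0;
    }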
2315 

