src/share/vm/opto/superword.cpp

 218         SWPointer p2(s, this);
 219         if (p2.comparable(align_to_ref_p)) {
 220           int align = memory_alignment(s, iv_adjustment);
 221           set_alignment(s, align);
 222         }
 223       }
 224     }
 225 
 226     // Create initial pack pairs of memory operations for which
 227     // alignment is set and vectors will be aligned.
 228     bool create_pack = true;
 229     if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
 230       if (!Matcher::misaligned_vectors_ok()) {
 231         int vw = vector_width(mem_ref);
 232         int vw_best = vector_width(best_align_to_mem_ref);
 233         if (vw > vw_best) {
 234           // Do not vectorize a memory access with more elements per vector
 235           // if unaligned memory accesses are not allowed, because the number
 236           // of iterations in the pre-loop will not be enough to align it.
 237           create_pack = false;
 238         }
 239       }
 240     } else {
 241       if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
 242         // Can't allow vectorization of unaligned memory accesses of the
 243         // same type, since they could be overlapping accesses to the same array.
 244         create_pack = false;
 245       } else {
 246         // Allow independent (different type) unaligned memory operations
 247         // if HW supports them.
 248         if (!Matcher::misaligned_vectors_ok()) {
 249           create_pack = false;
 250         } else {
 251           // Check if packs of the same memory type but
 252           // with a different alignment were created before.
 253           for (uint i = 0; i < align_to_refs.size(); i++) {
 254             MemNode* mr = align_to_refs.at(i)->as_Mem();
 255             if (same_velt_type(mr, mem_ref) &&
 256                 memory_alignment(mr, iv_adjustment) != 0)
 257               create_pack = false;


 439 
 440 //------------------------------ref_is_alignable---------------------------
 441 // Can the preloop align the reference to position zero in the vector?
 442 bool SuperWord::ref_is_alignable(SWPointer& p) {
 443   if (!p.has_iv()) {
 444     return true;   // no induction variable
 445   }
 446   CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
 447   assert(pre_end != NULL, "we must have a correct pre-loop");
 448   assert(pre_end->stride_is_con(), "pre loop stride is constant");
 449   int preloop_stride = pre_end->stride_con();
 450 
 451   int span = preloop_stride * p.scale_in_bytes();
 452   int mem_size = p.memory_size();
 453   int offset   = p.offset_in_bytes();
 454   // Stride-one accesses are alignable if the offset is aligned to the memory operation size.
 455   // The offset can be unaligned when UseUnalignedAccesses is used.
 456   if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
 457     return true;
 458   }
 459   // If the initial offset from the start of the object is computable,
 460   // compute the alignment within the vector.
 461   int vw = vector_width_in_bytes(p.mem());
 462   assert(vw > 1, "sanity");
 463   if (vw % span == 0) {
 464     Node* init_nd = pre_end->init_trip();
 465     if (init_nd->is_Con() && p.invar() == NULL) {
 466       int init = init_nd->bottom_type()->is_int()->get_con();
 467 
 468       int init_offset = init * p.scale_in_bytes() + offset;
 469       assert(init_offset >= 0, "positive offset from object start");
 470 
 471       if (span > 0) {
 472         return (vw - (init_offset % vw)) % span == 0;
 473       } else {
 474         assert(span < 0, "nonzero stride * scale");
 475         return (init_offset % vw) % -span == 0;
 476       }
 477     }
 478   }
 479   return false;
 480 }
 481 
 482 //---------------------------get_iv_adjustment---------------------------
 483 // Calculate the loop's iv adjustment for this memory op.
 484 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
 485   SWPointer align_to_ref_p(mem_ref, this);
 486   int offset = align_to_ref_p.offset_in_bytes();
 487   int scale  = align_to_ref_p.scale_in_bytes();
 488   int vw       = vector_width_in_bytes(mem_ref);
 489   assert(vw > 1, "sanity");
 490   int stride_sign   = (scale * iv_stride()) > 0 ? 1 : -1;
 491   // At least one iteration is executed in the pre-loop by default. As a result,
 492   // several iterations are needed to align memory operations in the main-loop
 493   // even if the offset is 0.
 494   int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
 495   int elt_size = align_to_ref_p.memory_size();
 496   assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
 497          err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
 498   int iv_adjustment = iv_adjustment_in_bytes/elt_size;
 499 
 500 #ifndef PRODUCT
 501   if (TraceSuperWord)
 502     tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
 503                   offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
 504 #endif
 505   return iv_adjustment;
 506 }
 507 
 508 //---------------------------dependence_graph---------------------------
 509 // Construct dependency graph.
 510 // Add dependence edges to load/store nodes for memory dependence
 511 //    A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
 512 void SuperWord::dependence_graph() {
 513   // First, assign a dependence node to each memory node
 514   for (int i = 0; i < _block.length(); i++ ) {
 515     Node *n = _block.at(i);
 516     if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
 517       _dg.make_node(n);
 518     }

 218         SWPointer p2(s, this);
 219         if (p2.comparable(align_to_ref_p)) {
 220           int align = memory_alignment(s, iv_adjustment);
 221           set_alignment(s, align);
 222         }
 223       }
 224     }
 225 
 226     // Create initial pack pairs of memory operations for which
 227     // alignment is set and vectors will be aligned.
 228     bool create_pack = true;
 229     if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
 230       if (!Matcher::misaligned_vectors_ok()) {
 231         int vw = vector_width(mem_ref);
 232         int vw_best = vector_width(best_align_to_mem_ref);
 233         if (vw > vw_best) {
 234           // Do not vectorize a memory access with more elements per vector
 235           // if unaligned memory accesses are not allowed, because the number
 236           // of iterations in the pre-loop will not be enough to align it.
 237           create_pack = false;
 238         } else {
 239           SWPointer p2(best_align_to_mem_ref, this);
 240           if (align_to_ref_p.invar() != p2.invar()) {
 241             // Do not vectorize memory accesses with different invariants
 242             // if unaligned memory accesses are not allowed.
 243             create_pack = false;
 244           }
 245         }
 246       }
 247     } else {
 248       if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
 249         // Can't allow vectorization of unaligned memory accesses of the
 250         // same type, since they could be overlapping accesses to the same array.
 251         create_pack = false;
 252       } else {
 253         // Allow independent (different type) unaligned memory operations
 254         // if HW supports them.
 255         if (!Matcher::misaligned_vectors_ok()) {
 256           create_pack = false;
 257         } else {
 258           // Check if packs of the same memory type but
 259           // with a different alignment were created before.
 260           for (uint i = 0; i < align_to_refs.size(); i++) {
 261             MemNode* mr = align_to_refs.at(i)->as_Mem();
 262             if (same_velt_type(mr, mem_ref) &&
 263                 memory_alignment(mr, iv_adjustment) != 0)
 264               create_pack = false;


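The create_pack logic above amounts to a small decision tree. The following standalone C++ sketch is not HotSpot code: RefInfo and its fields are hypothetical stand-ins for the memory_alignment, vector_width, same_velt_type and invariant queries, and conflicting_pack_exists summarizes the scan over align_to_refs.

struct RefInfo {
  int  alignment;       // memory_alignment(ref, iv_adjustment)
  int  vector_width;    // vector_width(ref)
  bool same_velt_type;  // same_velt_type(ref, best_align_to_mem_ref)
  bool same_invar;      // same invariant as best_align_to_mem_ref
};

// Pack aligned refs freely; pack misaligned refs only when the HW
// supports misaligned vectors and no pack of the same type with a
// different alignment was created earlier.
bool should_create_pack(const RefInfo& r, int best_vector_width,
                        bool misaligned_ok, bool conflicting_pack_exists) {
  if (r.alignment == 0) {                      // aligned with the best ref
    if (!misaligned_ok) {
      if (r.vector_width > best_vector_width)  // pre-loop cannot align it
        return false;
      if (!r.same_invar)                       // different invariants
        return false;
    }
    return true;
  }
  if (r.same_velt_type) return false;          // possible overlap in the same array
  if (!misaligned_ok)   return false;          // HW requires aligned vectors
  return !conflicting_pack_exists;             // same type, different alignment seen before
}
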
 446 
 447 //------------------------------ref_is_alignable---------------------------
 448 // Can the preloop align the reference to position zero in the vector?
 449 bool SuperWord::ref_is_alignable(SWPointer& p) {
 450   if (!p.has_iv()) {
 451     return true;   // no induction variable
 452   }
 453   CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
 454   assert(pre_end != NULL, "we must have a correct pre-loop");
 455   assert(pre_end->stride_is_con(), "pre loop stride is constant");
 456   int preloop_stride = pre_end->stride_con();
 457 
 458   int span = preloop_stride * p.scale_in_bytes();
 459   int mem_size = p.memory_size();
 460   int offset   = p.offset_in_bytes();
 461   // Stride-one accesses are alignable if the offset is aligned to the memory operation size.
 462   // The offset can be unaligned when UseUnalignedAccesses is used.
 463   if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
 464     return true;
 465   }
 466   // If the initial offset from the start of the object is computable,
 467   // check if the pre-loop can align the final offset accordingly.
 468   //
 469   // In other words: Can we find an i such that the offset
 470   // after i pre-loop iterations is aligned to vw?
 471   //   (init_offset + pre_loop) % vw == 0              (1)
 472   // where
 473   //   pre_loop = i * span
 474   // is the number of bytes added to the offset by i pre-loop iterations.
 475   //
 476   // For this to hold, pre_loop must increase init_offset by
 477   //   pre_loop = vw - (init_offset % vw)
 478   //
 479   // This is only possible if pre_loop is divisible by span because each
 480   // pre-loop iteration increases the initial offset by 'span' bytes:
 481   //   (vw - (init_offset % vw)) % span == 0
 482   //
 483   int vw = vector_width_in_bytes(p.mem());
 484   assert(vw > 1, "sanity");
 485   if (vw % span == 0) {
 486     Node* init_nd = pre_end->init_trip();
 487     if (init_nd->is_Con() && p.invar() == NULL) {
 488       int init = init_nd->bottom_type()->is_int()->get_con();
 489 
 490       int init_offset = init * p.scale_in_bytes() + offset;
 491       assert(init_offset >= 0, "positive offset from object start");
 492 
 493       if (span > 0) {
 494         return (vw - (init_offset % vw)) % span == 0;
 495       } else {
 496         assert(span < 0, "nonzero stride * scale");
 497         return (init_offset % vw) % -span == 0;
 498       }
 499     }
 500   } else if (span % vw == 0) {
 501     // If span is a multiple of vw, we can simplify formula (1) to:
 502     //   (init_offset + i * span) % vw == 0
 503     //     =>
 504     //   (init_offset % vw) + ((i * span) % vw) == 0
 505     //     =>
 506     //   init_offset % vw == 0
 507     //
 508     // Because we add a multiple of vw to the initial offset, the
 509     // final offset is a multiple of vw iff init_offset is a multiple of vw.
 510     //
 511     Node* init_nd = pre_end->init_trip();
 512     if (init_nd->is_Con() && p.invar() == NULL) {
 513       int init = init_nd->bottom_type()->is_int()->get_con();
 514       int init_offset = init * p.scale_in_bytes() + offset;
 515       assert(init_offset >= 0, "positive offset from object start");
 516       return (init_offset % vw) == 0;
 517     }
 518   }
 519   return false;
 520 }
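
A worked instance may make the two divisibility cases concrete. The sketch below is plain C++, not HotSpot code: alignable_brute searches for an iteration count i satisfying formula (1) directly, while alignable_closed applies the conservative checks used by ref_is_alignable above (positive span only, for brevity). The closed form deliberately rejects patterns where neither vw % span == 0 nor span % vw == 0 holds.

#include <cassert>

// Brute force over positive spans: is there an i with
//   (init_offset + i * span) % vw == 0 ?             // formula (1)
static bool alignable_brute(int init_offset, int span, int vw) {
  for (int i = 0; i <= vw; i++) {
    if ((init_offset + i * span) % vw == 0) return true;
  }
  return false;
}

// The conservative closed-form checks from ref_is_alignable above.
static bool alignable_closed(int init_offset, int span, int vw) {
  if (vw % span == 0)  return (vw - (init_offset % vw)) % span == 0;
  if (span % vw == 0)  return (init_offset % vw) == 0;
  return false;
}

int main() {
  // vw = 16 bytes, span = 4 bytes per pre-loop iteration, init_offset = 8:
  // two iterations reach offset 16, which is 16-byte aligned.
  assert(alignable_brute(8, 4, 16) && alignable_closed(8, 4, 16));
  // init_offset = 6 cycles through 6, 10, 14, 2 (mod 16) and never hits 0.
  assert(!alignable_brute(6, 4, 16) && !alignable_closed(6, 4, 16));
  // span = 32 is a multiple of vw = 16: only init_offset % 16 == 0 works.
  assert(alignable_closed(0, 32, 16) && !alignable_closed(8, 32, 16));
  return 0;
}
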
 521 
 522 //---------------------------get_iv_adjustment---------------------------
 523 // Calculate the loop's iv adjustment for this memory op.
 524 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
 525   SWPointer align_to_ref_p(mem_ref, this);
 526   int offset = align_to_ref_p.offset_in_bytes();
 527   int scale  = align_to_ref_p.scale_in_bytes();
 528   int elt_size = align_to_ref_p.memory_size();
 529   int vw       = vector_width_in_bytes(mem_ref);
 530   assert(vw > 1, "sanity");
 531   int iv_adjustment;
 532   if (scale != 0) {
 533     int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
 534     // At least one iteration is executed in the pre-loop by default. As a result,
 535     // several iterations are needed to align memory operations in the main-loop
 536     // even if the offset is 0.
 537     int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
 538     assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
 539            err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
 540     iv_adjustment = iv_adjustment_in_bytes/elt_size;
 541   } else {
 542     // This memory op is not dependent on the iv (scale == 0).
 543     iv_adjustment = 0;
 544   }
 545 
 546 #ifndef PRODUCT
 547   if (TraceSuperWord)
 548     tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
 549                   offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
 550 #endif
 551   return iv_adjustment;
 552 }
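
A small worked example may help here. This is a minimal standalone sketch, assuming the same quantities as get_iv_adjustment above (scale, offset, vw, elt_size and iv_stride stand in for the SWPointer and loop queries):

#include <cassert>
#include <cstdlib>

// Mirrors the logic above: an access whose address does not move with
// the iv (scale == 0) needs no adjustment; otherwise compute how many
// elements the pre-loop must add to reach a vector-width boundary.
static int iv_adjustment(int scale, int offset, int vw,
                         int elt_size, int iv_stride) {
  if (scale == 0) return 0;                       // iv-independent access
  int stride_sign = (scale * iv_stride) > 0 ? 1 : -1;
  int adj_bytes   = stride_sign * vw - (offset % vw);
  assert(abs(adj_bytes) % elt_size == 0);
  return adj_bytes / elt_size;
}

int main() {
  // 4-byte elements, 16-byte vectors, offset 8, up-counting loop:
  // 8 more bytes, i.e. 2 elements, reach the next 16-byte boundary.
  assert(iv_adjustment(4, 8, 16, 4, 1) == 2);
  // scale == 0: the address ignores the iv, so no adjustment.
  assert(iv_adjustment(0, 8, 16, 4, 1) == 0);
  return 0;
}
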
 553 
 554 //---------------------------dependence_graph---------------------------
 555 // Construct dependency graph.
 556 // Add dependence edges to load/store nodes for memory dependence
 557 //    A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
 558 void SuperWord::dependence_graph() {
 559   // First, assign a dependence node to each memory node
 560   for (int i = 0; i < _block.length(); i++ ) {
 561     Node *n = _block.at(i);
 562     if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
 563       _dg.make_node(n);
 564     }
