218 SWPointer p2(s, this); 219 if (p2.comparable(align_to_ref_p)) { 220 int align = memory_alignment(s, iv_adjustment); 221 set_alignment(s, align); 222 } 223 } 224 } 225 226 // Create initial pack pairs of memory operations for which 227 // alignment is set and vectors will be aligned. 228 bool create_pack = true; 229 if (memory_alignment(mem_ref, best_iv_adjustment) == 0) { 230 if (!Matcher::misaligned_vectors_ok()) { 231 int vw = vector_width(mem_ref); 232 int vw_best = vector_width(best_align_to_mem_ref); 233 if (vw > vw_best) { 234 // Do not vectorize a memory access with more elements per vector 235 // if unaligned memory access is not allowed because number of 236 // iterations in pre-loop will be not enough to align it. 237 create_pack = false; 238 } 239 } 240 } else { 241 if (same_velt_type(mem_ref, best_align_to_mem_ref)) { 242 // Can't allow vectorization of unaligned memory accesses with the 243 // same type since it could be overlapped accesses to the same array. 244 create_pack = false; 245 } else { 246 // Allow independent (different type) unaligned memory operations 247 // if HW supports them. 248 if (!Matcher::misaligned_vectors_ok()) { 249 create_pack = false; 250 } else { 251 // Check if packs of the same memory type but 252 // with a different alignment were created before. 253 for (uint i = 0; i < align_to_refs.size(); i++) { 254 MemNode* mr = align_to_refs.at(i)->as_Mem(); 255 if (same_velt_type(mr, mem_ref) && 256 memory_alignment(mr, iv_adjustment) != 0) 257 create_pack = false; 439 440 //------------------------------ref_is_alignable--------------------------- 441 // Can the preloop align the reference to position zero in the vector? 
// NOTE(review): this span is one column of a side-by-side diff listing of
// HotSpot's opto/superword.cpp; the leading number on each line is the
// listing's own line number, not program text. All original tokens are kept;
// only review comments are added.
//
// ref_is_alignable (old version): returns true iff the pre-loop can adjust
// the induction variable so that this SWPointer reference becomes aligned
// to the vector width.
442 bool SuperWord::ref_is_alignable(SWPointer& p) {
443   if (!p.has_iv()) {
444     return true; // no induction variable
445   }
446   CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
447   assert(pre_end != NULL, "we must have a correct pre-loop");
448   assert(pre_end->stride_is_con(), "pre loop stride is constant");
449   int preloop_stride = pre_end->stride_con();
450
    // span = bytes the reference address advances per pre-loop iteration.
451   int span = preloop_stride * p.scale_in_bytes();
452   int mem_size = p.memory_size();
453   int offset = p.offset_in_bytes();
454   // Stride one accesses are alignable if offset is aligned to memory operation size.
455   // Offset can be unaligned when UseUnalignedAccesses is used.
456   if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
457     return true;
458   }
459   // If initial offset from start of object is computable,
460   // compute alignment within the vector.
461   int vw = vector_width_in_bytes(p.mem());
462   assert(vw > 1, "sanity");
    // NOTE(review): if span == 0 the modulo below divides by zero; presumably
    // has_iv() implies a nonzero scale here -- confirm against SWPointer.
463   if (vw % span == 0) {
464     Node* init_nd = pre_end->init_trip();
      // Only provable when the pre-loop's initial trip count is a constant
      // and the address has no invariant component.
465     if (init_nd->is_Con() && p.invar() == NULL) {
466       int init = init_nd->bottom_type()->is_int()->get_con();
467
468       int init_offset = init * p.scale_in_bytes() + offset;
469       assert(init_offset >= 0, "positive offset from object start");
470
471       if (span > 0) {
472         return (vw - (init_offset % vw)) % span == 0;
473       } else {
474         assert(span < 0, "nonzero stride * scale");
475         return (init_offset % vw) % -span == 0;
476       }
477     }
478   }
479   return false;
480 }
481
482 //---------------------------get_iv_adjustment---------------------------
483 // Calculate loop's iv adjustment for this memory ops.
// get_iv_adjustment (old version): number of iv counts (elements) the main
// loop's induction variable must be offset by to align mem_ref.
// NOTE(review): this copy does not handle scale == 0; the updated copy later
// in this listing adds that guard.
484 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
485   SWPointer align_to_ref_p(mem_ref, this);
486   int offset = align_to_ref_p.offset_in_bytes();
487   int scale = align_to_ref_p.scale_in_bytes();
488   int vw = vector_width_in_bytes(mem_ref);
489   assert(vw > 1, "sanity");
490   int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
491   // At least one iteration is executed in pre-loop by default. As result
492   // several iterations are needed to align memory operations in main-loop even
493   // if offset is 0.
494   int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
495   int elt_size = align_to_ref_p.memory_size();
496   assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
497          err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
    // Convert from bytes to iv counts (elements).
498   int iv_adjustment = iv_adjustment_in_bytes/elt_size;
499
500 #ifndef PRODUCT
501   if (TraceSuperWord)
502     tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
503                   offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
504 #endif
505   return iv_adjustment;
506 }
507
508 //---------------------------dependence_graph---------------------------
509 // Construct dependency graph.
510 // Add dependence edges to load/store nodes for memory dependence
511 // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
// NOTE(review): dependence_graph is truncated here by the diff window.
512 void SuperWord::dependence_graph() {
513   // First, assign a dependence node to each memory node
514   for (int i = 0; i < _block.length(); i++ ) {
515     Node *n = _block.at(i);
516     if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) {
517       _dg.make_node(n);
518     }
|
// NOTE(review): '|' above is the diff's column separator; what follows is the
// right-hand (updated) column, resuming mid find_adjacent_refs.
218       SWPointer p2(s, this);
219       if (p2.comparable(align_to_ref_p)) {
220         int align = memory_alignment(s, iv_adjustment);
221         set_alignment(s, align);
222       }
223     }
224   }
225
226   // Create initial pack pairs of memory operations for which
227   // alignment is set and vectors will be aligned.
228 bool create_pack = true; 229 if (memory_alignment(mem_ref, best_iv_adjustment) == 0) { 230 if (!Matcher::misaligned_vectors_ok()) { 231 int vw = vector_width(mem_ref); 232 int vw_best = vector_width(best_align_to_mem_ref); 233 if (vw > vw_best) { 234 // Do not vectorize a memory access with more elements per vector 235 // if unaligned memory access is not allowed because number of 236 // iterations in pre-loop will be not enough to align it. 237 create_pack = false; 238 } else { 239 SWPointer p2(best_align_to_mem_ref, this); 240 if (align_to_ref_p.invar() != p2.invar()) { 241 // Do not vectorize memory accesses with different invariants 242 // if unaligned memory accesses are not allowed. 243 create_pack = false; 244 } 245 } 246 } 247 } else { 248 if (same_velt_type(mem_ref, best_align_to_mem_ref)) { 249 // Can't allow vectorization of unaligned memory accesses with the 250 // same type since it could be overlapped accesses to the same array. 251 create_pack = false; 252 } else { 253 // Allow independent (different type) unaligned memory operations 254 // if HW supports them. 255 if (!Matcher::misaligned_vectors_ok()) { 256 create_pack = false; 257 } else { 258 // Check if packs of the same memory type but 259 // with a different alignment were created before. 260 for (uint i = 0; i < align_to_refs.size(); i++) { 261 MemNode* mr = align_to_refs.at(i)->as_Mem(); 262 if (same_velt_type(mr, mem_ref) && 263 memory_alignment(mr, iv_adjustment) != 0) 264 create_pack = false; 446 447 //------------------------------ref_is_alignable--------------------------- 448 // Can the preloop align the reference to position zero in the vector? 
// NOTE(review): right-hand (updated) column of the diff listing; the leading
// number on each line is the listing's own line number, not program text.
//
// ref_is_alignable (updated version): returns true iff the pre-loop can
// adjust the induction variable so that this reference ends up aligned to
// the vector width. Compared with the old copy earlier in the listing, this
// version also accepts the case span % vw == 0 (see derivation below).
449 bool SuperWord::ref_is_alignable(SWPointer& p) {
450   if (!p.has_iv()) {
451     return true; // no induction variable
452   }
453   CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
454   assert(pre_end != NULL, "we must have a correct pre-loop");
455   assert(pre_end->stride_is_con(), "pre loop stride is constant");
456   int preloop_stride = pre_end->stride_con();
457
    // span = bytes the reference address advances per pre-loop iteration.
458   int span = preloop_stride * p.scale_in_bytes();
459   int mem_size = p.memory_size();
460   int offset = p.offset_in_bytes();
461   // Stride one accesses are alignable if offset is aligned to memory operation size.
462   // Offset can be unaligned when UseUnalignedAccesses is used.
463   if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
464     return true;
465   }
466   // If the initial offset from start of the object is computable,
467   // check if the pre-loop can align the final offset accordingly.
468   //
469   // In other words: Can we find an i such that the offset
470   // after i pre-loop iterations is aligned to vw?
471   //   (init_offset + pre_loop) % vw == 0              (1)
472   // where
473   //   pre_loop = i * span
474   // is the number of bytes added to the offset by i pre-loop iterations.
475   //
476   // For this to hold we need pre_loop to increase init_offset by
477   //   pre_loop = vw - (init_offset % vw)
478   //
479   // This is only possible if pre_loop is divisible by span because each
480   // pre-loop iteration increases the initial offset by 'span' bytes:
481   //   (vw - (init_offset % vw)) % span == 0
482   //
483   int vw = vector_width_in_bytes(p.mem());
484   assert(vw > 1, "sanity");
    // NOTE(review): a span of 0 would make 'vw % span' divide by zero;
    // presumably has_iv() guarantees a nonzero scale here -- confirm.
485   if (vw % span == 0) {
486     Node* init_nd = pre_end->init_trip();
      // Only provable when the pre-loop's initial trip count is a constant
      // and the address has no invariant component.
487     if (init_nd->is_Con() && p.invar() == NULL) {
488       int init = init_nd->bottom_type()->is_int()->get_con();
489
490       int init_offset = init * p.scale_in_bytes() + offset;
491       assert(init_offset >= 0, "positive offset from object start");
492
493       if (span > 0) {
494         return (vw - (init_offset % vw)) % span == 0;
495       } else {
496         assert(span < 0, "nonzero stride * scale");
497         return (init_offset % vw) % -span == 0;
498       }
499     }
500   } else if (span % vw == 0) {
501     // If span is a multiple of vw, we can simplify formula (1) to:
502     //   (init_offset + i * span) % vw == 0
503     //     =>
504     //   (init_offset % vw) + ((i * span) % vw) == 0
505     //     =>
506     //   init_offset % vw == 0
507     //
508     // Because we add a multiple of vw to the initial offset, the
509     // final offset is a multiple of vw iff init_offset is a multiple.
510     //
511     Node* init_nd = pre_end->init_trip();
512     if (init_nd->is_Con() && p.invar() == NULL) {
513       int init = init_nd->bottom_type()->is_int()->get_con();
514       int init_offset = init * p.scale_in_bytes() + offset;
515       assert(init_offset >= 0, "positive offset from object start");
516       return (init_offset % vw) == 0;
517     }
518   }
519   return false;
520 }
521
522 //---------------------------get_iv_adjustment---------------------------
523 // Calculate loop's iv adjustment for this memory ops.
// NOTE(review): right-hand (updated) column of the diff listing; the leading
// number on each line is the listing's own line number, not program text.
//
// get_iv_adjustment (updated version): number of iv counts (elements) the
// main loop's induction variable must be offset by so that mem_ref becomes
// vector-aligned. Unlike the old copy earlier in the listing, this version
// handles scale == 0 (address not dependent on the induction variable).
524 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
525   SWPointer align_to_ref_p(mem_ref, this);
526   int offset = align_to_ref_p.offset_in_bytes();
527   int scale = align_to_ref_p.scale_in_bytes();
528   int elt_size = align_to_ref_p.memory_size();
529   int vw = vector_width_in_bytes(mem_ref);
530   assert(vw > 1, "sanity");
531   int iv_adjustment;
532   if (scale != 0) {
533     int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
534     // At least one iteration is executed in pre-loop by default. As result
535     // several iterations are needed to align memory operations in main-loop even
536     // if offset is 0.
537     int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
538     assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
539            err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
      // Convert from bytes to iv counts (elements).
540     iv_adjustment = iv_adjustment_in_bytes/elt_size;
541   } else {
542     // This memory op is not dependent on iv (scale == 0)
543     iv_adjustment = 0;
544   }
545
546 #ifndef PRODUCT
547   if (TraceSuperWord)
548     tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
549                   offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
550 #endif
551   return iv_adjustment;
552 }
553
554 //---------------------------dependence_graph---------------------------
555 // Construct dependency graph.
556 // Add dependence edges to load/store nodes for memory dependence
557 // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
// NOTE(review): dependence_graph is truncated here by the diff window; '|'
// below is the diff's column separator.
558 void SuperWord::dependence_graph() {
559   // First, assign a dependence node to each memory node
560   for (int i = 0; i < _block.length(); i++ ) {
561     Node *n = _block.at(i);
562     if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) {
563       _dg.make_node(n);
564     }
|