215 SWPointer p2(s, this);
216 if (p2.comparable(align_to_ref_p)) {
217 int align = memory_alignment(s, iv_adjustment);
218 set_alignment(s, align);
219 }
220 }
221 }
222
223 // Create initial pack pairs of memory operations for which
224 // alignment is set and vectors will be aligned.
225 bool create_pack = true;
226 if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
227 if (!Matcher::misaligned_vectors_ok()) {
228 int vw = vector_width(mem_ref);
229 int vw_best = vector_width(best_align_to_mem_ref);
230 if (vw > vw_best) {
231 // Do not vectorize a memory access with more elements per vector
232 // if unaligned memory access is not allowed because number of
233 // iterations in pre-loop will be not enough to align it.
234 create_pack = false;
235 }
236 }
237 } else {
238 if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
239 // Can't allow vectorization of unaligned memory accesses with the
240 // same type since it could be overlapped accesses to the same array.
241 create_pack = false;
242 } else {
243 // Allow independent (different type) unaligned memory operations
244 // if HW supports them.
245 if (!Matcher::misaligned_vectors_ok()) {
246 create_pack = false;
247 } else {
248 // Check if packs of the same memory type but
249 // with a different alignment were created before.
250 for (uint i = 0; i < align_to_refs.size(); i++) {
251 MemNode* mr = align_to_refs.at(i)->as_Mem();
252 if (same_velt_type(mr, mem_ref) &&
253 memory_alignment(mr, iv_adjustment) != 0)
254 create_pack = false;
428 tty->print("\nVector align to node: ");
429 memops.at(max_idx)->as_Mem()->dump();
430 }
431 #endif
432 return memops.at(max_idx)->as_Mem();
433 }
434 return NULL;
435 }
436
437 //------------------------------ref_is_alignable---------------------------
438 // Can the preloop align the reference to position zero in the vector?
439 bool SuperWord::ref_is_alignable(SWPointer& p) {
440 if (!p.has_iv()) {
441 return true; // no induction variable
442 }
443 CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
444 assert(pre_end->stride_is_con(), "pre loop stride is constant");
445 int preloop_stride = pre_end->stride_con();
446
447 int span = preloop_stride * p.scale_in_bytes();
448
449 // Stride one accesses are alignable.
450 if (ABS(span) == p.memory_size())
451 return true;
452
453 // If initial offset from start of object is computable,
454 // compute alignment within the vector.
455 int vw = vector_width_in_bytes(p.mem());
456 assert(vw > 1, "sanity");
457 if (vw % span == 0) {
458 Node* init_nd = pre_end->init_trip();
459 if (init_nd->is_Con() && p.invar() == NULL) {
460 int init = init_nd->bottom_type()->is_int()->get_con();
461
462 int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes();
463 assert(init_offset >= 0, "positive offset from object start");
464
465 if (span > 0) {
466 return (vw - (init_offset % vw)) % span == 0;
467 } else {
468 assert(span < 0, "nonzero stride * scale");
469 return (init_offset % vw) % -span == 0;
470 }
471 }
472 }
473 return false;
474 }
475
476 //---------------------------get_iv_adjustment---------------------------
477 // Calculate loop's iv adjustment for this memory ops.
478 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
479 SWPointer align_to_ref_p(mem_ref, this);
480 int offset = align_to_ref_p.offset_in_bytes();
481 int scale = align_to_ref_p.scale_in_bytes();
482 int vw = vector_width_in_bytes(mem_ref);
483 assert(vw > 1, "sanity");
484 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
485 // At least one iteration is executed in pre-loop by default. As result
486 // several iterations are needed to align memory operations in main-loop even
487 // if offset is 0.
488 int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
489 int elt_size = align_to_ref_p.memory_size();
490 assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
491 err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
492 int iv_adjustment = iv_adjustment_in_bytes/elt_size;
493
494 #ifndef PRODUCT
495 if (TraceSuperWord)
496 tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
497 offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
498 #endif
499 return iv_adjustment;
500 }
501
502 //---------------------------dependence_graph---------------------------
503 // Construct dependency graph.
504 // Add dependence edges to load/store nodes for memory dependence
505 // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
506 void SuperWord::dependence_graph() {
507 // First, assign a dependence node to each memory node
508 for (int i = 0; i < _block.length(); i++ ) {
509 Node *n = _block.at(i);
510 if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) {
511 _dg.make_node(n);
512 }
2230 for (uint i = 0; i < depth; i++) blanks[i] = ' ';
2231 blanks[depth] = '\0';
2232 return blanks;
2233 }
2234
2235
2236 //==============================SWPointer===========================
2237
2238 //----------------------------SWPointer------------------------
2239 SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
2240 _mem(mem), _slp(slp), _base(NULL), _adr(NULL),
2241 _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {
2242
2243 Node* adr = mem->in(MemNode::Address);
2244 if (!adr->is_AddP()) {
2245 assert(!valid(), "too complex");
2246 return;
2247 }
2248 // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
2249 Node* base = adr->in(AddPNode::Base);
2250 //unsafe reference could not be aligned appropriately without runtime checking
2251 if (base == NULL || base->bottom_type() == Type::TOP) {
2252 assert(!valid(), "unsafe access");
2253 return;
2254 }
2255 for (int i = 0; i < 3; i++) {
2256 if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
2257 assert(!valid(), "too complex");
2258 return;
2259 }
2260 adr = adr->in(AddPNode::Address);
2261 if (base == adr || !adr->is_AddP()) {
2262 break; // stop looking at addp's
2263 }
2264 }
2265 _base = base;
2266 _adr = adr;
2267 assert(valid(), "Usable");
2268 }
2269
|
215 SWPointer p2(s, this);
216 if (p2.comparable(align_to_ref_p)) {
217 int align = memory_alignment(s, iv_adjustment);
218 set_alignment(s, align);
219 }
220 }
221 }
222
223 // Create initial pack pairs of memory operations for which
224 // alignment is set and vectors will be aligned.
225 bool create_pack = true;
226 if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
227 if (!Matcher::misaligned_vectors_ok()) {
228 int vw = vector_width(mem_ref);
229 int vw_best = vector_width(best_align_to_mem_ref);
230 if (vw > vw_best) {
231 // Do not vectorize a memory access with more elements per vector
232 // if unaligned memory access is not allowed because number of
233 // iterations in pre-loop will be not enough to align it.
234 create_pack = false;
235 } else {
236 SWPointer p2(best_align_to_mem_ref, this);
237 if (align_to_ref_p.invar() != p2.invar()) {
238 // Do not vectorize memory accesses with different invariants
239 // if unaligned memory accesses are not allowed.
240 create_pack = false;
241 }
242 }
243 }
244 } else {
245 if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
246 // Can't allow vectorization of unaligned memory accesses with the
247 // same type since it could be overlapped accesses to the same array.
248 create_pack = false;
249 } else {
250 // Allow independent (different type) unaligned memory operations
251 // if HW supports them.
252 if (!Matcher::misaligned_vectors_ok()) {
253 create_pack = false;
254 } else {
255 // Check if packs of the same memory type but
256 // with a different alignment were created before.
257 for (uint i = 0; i < align_to_refs.size(); i++) {
258 MemNode* mr = align_to_refs.at(i)->as_Mem();
259 if (same_velt_type(mr, mem_ref) &&
260 memory_alignment(mr, iv_adjustment) != 0)
261 create_pack = false;
435 tty->print("\nVector align to node: ");
436 memops.at(max_idx)->as_Mem()->dump();
437 }
438 #endif
439 return memops.at(max_idx)->as_Mem();
440 }
441 return NULL;
442 }
443
444 //------------------------------ref_is_alignable---------------------------
445 // Can the preloop align the reference to position zero in the vector?
446 bool SuperWord::ref_is_alignable(SWPointer& p) {
447 if (!p.has_iv()) {
448 return true; // no induction variable
449 }
450 CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
451 assert(pre_end->stride_is_con(), "pre loop stride is constant");
452 int preloop_stride = pre_end->stride_con();
453
454 int span = preloop_stride * p.scale_in_bytes();
455 int mem_size = p.memory_size();
456 int offset = p.offset_in_bytes();
457 // Stride one accesses are alignable if offset is aligned to memory operation size.
458 // Offset can be unaligned when UseUnalignedAccesses is used.
459 if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
460 return true;
461 }
462 // If the initial offset from start of the object is computable,
463 // check if the pre-loop can align the final offset accordingly.
464 //
465 // In other words: Can we find an i such that the offset
466 // after i pre-loop iterations is aligned to vw?
467 // (init_offset + pre_loop) % vw == 0 (1)
468 // where
469 // pre_loop = i * span
470 // is the number of bytes added to the offset by i pre-loop iterations.
471 //
472 // For this to hold we need pre_loop to increase init_offset by
473 // pre_loop = vw - (init_offset % vw)
474 //
475 // This is only possible if pre_loop is divisible by span because each
476 // pre-loop iteration increases the initial offset by 'span' bytes:
477 // (vw - (init_offset % vw)) % span == 0
478 //
479 int vw = vector_width_in_bytes(p.mem());
480 assert(vw > 1, "sanity");
481 Node* init_nd = pre_end->init_trip();
482 if (init_nd->is_Con() && p.invar() == NULL) {
483 int init = init_nd->bottom_type()->is_int()->get_con();
484 int init_offset = init * p.scale_in_bytes() + offset;
485 assert(init_offset >= 0, "positive offset from object start");
486 if (vw % span == 0) {
487 // If vm is a multiple of span, we use formula (1).
488 if (span > 0) {
489 return (vw - (init_offset % vw)) % span == 0;
490 } else {
491 assert(span < 0, "nonzero stride * scale");
492 return (init_offset % vw) % -span == 0;
493 }
494 } else if (span % vw == 0) {
495 // If span is a multiple of vw, we can simplify formula (1) to:
496 // (init_offset + i * span) % vw == 0
497 // =>
498 // (init_offset % vw) + ((i * span) % vw) == 0
499 // =>
500 // init_offset % vw == 0
501 //
502 // Because we add a multiple of vw to the initial offset, the final
503 // offset is a multiple of vw if and only if init_offset is a multiple.
504 //
505 return (init_offset % vw) == 0;
506 }
507 }
508 return false;
509 }
510
511 //---------------------------get_iv_adjustment---------------------------
512 // Calculate loop's iv adjustment for this memory ops.
513 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
514 SWPointer align_to_ref_p(mem_ref, this);
515 int offset = align_to_ref_p.offset_in_bytes();
516 int scale = align_to_ref_p.scale_in_bytes();
517 int elt_size = align_to_ref_p.memory_size();
518 int vw = vector_width_in_bytes(mem_ref);
519 assert(vw > 1, "sanity");
520 int iv_adjustment;
521 if (scale != 0) {
522 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
523 // At least one iteration is executed in pre-loop by default. As result
524 // several iterations are needed to align memory operations in main-loop even
525 // if offset is 0.
526 int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
527 assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
528 err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
529 iv_adjustment = iv_adjustment_in_bytes/elt_size;
530 } else {
531 // This memory op is not dependent on iv (scale == 0)
532 iv_adjustment = 0;
533 }
534
535 #ifndef PRODUCT
536 if (TraceSuperWord)
537 tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
538 offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
539 #endif
540 return iv_adjustment;
541 }
542
543 //---------------------------dependence_graph---------------------------
544 // Construct dependency graph.
545 // Add dependence edges to load/store nodes for memory dependence
546 // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
547 void SuperWord::dependence_graph() {
548 // First, assign a dependence node to each memory node
549 for (int i = 0; i < _block.length(); i++ ) {
550 Node *n = _block.at(i);
551 if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) {
552 _dg.make_node(n);
553 }
2271 for (uint i = 0; i < depth; i++) blanks[i] = ' ';
2272 blanks[depth] = '\0';
2273 return blanks;
2274 }
2275
2276
2277 //==============================SWPointer===========================
2278
2279 //----------------------------SWPointer------------------------
// Decompose the address of 'mem' into base + k*iv [+ invariant] + offset
// by pattern-matching the AddP chain. On any failure the fields keep
// their default values, which the failure paths assert leave the
// pointer invalid (!valid()).
SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
  _mem(mem), _slp(slp), _base(NULL), _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {

  // Only AddP-shaped addresses can be decomposed.
  Node* adr = mem->in(MemNode::Address);
  if (!adr->is_AddP()) {
    assert(!valid(), "too complex");
    return;
  }
  // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
  Node* base = adr->in(AddPNode::Base);
  // The base address should be loop invariant
  if (!invariant(base)) {
    assert(!valid(), "base address is loop variant");
    return;
  }
  //unsafe reference could not be aligned appropriately without runtime checking
  if (base == NULL || base->bottom_type() == Type::TOP) {
    assert(!valid(), "unsafe access");
    return;
  }
  // Unwind up to three levels of AddP, folding each Offset input into
  // the scaled-iv-plus-offset form via scaled_iv_plus_offset().
  for (int i = 0; i < 3; i++) {
    if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
      assert(!valid(), "too complex");
      return;
    }
    adr = adr->in(AddPNode::Address);
    // Reached the base or a non-AddP address: the chain ends here.
    if (base == adr || !adr->is_AddP()) {
      break; // stop looking at addp's
    }
  }
  _base = base;
  _adr = adr;
  assert(valid(), "Usable");
}
2315
|