Original code:

  }

  SWPointer align_to_ref_p(mem_ref, this, NULL, false);
  // Set alignment relative to "align_to_ref" for all related memory operations.
  for (int i = memops.size() - 1; i >= 0; i--) {
    MemNode* s = memops.at(i)->as_Mem();
    if (isomorphic(s, mem_ref) &&
        (!_do_vector_loop || same_origin_idx(s, mem_ref))) {
      SWPointer p2(s, this, NULL, false);
      if (p2.comparable(align_to_ref_p)) {
        int align = memory_alignment(s, iv_adjustment);
        set_alignment(s, align);
      }
    }
  }

  // Create initial pack pairs of memory operations for which
  // alignment is set and vectors will be aligned.
  bool create_pack = true;
  if (memory_alignment(mem_ref, best_iv_adjustment) == 0 || _do_vector_loop) {
    if (!Matcher::misaligned_vectors_ok()) {
      int vw = vector_width(mem_ref);
      int vw_best = vector_width(best_align_to_mem_ref);
      if (vw > vw_best) {
        // Do not vectorize a memory access with more elements per vector
        // if unaligned memory access is not allowed, because the number of
        // iterations in the pre-loop will not be enough to align it.
        create_pack = false;
      } else {
        SWPointer p2(best_align_to_mem_ref, this, NULL, false);
        if (align_to_ref_p.invar() != p2.invar()) {
          // Do not vectorize memory accesses with different invariants
          // if unaligned memory accesses are not allowed.
          create_pack = false;
        }
      }
    }
  } else {
    if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
      // Can't allow vectorization of unaligned memory accesses with the
      // same type since they could be overlapping accesses to the same array.
      create_pack = false;
    } else {
      // Allow independent (different type) unaligned memory operations
      // if HW supports them.
      if (!Matcher::misaligned_vectors_ok()) {
        create_pack = false;
      } else {
        // Check if packs of the same memory type but
        // with a different alignment were created before.
        for (uint i = 0; i < align_to_refs.size(); i++) {
          MemNode* mr = align_to_refs.at(i)->as_Mem();
          if (mr == mem_ref) {
            // Skip when we are looking at the same memory operation.
            continue;
          }
          if (same_velt_type(mr, mem_ref) &&
              memory_alignment(mr, iv_adjustment) != 0)
            create_pack = false;
        }
      }
    }
  }
  if (create_pack) {
    for (uint i = 0; i < memops.size(); i++) {
      Node* s1 = memops.at(i);
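The memory_alignment() calls above reduce each reference to a small number: the byte position of its first main-loop access within a vector-width window. A rough standalone sketch of that quantity (illustrative only, not HotSpot's memory_alignment(); it assumes the pre-loop advances the reference by scale bytes per iteration):

  // Illustrative sketch, not JVM code: alignment of a reference whose address
  // is base + offset + i * scale, measured modulo the vector width vw after
  // iv_adjustment extra pre-loop iterations.
  int reference_alignment(int offset, int scale, int iv_adjustment, int vw) {
    int first = offset + iv_adjustment * scale; // offset of the first main-loop access
    int rem   = first % vw;
    return rem >= 0 ? rem : rem + vw;           // normalized into [0, vw)
  }

A reference for which this value is zero starts on a vector boundary once the pre-loop has run, which is the condition tested at the top of the create_pack decision.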
  [...]

    Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
    Node* invar = align_to_ref_p.invar();
    if (_igvn.type(invar)->isa_long()) {
      // Computations are done % (vector width/element size) so it's
      // safe to simply convert invar to an int and lose the upper 32
      // bit half.
      invar = new ConvL2INode(invar);
      _igvn.register_new_node_with_optimizer(invar);
    }
    Node* aref = new URShiftINode(invar, log2_elt);
    _igvn.register_new_node_with_optimizer(aref);
    _phase->set_ctrl(aref, pre_ctrl);
    if (align_to_ref_p.negate_invar()) {
      e = new SubINode(e, aref);
    } else {
      e = new AddINode(e, aref);
    }
    _igvn.register_new_node_with_optimizer(e);
    _phase->set_ctrl(e, pre_ctrl);
  }
  if (vw > ObjectAlignmentInBytes) {
    // incorporate base e +/- base && Mask >>> log2(elt)
    Node* xbase = new CastP2XNode(NULL, align_to_ref_p.base());
    _igvn.register_new_node_with_optimizer(xbase);
#ifdef _LP64
    xbase = new ConvL2INode(xbase);
    _igvn.register_new_node_with_optimizer(xbase);
#endif
    Node* mask = _igvn.intcon(vw-1);
    Node* masked_xbase = new AndINode(xbase, mask);
    _igvn.register_new_node_with_optimizer(masked_xbase);
    Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
    Node* bref = new URShiftINode(masked_xbase, log2_elt);
    _igvn.register_new_node_with_optimizer(bref);
    _phase->set_ctrl(bref, pre_ctrl);
    e = new AddINode(e, bref);
    _igvn.register_new_node_with_optimizer(e);
    _phase->set_ctrl(e, pre_ctrl);
  }

  // compute e +/- lim0
  if (scale < 0) {
    e = new SubINode(e, lim0);

  [...]

  _nstack(nstack), _analyze_only(analyze_only),
  _stack_idx(0)
#ifndef PRODUCT
  , _tracer(slp)
#endif
{
  NOT_PRODUCT(_tracer.ctor_1(mem);)

  Node* adr = mem->in(MemNode::Address);
  if (!adr->is_AddP()) {
    assert(!valid(), "too complex");
    return;
  }
  // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
  Node* base = adr->in(AddPNode::Base);
  // The base address should be loop invariant
  if (!invariant(base)) {
    assert(!valid(), "base address is loop variant");
    return;
  }
  // unsafe references cannot be aligned appropriately without runtime checking
  if (base == NULL || base->bottom_type() == Type::TOP) {
    assert(!valid(), "unsafe access");
    return;
  }

  NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.store_depth();)
  NOT_PRODUCT(_tracer.ctor_2(adr);)

  int i;
  for (i = 0; i < 3; i++) {
    NOT_PRODUCT(_tracer.ctor_3(adr, i);)

    if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
      assert(!valid(), "too complex");
      return;
    }
    adr = adr->in(AddPNode::Address);
    NOT_PRODUCT(_tracer.ctor_4(adr, i);)

    if (base == adr || !adr->is_AddP()) {
      NOT_PRODUCT(_tracer.ctor_5(adr, base, i);)
      break; // stop looking at addp's
    }
  }
  NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();)
  NOT_PRODUCT(_tracer.ctor_6(mem);)

  _base = base;
  _adr = adr;
  assert(valid(), "Usable");
}

// Following is used to create a temporary object during
// the pattern match of an address expression.
SWPointer::SWPointer(SWPointer* p) :
  _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
  _nstack(p->_nstack), _analyze_only(p->_analyze_only),
  _stack_idx(p->_stack_idx)
#ifndef PRODUCT
  , _tracer(p->_slp)
#endif
{}
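For intuition about the pattern the three-argument SWPointer constructor above matches, here is an assumed example (not taken from the source): a read of a[i + c] from a Java int[] typically reaches this code with the address shape sketched below, and the loop over at most three AddPs peels off the constant offset and the scaled induction variable until it reaches the base.

  // Assumed input shape (illustrative, names invented): int[] a, access a[i + c]
  //   adr  = AddP(a, AddP(a, a, LShiftL(ConvI2L(i), 2)), hdr + 4*c)
  // Resulting decomposition:
  //   _base = _adr = a        (the walk stops once adr reaches base)
  //   _scale  = 4             (element size folded out of the shift)
  //   _invar  = NULL
  //   _offset = hdr + 4*c     (array header plus the constant index part)

Here hdr stands for the platform's int-array base offset.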
Patched code:

  }

  SWPointer align_to_ref_p(mem_ref, this, NULL, false);
  // Set alignment relative to "align_to_ref" for all related memory operations.
  for (int i = memops.size() - 1; i >= 0; i--) {
    MemNode* s = memops.at(i)->as_Mem();
    if (isomorphic(s, mem_ref) &&
        (!_do_vector_loop || same_origin_idx(s, mem_ref))) {
      SWPointer p2(s, this, NULL, false);
      if (p2.comparable(align_to_ref_p)) {
        int align = memory_alignment(s, iv_adjustment);
        set_alignment(s, align);
      }
    }
  }

  // Create initial pack pairs of memory operations for which
  // alignment is set and vectors will be aligned.
  bool create_pack = true;
  if (memory_alignment(mem_ref, best_iv_adjustment) == 0 || _do_vector_loop) {
    if (!Matcher::misaligned_vectors_ok() || AlignVector) {
      int vw = vector_width(mem_ref);
      int vw_best = vector_width(best_align_to_mem_ref);
      if (vw > vw_best) {
        // Do not vectorize a memory access with more elements per vector
        // if unaligned memory access is not allowed, because the number of
        // iterations in the pre-loop will not be enough to align it.
        create_pack = false;
      } else {
        SWPointer p2(best_align_to_mem_ref, this, NULL, false);
        if (align_to_ref_p.invar() != p2.invar()) {
          // Do not vectorize memory accesses with different invariants
          // if unaligned memory accesses are not allowed.
          create_pack = false;
        }
      }
    }
  } else {
    if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
      // Can't allow vectorization of unaligned memory accesses with the
      // same type since they could be overlapping accesses to the same array.
      create_pack = false;
    } else {
      // Allow independent (different type) unaligned memory operations
      // if HW supports them.
      if (!Matcher::misaligned_vectors_ok() || AlignVector) {
        create_pack = false;
      } else {
        // Check if packs of the same memory type but
        // with a different alignment were created before.
        for (uint i = 0; i < align_to_refs.size(); i++) {
          MemNode* mr = align_to_refs.at(i)->as_Mem();
          if (mr == mem_ref) {
            // Skip when we are looking at the same memory operation.
            continue;
          }
          if (same_velt_type(mr, mem_ref) &&
              memory_alignment(mr, iv_adjustment) != 0)
            create_pack = false;
        }
      }
    }
  }
  if (create_pack) {
    for (uint i = 0; i < memops.size(); i++) {
      Node* s1 = memops.at(i);
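The same-velt-type rejection above is easiest to see on a concrete loop (illustrative only, not taken from the source): two int references into one array that are an element apart can never both be vector-aligned by the single pre-loop trip count, and they may address overlapping parts of the same array.

  // Illustrative loop, written as C++ for the sketch: with 16-byte vectors and
  // 4-byte ints, whatever pre-loop count aligns a[i] leaves a[i + 1] four bytes
  // off a vector boundary, and the two references can overlap.
  void shift_down(int* a, int n) {
    for (int i = 0; i < n - 1; i++) {
      a[i] = a[i + 1];
    }
  }

References with different element types cannot be views of the same array, so the code lets them stay unaligned as long as Matcher::misaligned_vectors_ok() holds and, in the patched version, AlignVector is off.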
  [...]

    Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
    Node* invar = align_to_ref_p.invar();
    if (_igvn.type(invar)->isa_long()) {
      // Computations are done % (vector width/element size) so it's
      // safe to simply convert invar to an int and lose the upper 32
      // bit half.
      invar = new ConvL2INode(invar);
      _igvn.register_new_node_with_optimizer(invar);
    }
    Node* aref = new URShiftINode(invar, log2_elt);
    _igvn.register_new_node_with_optimizer(aref);
    _phase->set_ctrl(aref, pre_ctrl);
    if (align_to_ref_p.negate_invar()) {
      e = new SubINode(e, aref);
    } else {
      e = new AddINode(e, aref);
    }
    _igvn.register_new_node_with_optimizer(e);
    _phase->set_ctrl(e, pre_ctrl);
  }
  if (vw > ObjectAlignmentInBytes || align_to_ref_p.base()->is_top()) {
    // incorporate base e +/- base && Mask >>> log2(elt)
    Node* xbase = new CastP2XNode(NULL, align_to_ref_p.adr());
    _igvn.register_new_node_with_optimizer(xbase);
#ifdef _LP64
    xbase = new ConvL2INode(xbase);
    _igvn.register_new_node_with_optimizer(xbase);
#endif
    Node* mask = _igvn.intcon(vw-1);
    Node* masked_xbase = new AndINode(xbase, mask);
    _igvn.register_new_node_with_optimizer(masked_xbase);
    Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
    Node* bref = new URShiftINode(masked_xbase, log2_elt);
    _igvn.register_new_node_with_optimizer(bref);
    _phase->set_ctrl(bref, pre_ctrl);
    e = new AddINode(e, bref);
    _igvn.register_new_node_with_optimizer(e);
    _phase->set_ctrl(e, pre_ctrl);
  }

  // compute e +/- lim0
  if (scale < 0) {
    e = new SubINode(e, lim0);

  [...]

  _nstack(nstack), _analyze_only(analyze_only),
  _stack_idx(0)
#ifndef PRODUCT
  , _tracer(slp)
#endif
{
  NOT_PRODUCT(_tracer.ctor_1(mem);)

  Node* adr = mem->in(MemNode::Address);
  if (!adr->is_AddP()) {
    assert(!valid(), "too complex");
    return;
  }
  // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
  Node* base = adr->in(AddPNode::Base);
  // The base address should be loop invariant
  if (!invariant(base)) {
    assert(!valid(), "base address is loop variant");
    return;
  }
  // unsafe references require misaligned vector access support
  if (base->is_top() && !Matcher::misaligned_vectors_ok()) {
    assert(!valid(), "unsafe access");
    return;
  }

  NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.store_depth();)
  NOT_PRODUCT(_tracer.ctor_2(adr);)

  int i;
  for (i = 0; i < 3; i++) {
    NOT_PRODUCT(_tracer.ctor_3(adr, i);)

    if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
      assert(!valid(), "too complex");
      return;
    }
    adr = adr->in(AddPNode::Address);
    NOT_PRODUCT(_tracer.ctor_4(adr, i);)

    if (base == adr || !adr->is_AddP()) {
      NOT_PRODUCT(_tracer.ctor_5(adr, base, i);)
      break; // stop looking at addp's
    }
  }
  if (!invariant(adr)) {
    assert(!valid(), "adr is loop variant");
    return;
  }

  if (!base->is_top() && adr != base) {
    assert(!valid(), "adr and base differ");
    return;
  }

  NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();)
  NOT_PRODUCT(_tracer.ctor_6(mem);)

  _base = base;
  _adr = adr;
  assert(valid(), "Usable");
}
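Taken together, the limit-adjustment hunk above computes, in ideal-graph form, how many scalar iterations the pre-loop must execute so that the first main-loop access of the chosen reference lands on a vector boundary. The patched version folds in the raw address whenever the base is top and masks adr() rather than base(), since an unsafe (off-heap) reference carries no ObjectAlignmentInBytes guarantee. A standalone arithmetic sketch of the underlying count (illustrative only, not the graph the code builds; it assumes a positive scale equal to the element size and no invariant term, and the names are invented):

  #include <cstdint>

  // addr     : raw address the reference touches in the first iteration (what CastP2X exposes)
  // elt_size : element size in bytes; vw : vector width in bytes, a power of two
  int pre_loop_iterations_to_align(uintptr_t addr, int elt_size, int vw) {
    int misaligned_bytes  = (int)(addr & (uintptr_t)(vw - 1));  // addr && Mask in the comment above
    int bytes_to_boundary = (vw - misaligned_bytes) & (vw - 1);
    return bytes_to_boundary / elt_size;  // scalar iterations until the access is vw-aligned
  }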
// Following is used to create a temporary object during
// the pattern match of an address expression.
SWPointer::SWPointer(SWPointer* p) :
  _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
  _nstack(p->_nstack), _analyze_only(p->_analyze_only),
  _stack_idx(p->_stack_idx)
#ifndef PRODUCT
  , _tracer(p->_slp)
#endif
{}