// ...
            // if unaligned memory accesses are not allowed.
            create_pack = false;
          }
        }
      }
    } else {
      if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
        // Can't allow vectorization of unaligned memory accesses with the
        // same type since it could be overlapped accesses to the same array.
        create_pack = false;
      } else {
        // Allow independent (different type) unaligned memory operations
        // if HW supports them.
        if (!Matcher::misaligned_vectors_ok()) {
          create_pack = false;
        } else {
          // Check if packs of the same memory type but
          // with a different alignment were created before.
          for (uint i = 0; i < align_to_refs.size(); i++) {
            MemNode* mr = align_to_refs.at(i)->as_Mem();
            if (mr == mem_ref) {
              // Skip when we are looking at the same memory operation.
              continue;
            }
            if (same_velt_type(mr, mem_ref) &&
                memory_alignment(mr, iv_adjustment) != 0) {
              create_pack = false;
            }
          }
        }
      }
    }
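    // Example of the overlap hazard above (hypothetical loop): in
    //   a[i+1] = a[i] + 1
    // the load and store are same-type references into the same array whose
    // vector spans overlap, so packing such unaligned accesses is unsafe.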
    if (create_pack) {
      for (uint i = 0; i < memops.size(); i++) {
        Node* s1 = memops.at(i);
        int align = alignment(s1);
        if (align == top_align) continue;
        for (uint j = 0; j < memops.size(); j++) {
          Node* s2 = memops.at(j);
          if (alignment(s2) == top_align) continue;
          if (s1 != s2 && are_adjacent_refs(s1, s2)) {
            if (stmts_can_pack(s1, s2, align)) {
              Node_List* pair = new Node_List();
              pair->push(s1);
              pair->push(s2);
// ...
    tty->print_cr("\nVector memops after find_align_to_ref");
    for (uint i = 0; i < memops.size(); i++) {
      MemNode* s = memops.at(i)->as_Mem();
      s->dump();
    }
  }
#endif

  if (max_ct > 0) {
#ifdef ASSERT
    if (TraceSuperWord) {
      tty->print("\nVector align to node: ");
      memops.at(max_idx)->as_Mem()->dump();
    }
#endif
    return memops.at(max_idx)->as_Mem();
  }
  return NULL;
}

//------------------span_works_for_memory_size-----------------------------
static bool span_works_for_memory_size(MemNode* mem, int span, int mem_size, int offset) {
  bool span_matches_memory = false;
  if ((mem_size == type2aelembytes(T_BYTE) || mem_size == type2aelembytes(T_SHORT))
      && ABS(span) == type2aelembytes(T_INT)) {
    // There is a mismatch between the span size and the memory size.
    for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) {
      Node* use = mem->fast_out(j);
      if (!VectorNode::is_type_transition_to_int(use)) {
        return false;
      }
    }
    // If all uses transition to int, we can align successfully despite the mismatch.
    return true;
  } else {
    span_matches_memory = ABS(span) == mem_size;
  }
  return span_matches_memory && (ABS(offset) % mem_size) == 0;
}
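// Example for span_works_for_memory_size (hypothetical shapes): a short load
// (mem_size == 2) whose pre-loop span is 4 bytes still counts as alignable
// when every use is a type transition to int, e.g. when the loads only feed
// MulAddS2I, which consumes two shorts per int lane.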

//------------------------------ref_is_alignable---------------------------
// Can the pre-loop align the reference to position zero in the vector?
bool SuperWord::ref_is_alignable(SWPointer& p) {
  if (!p.has_iv()) {
    return true; // no induction variable
  }
  CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
  assert(pre_end != NULL, "we must have a correct pre-loop");
  assert(pre_end->stride_is_con(), "pre-loop stride is constant");
  int preloop_stride = pre_end->stride_con();

  int span = preloop_stride * p.scale_in_bytes();
  int mem_size = p.memory_size();
  int offset = p.offset_in_bytes();
  // Stride-one accesses are alignable if the offset is aligned to the memory operation size.
  // The offset can be unaligned when UseUnalignedAccesses is used.
  if (span_works_for_memory_size(p.mem(), span, mem_size, offset)) {
    return true;
  }
  // If the initial offset from the start of the object is computable,
  // check if the pre-loop can align the final offset accordingly.
  //
  // In other words: Can we find an i such that the offset
  // after i pre-loop iterations is aligned to vw?
  //   (init_offset + pre_loop) % vw == 0              (1)
  // where
  //   pre_loop = i * span
  // is the number of bytes added to the offset by i pre-loop iterations.
  //
  // For this to hold we need pre_loop to increase init_offset by
  //   pre_loop = vw - (init_offset % vw)
  //
  // This is only possible if pre_loop is divisible by span because each
  // pre-loop iteration increases the initial offset by 'span' bytes:
  //   (vw - (init_offset % vw)) % span == 0
  //
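  // A worked example with hypothetical values: vw = 16 and span = 4 give
  //   init_offset = 6: (16 - (6 % 16)) % 4 == 10 % 4 == 2  -> not alignable
  //   init_offset = 4: (16 - (4 % 16)) % 4 == 12 % 4 == 0  -> alignable
  //                    after i = 12 / 4 = 3 pre-loop iterations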
  int vw = vector_width_in_bytes(p.mem());
  // ...
      } else {
        assert(span < 0, "nonzero stride * scale");
        return (init_offset % vw) % -span == 0;
      }
    } else if (span % vw == 0) {
      // If span is a multiple of vw, we can simplify formula (1) to:
      //   (init_offset + i * span) % vw == 0
      //     =>
      //   (init_offset % vw) + ((i * span) % vw) == 0
      //     =>
      //   init_offset % vw == 0
      //
      // Because we add a multiple of vw to the initial offset, the final
      // offset is a multiple of vw if and only if init_offset is a multiple.
      //
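      // E.g. with hypothetical values vw = 16 and span = 32, each pre-loop
      // iteration leaves (offset % vw) unchanged, so only init_offset matters.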
      return (init_offset % vw) == 0;
    }
  }
  return false;
}
//---------------------------get_vw_bytes_special------------------------
int SuperWord::get_vw_bytes_special(MemNode* s) {
  // Get the vector width in bytes.
  int vw = vector_width_in_bytes(s);

  // Check for the special case where a MulAddS2I usage requires adjacent
  // short vectors to be combined.
  BasicType btype = velt_basic_type(s);
  if (type2aelembytes(btype) == 2) {
    bool should_combine_adjacent = true;
    for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
      Node* user = s->fast_out(i);
      if (!VectorNode::is_muladds2i(user)) {
        should_combine_adjacent = false;
      }
    }
    if (should_combine_adjacent) {
      vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2);
    }
  }

  return vw;
}
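// Example for get_vw_bytes_special (hypothetical machine with 16-byte
// vectors): a short access normally has vw = 16; if all its uses are
// MulAddS2I, two adjacent short vectors feed each int vector, so vw is
// doubled to 32, capped by the machine's max vector size for the type.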

//---------------------------get_iv_adjustment---------------------------
// Calculate the loop's iv adjustment for this memory op.
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
  SWPointer align_to_ref_p(mem_ref, this, NULL, false);
  int offset = align_to_ref_p.offset_in_bytes();
  int scale = align_to_ref_p.scale_in_bytes();
  int elt_size = align_to_ref_p.memory_size();
  int vw = get_vw_bytes_special(mem_ref);
  assert(vw > 1, "sanity");
  int iv_adjustment;
  if (scale != 0) {
    int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
    // At least one iteration is executed in the pre-loop by default. As a result,
    // several iterations are needed to align memory operations in the main loop even
    // if offset is 0.
    int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
    assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
           "(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size);
    iv_adjustment = iv_adjustment_in_bytes/elt_size;
  } else {
    // This memory op is not dependent on iv (scale == 0)
    iv_adjustment = 0;
  }
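  // Worked example with hypothetical values: vw = 16, offset = 4, elt_size = 4
  // and a positive stride give iv_adjustment_in_bytes = 16 - (4 % 16) = 12,
  // i.e. iv_adjustment = 3 iterations to reach a vw-aligned address.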

#ifndef PRODUCT
  if (TraceSuperWord) {
    tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
               mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
// ...
        const TypePtr* atyp = n->adr_type();
        vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
        vlen_in_bytes = vn->as_LoadVector()->memory_size();
      } else if (n->is_Store()) {
        // Promote the value to be stored to a vector
        Node* val = vector_opd(p, MemNode::ValueIn);
        if (val == NULL) {
          if (do_reserve_copy()) {
            NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("SuperWord::output: val should not be NULL, exiting SuperWord"); })
            return; // and revert to backup IG
          }
          ShouldNotReachHere();
        }

        Node* ctl = n->in(MemNode::Control);
        Node* mem = first->in(MemNode::Memory);
        Node* adr = low_adr->in(MemNode::Address);
        const TypePtr* atyp = n->adr_type();
        vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
        vlen_in_bytes = vn->as_StoreVector()->memory_size();
      } else if (VectorNode::is_muladds2i(n)) {
        assert(n->req() == 5u, "MulAddS2I should have 4 operands.");
        Node* in1 = vector_opd(p, 1);
        Node* in2 = vector_opd(p, 2);
        vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
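        // MulAddS2I multiplies two pairs of adjacent shorts and sums the two
        // products into one int (in(1)*in(2) + in(3)*in(4)); req() == 5 counts
        // the control input plus the four data operands. vector_opd packs
        // in(1)/in(3) into in1 and in(2)/in(4) into in2 (see the PackNode
        // handling below).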
      } else if (n->req() == 3 && !is_cmov_pack(p)) {
        // Promote operands to vector
        Node* in1 = NULL;
        bool node_isa_reduction = n->is_reduction();
        if (node_isa_reduction) {
          // the input to the first reduction operation is retained
          in1 = low_adr->in(1);
        } else {
          in1 = vector_opd(p, 1);
          if (in1 == NULL) {
            if (do_reserve_copy()) {
              NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("SuperWord::output: in1 should not be NULL, exiting SuperWord"); })
              return; // and revert to backup IG
            }
            ShouldNotReachHere();
          }
        }
        Node* in2 = vector_opd(p, 2);
        if (in2 == NULL) {
          if (do_reserve_copy()) {
// ...
    }
#endif
    return vn;
  }

  // Insert pack operation
  BasicType bt = velt_basic_type(p0);
  PackNode* pk = PackNode::make(opd, vlen, bt);
  DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )

  for (uint i = 1; i < vlen; i++) {
    Node* pi = p->at(i);
    Node* in = pi->in(opd_idx);
    assert(my_pack(in) == NULL, "Should already have been unpacked");
    if (my_pack(in) != NULL) {
      NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); })
      return NULL;
    }
    assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
    pk->add_opd(in);
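    // A MulAddS2I node supplies two short operands per input position: the
    // second operand of the pair sits at opd_idx + 2, so pack it as well.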
    if (VectorNode::is_muladds2i(pi)) {
      Node* in2 = pi->in(opd_idx + 2);
      assert(my_pack(in2) == NULL, "Should already have been unpacked");
      if (my_pack(in2) != NULL) {
        NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); })
        return NULL;
      }
      assert(opd_bt == in2->bottom_type()->basic_type(), "all same type");
      pk->add_opd(in2);
    }
  }
  _igvn.register_new_node_with_optimizer(pk);
  _phase->set_ctrl(pk, _phase->get_ctrl(opd));
#ifdef ASSERT
  if (TraceNewVectors) {
    tty->print("new Vector node: ");
    pk->dump();
  }
#endif
  return pk;
}

//------------------------------insert_extracts---------------------------
// If a use of pack p is not a vector use, then replace the
// use with an extract operation.
void SuperWord::insert_extracts(Node_List* p) {
  if (p->at(0)->is_Store()) return;
  assert(_n_idx_list.is_empty(), "empty (node,index) list");

  // Inspect each use of each pack member. For each use that is
  // ...
    set_velt_type(ex, velt_type(def));
  }
}

//------------------------------is_vector_use---------------------------
// Is use->in(u_idx) a vector use?
bool SuperWord::is_vector_use(Node* use, int u_idx) {
  Node_List* u_pk = my_pack(use);
  if (u_pk == NULL) return false;
  if (use->is_reduction()) return true;
  Node* def = use->in(u_idx);
  Node_List* d_pk = my_pack(def);
  if (d_pk == NULL) {
    // check for scalar promotion
    Node* n = u_pk->at(0)->in(u_idx);
    for (uint i = 1; i < u_pk->size(); i++) {
      if (u_pk->at(i)->in(u_idx) != n) return false;
    }
    return true;
  }
  if (VectorNode::is_muladds2i(use)) {
    // MulAddS2I takes shorts and produces ints, hence the special checks
    // on alignment and size.
    if (u_pk->size() * 2 != d_pk->size()) {
      return false;
    }
    for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
      Node* ui = u_pk->at(i);
      Node* di = d_pk->at(i);
      if (alignment(ui) != alignment(di) * 2) {
        return false;
      }
    }
    return true;
  }
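  // Example with hypothetical sizes: a pack of 4 MulAddS2I nodes (ints)
  // consumes a pack of 8 short loads, and each int lane starts at twice the
  // byte alignment of its first short lane.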
  if (u_pk->size() != d_pk->size())
    return false;
  for (uint i = 0; i < u_pk->size(); i++) {
    Node* ui = u_pk->at(i);
    Node* di = d_pk->at(i);
    if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
      return false;
  }
  return true;
}

//------------------------------construct_bb---------------------------
// Construct reverse postorder list of block members
bool SuperWord::construct_bb() {
  Node* entry = bb();

  assert(_stk.length() == 0,            "stk is empty");
  assert(_block.length() == 0,          "block is empty");
  assert(_data_entry.length() == 0,     "data_entry is empty");
  assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
// ...
      n->dump();
    }
  }
#endif
}

//------------------------------memory_alignment---------------------------
// Alignment within a vector memory reference
int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
#ifndef PRODUCT
  if (TraceSuperWord && Verbose) {
    tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump();
  }
#endif
  NOT_PRODUCT(SWPointer::Tracer::Depth ddd(0);)
  SWPointer p(s, this, NULL, false);
  if (!p.valid()) {
    NOT_PRODUCT(if (is_trace_alignment()) tty->print("SuperWord::memory_alignment: SWPointer p invalid, return bottom_align");)
    return bottom_align;
  }
  int vw = get_vw_bytes_special(s);
  if (vw < 2) {
    NOT_PRODUCT(if (is_trace_alignment()) tty->print_cr("SuperWord::memory_alignment: vector width < 2, return bottom_align");)
    return bottom_align; // No vectors for this type
  }
  int offset = p.offset_in_bytes();
  offset += iv_adjust * p.memory_size();
  int off_rem = offset % vw;
  int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
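  // E.g. with hypothetical values offset = -4 and vw = 16: C++'s % yields
  // off_rem = -4, so off_mod = -4 + 16 = 12, keeping the result in [0, vw).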
  if (TraceSuperWord && Verbose) {
    tty->print_cr("SuperWord::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);
  }
  return off_mod;
}

//---------------------------container_type---------------------------
// Smallest type containing range of values
const Type* SuperWord::container_type(Node* n) {
  if (n->is_Mem()) {
    BasicType bt = n->as_Mem()->memory_type();
    if (n->is_Store() && (bt == T_CHAR)) {
// ...