752 return false;
753 }
754
755 //---------------------------get_iv_adjustment---------------------------
756 // Calculate loop's iv adjustment for this memory ops.
757 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
758 SWPointer align_to_ref_p(mem_ref, this, NULL, false);
759 int offset = align_to_ref_p.offset_in_bytes();
760 int scale = align_to_ref_p.scale_in_bytes();
761 int elt_size = align_to_ref_p.memory_size();
762 int vw = vector_width_in_bytes(mem_ref);
763 assert(vw > 1, "sanity");
764 int iv_adjustment;
765 if (scale != 0) {
766 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
767 // At least one iteration is executed in pre-loop by default. As result
768 // several iterations are needed to align memory operations in main-loop even
769 // if offset is 0.
770 int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
771 assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
772 err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
773 iv_adjustment = iv_adjustment_in_bytes/elt_size;
774 } else {
775 // This memory op is not dependent on iv (scale == 0)
776 iv_adjustment = 0;
777 }
778
779 #ifndef PRODUCT
780 if (TraceSuperWord) {
781 tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
782 mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
783 mem_ref->dump();
784 }
785 #endif
786 return iv_adjustment;
787 }
788
789 //---------------------------dependence_graph---------------------------
790 // Construct dependency graph.
791 // Add dependence edges to load/store nodes for memory dependence
792 // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
897 }
898 } else {
899 // FIXME
900 if (out->is_MergeMem() && !in_bb(out)) {
901 // Either unrolling is causing a memory edge not to disappear,
902 // or need to run igvn.optimize() again before SLP
903 } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) {
904 // Ditto. Not sure what else to check further.
905 } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) {
906 // StoreCM has an input edge used as a precedence edge.
907 // Maybe an issue when oop stores are vectorized.
908 } else {
909 assert(out == prev || prev == NULL, "no branches off of store slice");
910 }
911 }//else
912 }//for
913 if (n == stop) break;
914 preds.push(n);
915 NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);)
916 prev = n;
917 assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name()));
918 n = n->in(MemNode::Memory);
919 }
920 }
921
922 //------------------------------stmts_can_pack---------------------------
923 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and
924 // s1 aligned at "align"
925 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
926
927 // Do not use superword for non-primitives
928 BasicType bt1 = velt_basic_type(s1);
929 BasicType bt2 = velt_basic_type(s2);
930 if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
931 return false;
932 if (Matcher::max_vector_size(bt1) < 2) {
933 return false; // No vectors for this type
934 }
935
936 if (isomorphic(s1, s2)) {
937 if (independent(s1, s2) || reduction(s1, s2)) {
2088 if (def && in_bb(def)) {
2089 found = true;
2090 break;
2091 }
2092 }
2093 if (!found) {
2094 assert(n != entry, "can't be entry");
2095 _data_entry.push(n);
2096 }
2097 }
2098 }
2099 }
2100
2101 // Find memory slices (head and tail)
2102 for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) {
2103 Node *n = lp()->fast_out(i);
2104 if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
2105 Node* n_tail = n->in(LoopNode::LoopBackControl);
2106 if (n_tail != n->in(LoopNode::EntryControl)) {
2107 if (!n_tail->is_Mem()) {
2108 assert(n_tail->is_Mem(), err_msg_res("unexpected node for memory slice: %s", n_tail->Name()));
2109 return false; // Bailout
2110 }
2111 _mem_slice_head.push(n);
2112 _mem_slice_tail.push(n_tail);
2113 }
2114 }
2115 }
2116
2117 // Create an RPO list of nodes in block
2118
2119 visited_clear();
2120 post_visited_clear();
2121
2122 // Push all non-control nodes with no inputs from within block, then control entry
2123 for (int j = 0; j < _data_entry.length(); j++) {
2124 Node* n = _data_entry.at(j);
2125 visited_set(n);
2126 _stk.push(n);
2127 }
2128 visited_set(entry);
|
752 return false;
753 }
754
755 //---------------------------get_iv_adjustment---------------------------
756 // Calculate loop's iv adjustment for this memory ops.
757 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
758 SWPointer align_to_ref_p(mem_ref, this, NULL, false);
759 int offset = align_to_ref_p.offset_in_bytes();
760 int scale = align_to_ref_p.scale_in_bytes();
761 int elt_size = align_to_ref_p.memory_size();
762 int vw = vector_width_in_bytes(mem_ref);
763 assert(vw > 1, "sanity");
764 int iv_adjustment;
765 if (scale != 0) {
766 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
767 // At least one iteration is executed in pre-loop by default. As result
768 // several iterations are needed to align memory operations in main-loop even
769 // if offset is 0.
770 int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
771 assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
772 "(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size);
773 iv_adjustment = iv_adjustment_in_bytes/elt_size;
774 } else {
775 // This memory op is not dependent on iv (scale == 0)
776 iv_adjustment = 0;
777 }
778
779 #ifndef PRODUCT
780 if (TraceSuperWord) {
781 tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
782 mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
783 mem_ref->dump();
784 }
785 #endif
786 return iv_adjustment;
787 }
788
789 //---------------------------dependence_graph---------------------------
790 // Construct dependency graph.
791 // Add dependence edges to load/store nodes for memory dependence
792 // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
897 }
898 } else {
899 // FIXME
900 if (out->is_MergeMem() && !in_bb(out)) {
901 // Either unrolling is causing a memory edge not to disappear,
902 // or need to run igvn.optimize() again before SLP
903 } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) {
904 // Ditto. Not sure what else to check further.
905 } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) {
906 // StoreCM has an input edge used as a precedence edge.
907 // Maybe an issue when oop stores are vectorized.
908 } else {
909 assert(out == prev || prev == NULL, "no branches off of store slice");
910 }
911 }//else
912 }//for
913 if (n == stop) break;
914 preds.push(n);
915 NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);)
916 prev = n;
917 assert(n->is_Mem(), "unexpected node %s", n->Name());
918 n = n->in(MemNode::Memory);
919 }
920 }
921
922 //------------------------------stmts_can_pack---------------------------
923 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and
924 // s1 aligned at "align"
925 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
926
927 // Do not use superword for non-primitives
928 BasicType bt1 = velt_basic_type(s1);
929 BasicType bt2 = velt_basic_type(s2);
930 if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
931 return false;
932 if (Matcher::max_vector_size(bt1) < 2) {
933 return false; // No vectors for this type
934 }
935
936 if (isomorphic(s1, s2)) {
937 if (independent(s1, s2) || reduction(s1, s2)) {
2088 if (def && in_bb(def)) {
2089 found = true;
2090 break;
2091 }
2092 }
2093 if (!found) {
2094 assert(n != entry, "can't be entry");
2095 _data_entry.push(n);
2096 }
2097 }
2098 }
2099 }
2100
2101 // Find memory slices (head and tail)
2102 for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) {
2103 Node *n = lp()->fast_out(i);
2104 if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
2105 Node* n_tail = n->in(LoopNode::LoopBackControl);
2106 if (n_tail != n->in(LoopNode::EntryControl)) {
2107 if (!n_tail->is_Mem()) {
2108 assert(n_tail->is_Mem(), "unexpected node for memory slice: %s", n_tail->Name());
2109 return false; // Bailout
2110 }
2111 _mem_slice_head.push(n);
2112 _mem_slice_tail.push(n_tail);
2113 }
2114 }
2115 }
2116
2117 // Create an RPO list of nodes in block
2118
2119 visited_clear();
2120 post_visited_clear();
2121
2122 // Push all non-control nodes with no inputs from within block, then control entry
2123 for (int j = 0; j < _data_entry.length(); j++) {
2124 Node* n = _data_entry.at(j);
2125 visited_set(n);
2126 _stk.push(n);
2127 }
2128 visited_set(entry);
|