48 _igvn(phase->_igvn),
49 _arena(phase->C->comp_arena()),
50 _packset(arena(), 8, 0, NULL), // packs for the current block
51 _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
52 _block(arena(), 8, 0, NULL), // nodes in current block
53 _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
54 _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
55 _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
56 _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
57 _align_to_ref(NULL), // memory reference to align vectors to
58 _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
59 _dg(_arena), // dependence graph
60 _visited(arena()), // visited node set
61 _post_visited(arena()), // post visited node set
62 _n_idx_list(arena(), 8), // scratch list of (node,index) pairs
63 _stk(arena(), 8, 0, NULL), // scratch stack of nodes
64 _nlist(arena(), 8, 0, NULL), // scratch list of nodes
65 _lpt(NULL), // loop tree node
66 _lp(NULL), // LoopNode
67 _bb(NULL), // basic block
68 _iv(NULL) // induction var
69 {}
70
71 //------------------------------transform_loop---------------------------
72 void SuperWord::transform_loop(IdealLoopTree* lpt) {
73 assert(UseSuperWord, "should be");
74 // Do vectors exist on this architecture?
75 if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
76
77 assert(lpt->_head->is_CountedLoop(), "must be");
78 CountedLoopNode *cl = lpt->_head->as_CountedLoop();
79
80 if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
81
82 if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
83
84 // Check for no control flow in body (other than exit)
85 Node *cl_exit = cl->loopexit();
86 if (cl_exit->in(0) != lpt->_head) return;
87
88 // Make sure the are no extra control users of the loop backedge
128 //
129 // 5) One of the memory references is picked to be an aligned vector reference.
130 // The pre-loop trip count is adjusted to align this reference in the
131 // unrolled body.
132 //
133 // 6) The initial set of pack pairs is seeded with memory references.
134 //
135 // 7) The set of pack pairs is extended by following use->def and def->use links.
136 //
137 // 8) The pairs are combined into vector sized packs.
138 //
139 // 9) Reorder the memory slices to co-locate members of the memory packs.
140 //
141 // 10) Generate ideal vector nodes for the final set of packs and where necessary,
142 // inserting scalar promotion, vector creation from multiple scalars, and
143 // extraction of scalar values from vectors.
144 //
145 void SuperWord::SLP_extract() {
146
147 // Ready the block
148
149 if (!construct_bb())
150 return; // Exit if no interesting nodes or complex graph.
151
152 dependence_graph();
153
154 compute_max_depth();
155
156 compute_vector_element_type();
157
158 // Attempt vectorization
159
160 find_adjacent_refs();
161
162 extend_packlist();
163
164 combine_packs();
165
166 construct_my_pack_map();
167
168 filter_packs();
623 assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name()));
624 n = n->in(MemNode::Memory);
625 }
626 }
627
628 //------------------------------stmts_can_pack---------------------------
629 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and
630 // s1 aligned at "align"
631 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
632
633 // Do not use superword for non-primitives
634 BasicType bt1 = velt_basic_type(s1);
635 BasicType bt2 = velt_basic_type(s2);
636 if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
637 return false;
638 if (Matcher::max_vector_size(bt1) < 2) {
639 return false; // No vectors for this type
640 }
641
642 if (isomorphic(s1, s2)) {
643 if (independent(s1, s2)) {
644 if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
645 if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
646 int s1_align = alignment(s1);
647 int s2_align = alignment(s2);
648 if (s1_align == top_align || s1_align == align) {
649 if (s2_align == top_align || s2_align == align + data_size(s1)) {
650 return true;
651 }
652 }
653 }
654 }
655 }
656 }
657 return false;
658 }
659
660 //------------------------------exists_at---------------------------
661 // Does s exist in a pack at position pos?
662 bool SuperWord::exists_at(Node* s, uint pos) {
663 for (int i = 0; i < _packset.length(); i++) {
701 if (s1->in(0) != s2->in(0)) return false;
702 if (!same_velt_type(s1, s2)) return false;
703 return true;
704 }
705
706 //------------------------------independent---------------------------
707 // Is there no data path from s1 to s2 or s2 to s1?
708 bool SuperWord::independent(Node* s1, Node* s2) {
709 // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
710 int d1 = depth(s1);
711 int d2 = depth(s2);
712 if (d1 == d2) return s1 != s2;
713 Node* deep = d1 > d2 ? s1 : s2;
714 Node* shallow = d1 > d2 ? s2 : s1;
715
716 visited_clear();
717
718 return independent_path(shallow, deep);
719 }
720
721 //------------------------------independent_path------------------------------
722 // Helper for independent
723 bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
724 if (dp >= 1000) return false; // stop deep recursion
725 visited_set(deep);
726 int shal_depth = depth(shallow);
727 assert(shal_depth <= depth(deep), "must be");
728 for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) {
729 Node* pred = preds.current();
730 if (in_bb(pred) && !visited_test(pred)) {
731 if (shallow == pred) {
732 return false;
733 }
734 if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
735 return false;
736 }
737 }
738 }
739 return true;
740 }
744 set_alignment(s1, align);
745 if (align == top_align || align == bottom_align) {
746 set_alignment(s2, align);
747 } else {
748 set_alignment(s2, align + data_size(s1));
749 }
750 }
751
752 //------------------------------data_size---------------------------
753 int SuperWord::data_size(Node* s) {
754 int bsize = type2aelembytes(velt_basic_type(s));
755 assert(bsize != 0, "valid size");
756 return bsize;
757 }
758
759 //------------------------------extend_packlist---------------------------
760 // Extend packset by following use->def and def->use links from pack members.
761 void SuperWord::extend_packlist() {
762 bool changed;
763 do {
764 changed = false;
765 for (int i = 0; i < _packset.length(); i++) {
766 Node_List* p = _packset.at(i);
767 changed |= follow_use_defs(p);
768 changed |= follow_def_uses(p);
769 }
770 } while (changed);
771
772 #ifndef PRODUCT
773 if (TraceSuperWord) {
774 tty->print_cr("\nAfter extend_packlist");
775 print_packset();
776 }
777 #endif
778 }
779
780 //------------------------------follow_use_defs---------------------------
781 // Extend the packset by visiting operand definitions of nodes in pack p
782 bool SuperWord::follow_use_defs(Node_List* p) {
783 assert(p->size() == 2, "just checking");
784 Node* s1 = p->at(0);
785 Node* s2 = p->at(1);
786 assert(s1->req() == s2->req(), "just checking");
787 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
788
789 if (s1->is_Load()) return false;
790
791 int align = alignment(s1);
808 }
809 }
810 }
811 return changed;
812 }
813
814 //------------------------------follow_def_uses---------------------------
815 // Extend the packset by visiting uses of nodes in pack p
816 bool SuperWord::follow_def_uses(Node_List* p) {
817 bool changed = false;
818 Node* s1 = p->at(0);
819 Node* s2 = p->at(1);
820 assert(p->size() == 2, "just checking");
821 assert(s1->req() == s2->req(), "just checking");
822 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
823
824 if (s1->is_Store()) return false;
825
826 int align = alignment(s1);
827 int savings = -1;
828 Node* u1 = NULL;
829 Node* u2 = NULL;
830 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
831 Node* t1 = s1->fast_out(i);
832 if (!in_bb(t1)) continue;
833 for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
834 Node* t2 = s2->fast_out(j);
835 if (!in_bb(t2)) continue;
836 if (!opnd_positions_match(s1, t1, s2, t2))
837 continue;
838 if (stmts_can_pack(t1, t2, align)) {
839 int my_savings = est_savings(t1, t2);
840 if (my_savings > savings) {
841 savings = my_savings;
842 u1 = t1;
843 u2 = t2;
844 }
845 }
846 }
847 }
848 if (savings >= 0) {
849 Node_List* pair = new Node_List();
850 pair->push(u1);
851 pair->push(u2);
852 _packset.append(pair);
853 set_alignment(u1, u2, align);
854 changed = true;
855 }
856 return changed;
857 }
858
859 //---------------------------opnd_positions_match-------------------------
860 // Is the use of d1 in u1 at the same operand position as d2 in u2?
861 bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
862 uint ct = u1->req();
863 if (ct != u2->req()) return false;
864 uint i1 = 0;
865 uint i2 = 0;
866 do {
867 for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
868 for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
869 if (i1 != i2) {
870 if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
871 // Further analysis relies on operands position matching.
872 u2->swap_edges(i1, i2);
873 } else {
874 return false;
875 }
876 }
877 } while (i1 < ct);
878 return true;
879 }
880
881 //------------------------------est_savings---------------------------
923 if (ct < s2->outcnt()) save_use += unpack_cost(1);
924
925 return MAX2(save_in, save_use);
926 }
927
928 //------------------------------costs---------------------------
929 int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
930 int SuperWord::pack_cost(int ct) { return ct; }
931 int SuperWord::unpack_cost(int ct) { return ct; }
932
933 //------------------------------combine_packs---------------------------
934 // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
935 void SuperWord::combine_packs() {
936 bool changed = true;
937 // Combine packs regardless max vector size.
938 while (changed) {
939 changed = false;
940 for (int i = 0; i < _packset.length(); i++) {
941 Node_List* p1 = _packset.at(i);
942 if (p1 == NULL) continue;
943 for (int j = 0; j < _packset.length(); j++) {
944 Node_List* p2 = _packset.at(j);
945 if (p2 == NULL) continue;
946 if (i == j) continue;
947 if (p1->at(p1->size()-1) == p2->at(0)) {
948 for (uint k = 1; k < p2->size(); k++) {
949 p1->push(p2->at(k));
950 }
951 _packset.at_put(j, NULL);
952 changed = true;
953 }
954 }
955 }
956 }
957
958 // Split packs which have size greater then max vector size.
959 for (int i = 0; i < _packset.length(); i++) {
960 Node_List* p1 = _packset.at(i);
961 if (p1 != NULL) {
962 BasicType bt = velt_basic_type(p1->at(0));
963 uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
1050 }
1051 #endif
1052 remove_pack_at(i);
1053 changed = true;
1054 }
1055 }
1056 } while (changed);
1057
1058 #ifndef PRODUCT
1059 if (TraceSuperWord) {
1060 tty->print_cr("\nAfter filter_packs");
1061 print_packset();
1062 tty->cr();
1063 }
1064 #endif
1065 }
1066
1067 //------------------------------implemented---------------------------
1068 // Can code be generated for pack p?
1069 bool SuperWord::implemented(Node_List* p) {
1070 Node* p0 = p->at(0);
1071 return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));
1072 }
1073
1074 //------------------------------same_inputs--------------------------
1075 // For pack p, are all idx operands the same?
1076 static bool same_inputs(Node_List* p, int idx) {
1077 Node* p0 = p->at(0);
1078 uint vlen = p->size();
1079 Node* p0_def = p0->in(idx);
1080 for (uint i = 1; i < vlen; i++) {
1081 Node* pi = p->at(i);
1082 Node* pi_def = pi->in(idx);
1083 if (p0_def != pi_def)
1084 return false;
1085 }
1086 return true;
1087 }
1088
1089 //------------------------------profitable---------------------------
1090 // For pack p, are all operands and all uses (with in the block) vector?
1091 bool SuperWord::profitable(Node_List* p) {
1092 Node* p0 = p->at(0);
1093 uint start, end;
1094 VectorNode::vector_operands(p0, &start, &end);
1095
1096 // Return false if some inputs are not vectors or vectors with different
1097 // size or alignment.
1098 // Also, for now, return false if not scalar promotion case when inputs are
1099 // the same. Later, implement PackNode and allow differing, non-vector inputs
1100 // (maybe just the ones from outside the block.)
1101 for (uint i = start; i < end; i++) {
1102 if (!is_vector_use(p0, i))
1103 return false;
1104 }
1105 if (VectorNode::is_shift(p0)) {
1106 // For now, return false if shift count is vector or not scalar promotion
1107 // case (different shift counts) because it is not supported yet.
1108 Node* cnt = p0->in(2);
1109 Node_List* cnt_pk = my_pack(cnt);
1110 if (cnt_pk != NULL)
1111 return false;
1112 if (!same_inputs(p, 2))
1113 return false;
1114 }
1115 if (!p0->is_Store()) {
1116 // For now, return false if not all uses are vector.
1117 // Later, implement ExtractNode and allow non-vector uses (maybe
1118 // just the ones outside the block.)
1119 for (uint i = 0; i < p->size(); i++) {
1120 Node* def = p->at(i);
1121 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1122 Node* use = def->fast_out(j);
1123 for (uint k = 0; k < use->req(); k++) {
1124 Node* n = use->in(k);
1125 if (def == n) {
1126 if (!is_vector_use(use, k)) {
1127 return false;
1128 }
1129 }
1130 }
1131 }
1132 }
1133 }
1134 return true;
1135 }
1136
1137 //------------------------------schedule---------------------------
1138 // Adjust the memory graph for the packed operations
1139 void SuperWord::schedule() {
1140
1141 // Co-locate in the memory graph the members of each memory pack
1142 for (int i = 0; i < _packset.length(); i++) {
1143 co_locate_pack(_packset.at(i));
1144 }
1145 }
1390 mem = mem->in(MemNode::Memory);
1391 } else {
1392 break; // dependent memory
1393 }
1394 }
1395 Node* adr = low_adr->in(MemNode::Address);
1396 const TypePtr* atyp = n->adr_type();
1397 vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
1398 vlen_in_bytes = vn->as_LoadVector()->memory_size();
1399 } else if (n->is_Store()) {
1400 // Promote value to be stored to vector
1401 Node* val = vector_opd(p, MemNode::ValueIn);
1402 Node* ctl = n->in(MemNode::Control);
1403 Node* mem = first->in(MemNode::Memory);
1404 Node* adr = low_adr->in(MemNode::Address);
1405 const TypePtr* atyp = n->adr_type();
1406 vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
1407 vlen_in_bytes = vn->as_StoreVector()->memory_size();
1408 } else if (n->req() == 3) {
1409 // Promote operands to vector
1410 Node* in1 = vector_opd(p, 1);
1411 Node* in2 = vector_opd(p, 2);
1412 if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {
1413 // Move invariant vector input into second position to avoid register spilling.
1414 Node* tmp = in1;
1415 in1 = in2;
1416 in2 = tmp;
1417 }
1418 vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
1419 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
1420 } else {
1421 ShouldNotReachHere();
1422 }
1423 assert(vn != NULL, "sanity");
1424 _igvn.register_new_node_with_optimizer(vn);
1425 _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
1426 for (uint j = 0; j < p->size(); j++) {
1427 Node* pm = p->at(j);
1428 _igvn.replace_node(pm, vn);
1429 }
1430 _igvn._worklist.push(vn);
1431
1432 if (vlen_in_bytes > max_vlen_in_bytes) {
1433 max_vlen_in_bytes = vlen_in_bytes;
1434 }
1435 #ifdef ASSERT
1436 if (TraceNewVectors) {
1437 tty->print("new Vector node: ");
1438 vn->dump();
1439 }
1539 Node* def = p->at(i);
1540 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1541 Node* use = def->fast_out(j);
1542 for (uint k = 0; k < use->req(); k++) {
1543 Node* n = use->in(k);
1544 if (def == n) {
1545 if (!is_vector_use(use, k)) {
1546 _n_idx_list.push(use, k);
1547 }
1548 }
1549 }
1550 }
1551 }
1552
1553 while (_n_idx_list.is_nonempty()) {
1554 Node* use = _n_idx_list.node();
1555 int idx = _n_idx_list.index();
1556 _n_idx_list.pop();
1557 Node* def = use->in(idx);
1558
1559 // Insert extract operation
1560 _igvn.hash_delete(def);
1561 int def_pos = alignment(def) / data_size(def);
1562
1563 Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));
1564 _igvn.register_new_node_with_optimizer(ex);
1565 _phase->set_ctrl(ex, _phase->get_ctrl(def));
1566 _igvn.replace_input_of(use, idx, ex);
1567 _igvn._worklist.push(def);
1568
1569 bb_insert_after(ex, bb_idx(def));
1570 set_velt_type(ex, velt_type(def));
1571 }
1572 }
1573
1574 //------------------------------is_vector_use---------------------------
1575 // Is use->in(u_idx) a vector use?
1576 bool SuperWord::is_vector_use(Node* use, int u_idx) {
1577 Node_List* u_pk = my_pack(use);
1578 if (u_pk == NULL) return false;
1579 Node* def = use->in(u_idx);
1580 Node_List* d_pk = my_pack(def);
1581 if (d_pk == NULL) {
1582 // check for scalar promotion
1583 Node* n = u_pk->at(0)->in(u_idx);
1584 for (uint i = 1; i < u_pk->size(); i++) {
1585 if (u_pk->at(i)->in(u_idx) != n) return false;
1586 }
1587 return true;
1588 }
1589 if (u_pk->size() != d_pk->size())
1590 return false;
1591 for (uint i = 0; i < u_pk->size(); i++) {
1592 Node* ui = u_pk->at(i);
1593 Node* di = d_pk->at(i);
1594 if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
1595 return false;
1596 }
1597 return true;
1598 }
1599
1600 //------------------------------construct_bb---------------------------
1601 // Construct reverse postorder list of block members
1602 bool SuperWord::construct_bb() {
1603 Node* entry = bb();
1604
1605 assert(_stk.length() == 0, "stk is empty");
1606 assert(_block.length() == 0, "block is empty");
1607 assert(_data_entry.length() == 0, "data_entry is empty");
1608 assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
1609 assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");
1610
1611 // Find non-control nodes with no inputs from within block,
1612 // create a temporary map from node _idx to bb_idx for use
1613 // by the visited and post_visited sets,
1614 // and count number of nodes in block.
1615 int bb_ct = 0;
1616 for (uint i = 0; i < lpt()->_body.size(); i++ ) {
1617 Node *n = lpt()->_body.at(i);
1618 set_bb_idx(n, i); // Create a temporary map
1619 if (in_bb(n)) {
1620 if (n->is_LoadStore() || n->is_MergeMem() ||
1621 (n->is_Proj() && !n->as_Proj()->is_CFG())) {
1622 // Bailout if the loop has LoadStore, MergeMem or data Proj
1623 // nodes. Superword optimization does not work with them.
1624 return false;
1625 }
1626 bb_ct++;
1627 if (!n->is_CFG()) {
1628 bool found = false;
1629 for (uint j = 0; j < n->req(); j++) {
1630 Node* def = n->in(j);
1631 if (def && in_bb(def)) {
1632 found = true;
1633 break;
1634 }
1635 }
1636 if (!found) {
1637 assert(n != entry, "can't be entry");
1657 }
1658 }
1659
1660 // Create an RPO list of nodes in block
1661
1662 visited_clear();
1663 post_visited_clear();
1664
1665 // Push all non-control nodes with no inputs from within block, then control entry
1666 for (int j = 0; j < _data_entry.length(); j++) {
1667 Node* n = _data_entry.at(j);
1668 visited_set(n);
1669 _stk.push(n);
1670 }
1671 visited_set(entry);
1672 _stk.push(entry);
1673
1674 // Do a depth first walk over out edges
1675 int rpo_idx = bb_ct - 1;
1676 int size;
1677 while ((size = _stk.length()) > 0) {
1678 Node* n = _stk.top(); // Leave node on stack
1679 if (!visited_test_set(n)) {
1680 // forward arc in graph
1681 } else if (!post_visited_test(n)) {
1682 // cross or back arc
1683 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1684 Node *use = n->fast_out(i);
1685 if (in_bb(use) && !visited_test(use) &&
1686 // Don't go around backedge
1687 (!use->is_Phi() || n == entry)) {
1688 _stk.push(use);
1689 }
1690 }
1691 if (_stk.length() == size) {
1692 // There were no additional uses, post visit node now
1693 _stk.pop(); // Remove node from stack
1694 assert(rpo_idx >= 0, "");
1695 _block.at_put_grow(rpo_idx, n);
1696 rpo_idx--;
1697 post_visited_set(n);
1698 assert(rpo_idx >= 0 || _stk.is_empty(), "");
1699 }
1700 } else {
1701 _stk.pop(); // Remove post-visited node from stack
1702 }
1703 }
1704
1705 // Create real map of block indices for nodes
1706 for (int j = 0; j < _block.length(); j++) {
1707 Node* n = _block.at(j);
1708 set_bb_idx(n, j);
1709 }
1710
1711 initialize_bb(); // Ensure extra info is allocated.
1712
1713 #ifndef PRODUCT
1714 if (TraceSuperWord) {
1715 print_bb();
1716 tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
1717 for (int m = 0; m < _data_entry.length(); m++) {
1718 tty->print("%3d ", m);
1719 _data_entry.at(m)->dump();
1720 }
1721 tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");
1722 for (int m = 0; m < _mem_slice_head.length(); m++) {
1723 tty->print("%3d ", m); _mem_slice_head.at(m)->dump();
1724 tty->print(" "); _mem_slice_tail.at(m)->dump();
1725 }
1726 }
1727 #endif
1728 assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
1729 return (_mem_slice_head.length() > 0) || (_data_entry.length() > 0);
1730 }
1731
1732 //------------------------------initialize_bb---------------------------
1733 // Initialize per node info
1734 void SuperWord::initialize_bb() {
1735 Node* last = _block.at(_block.length() - 1);
1736 grow_node_info(bb_idx(last));
1737 }
1738
1739 //------------------------------bb_insert_after---------------------------
1740 // Insert n into block after pos
1741 void SuperWord::bb_insert_after(Node* n, int pos) {
1742 int n_pos = pos + 1;
1743 // Make room
1744 for (int i = _block.length() - 1; i >= n_pos; i--) {
1745 _block.at_put_grow(i+1, _block.at(i));
1746 }
1747 for (int j = _node_info.length() - 1; j >= n_pos; j--) {
1748 _node_info.at_put_grow(j+1, _node_info.at(j));
1749 }
1940 //------------------------------in_pack---------------------------
1941 // Is s in pack p?
1942 Node_List* SuperWord::in_pack(Node* s, Node_List* p) {
1943 for (uint i = 0; i < p->size(); i++) {
1944 if (p->at(i) == s) {
1945 return p;
1946 }
1947 }
1948 return NULL;
1949 }
1950
1951 //------------------------------remove_pack_at---------------------------
1952 // Remove the pack at position pos in the packset
1953 void SuperWord::remove_pack_at(int pos) {
1954 Node_List* p = _packset.at(pos);
1955 for (uint i = 0; i < p->size(); i++) {
1956 Node* s = p->at(i);
1957 set_my_pack(s, NULL);
1958 }
1959 _packset.remove_at(pos);
1960 }
1961
1962 //------------------------------executed_first---------------------------
1963 // Return the node executed first in pack p. Uses the RPO block list
1964 // to determine order.
1965 Node* SuperWord::executed_first(Node_List* p) {
1966 Node* n = p->at(0);
1967 int n_rpo = bb_idx(n);
1968 for (uint i = 1; i < p->size(); i++) {
1969 Node* s = p->at(i);
1970 int s_rpo = bb_idx(s);
1971 if (s_rpo < n_rpo) {
1972 n = s;
1973 n_rpo = s_rpo;
1974 }
1975 }
1976 return n;
1977 }
1978
1979 //------------------------------executed_last---------------------------
|
48 _igvn(phase->_igvn),
49 _arena(phase->C->comp_arena()),
50 _packset(arena(), 8, 0, NULL), // packs for the current block
51 _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
52 _block(arena(), 8, 0, NULL), // nodes in current block
53 _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
54 _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
55 _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
56 _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
57 _align_to_ref(NULL), // memory reference to align vectors to
58 _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
59 _dg(_arena), // dependence graph
60 _visited(arena()), // visited node set
61 _post_visited(arena()), // post visited node set
62 _n_idx_list(arena(), 8), // scratch list of (node,index) pairs
63 _stk(arena(), 8, 0, NULL), // scratch stack of nodes
64 _nlist(arena(), 8, 0, NULL), // scratch list of nodes
65 _lpt(NULL), // loop tree node
66 _lp(NULL), // LoopNode
67 _bb(NULL), // basic block
68 _iv(NULL), // induction var
69 _race_possible(false) // cases where SDMU is true
70 {}
71
72 //------------------------------transform_loop---------------------------
73 void SuperWord::transform_loop(IdealLoopTree* lpt) {
74 assert(UseSuperWord, "should be");
75 // Do vectors exist on this architecture?
76 if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
77
78 assert(lpt->_head->is_CountedLoop(), "must be");
79 CountedLoopNode *cl = lpt->_head->as_CountedLoop();
80
81 if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
82
83 if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
84
85 // Check for no control flow in body (other than exit)
86 Node *cl_exit = cl->loopexit();
87 if (cl_exit->in(0) != lpt->_head) return;
88
89 // Make sure the are no extra control users of the loop backedge
129 //
130 // 5) One of the memory references is picked to be an aligned vector reference.
131 // The pre-loop trip count is adjusted to align this reference in the
132 // unrolled body.
133 //
134 // 6) The initial set of pack pairs is seeded with memory references.
135 //
136 // 7) The set of pack pairs is extended by following use->def and def->use links.
137 //
138 // 8) The pairs are combined into vector sized packs.
139 //
140 // 9) Reorder the memory slices to co-locate members of the memory packs.
141 //
142 // 10) Generate ideal vector nodes for the final set of packs and where necessary,
143 // inserting scalar promotion, vector creation from multiple scalars, and
144 // extraction of scalar values from vectors.
145 //
146 void SuperWord::SLP_extract() {
147
148 // Ready the block
149 if (!construct_bb())
150 return; // Exit if no interesting nodes or complex graph.
151
152 dependence_graph();
153
154 compute_max_depth();
155
156 compute_vector_element_type();
157
158 // Attempt vectorization
159
160 find_adjacent_refs();
161
162 extend_packlist();
163
164 combine_packs();
165
166 construct_my_pack_map();
167
168 filter_packs();
623 assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name()));
624 n = n->in(MemNode::Memory);
625 }
626 }
627
628 //------------------------------stmts_can_pack---------------------------
629 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and
630 // s1 aligned at "align"
631 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
632
633 // Do not use superword for non-primitives
634 BasicType bt1 = velt_basic_type(s1);
635 BasicType bt2 = velt_basic_type(s2);
636 if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
637 return false;
638 if (Matcher::max_vector_size(bt1) < 2) {
639 return false; // No vectors for this type
640 }
641
642 if (isomorphic(s1, s2)) {
643 if (independent(s1, s2) || reduction(s1, s2)) {
644 if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
645 if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
646 int s1_align = alignment(s1);
647 int s2_align = alignment(s2);
648 if (s1_align == top_align || s1_align == align) {
649 if (s2_align == top_align || s2_align == align + data_size(s1)) {
650 return true;
651 }
652 }
653 }
654 }
655 }
656 }
657 return false;
658 }
659
660 //------------------------------exists_at---------------------------
661 // Does s exist in a pack at position pos?
662 bool SuperWord::exists_at(Node* s, uint pos) {
663 for (int i = 0; i < _packset.length(); i++) {
701 if (s1->in(0) != s2->in(0)) return false;
702 if (!same_velt_type(s1, s2)) return false;
703 return true;
704 }
705
706 //------------------------------independent---------------------------
707 // Is there no data path from s1 to s2 or s2 to s1?
708 bool SuperWord::independent(Node* s1, Node* s2) {
709 // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
710 int d1 = depth(s1);
711 int d2 = depth(s2);
712 if (d1 == d2) return s1 != s2;
713 Node* deep = d1 > d2 ? s1 : s2;
714 Node* shallow = d1 > d2 ? s2 : s1;
715
716 visited_clear();
717
718 return independent_path(shallow, deep);
719 }
720
721 //------------------------------reduction---------------------------
722 // Is there a data path between s1 and s2 and the nodes reductions?
723 bool SuperWord::reduction(Node* s1, Node* s2) {
724 bool retValue = false;
725 int d1 = depth(s1);
726 int d2 = depth(s2);
727 if (d1 + 1 == d2) {
728 if (is_reduction(s1) && is_reduction(s2)) {
729 // This is an ordered set, so s1 should define s2
730 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
731 Node* t1 = s1->fast_out(i);
732 if (t1 == s2) {
733 // both nodes are reductions and connected
734 retValue = true;
735 }
736 }
737 }
738 }
739
740 return retValue;
741 }
742
743 //------------------------------independent_path------------------------------
744 // Helper for independent
745 bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
746 if (dp >= 1000) return false; // stop deep recursion
747 visited_set(deep);
748 int shal_depth = depth(shallow);
749 assert(shal_depth <= depth(deep), "must be");
750 for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) {
751 Node* pred = preds.current();
752 if (in_bb(pred) && !visited_test(pred)) {
753 if (shallow == pred) {
754 return false;
755 }
756 if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
757 return false;
758 }
759 }
760 }
761 return true;
762 }
766 set_alignment(s1, align);
767 if (align == top_align || align == bottom_align) {
768 set_alignment(s2, align);
769 } else {
770 set_alignment(s2, align + data_size(s1));
771 }
772 }
773
774 //------------------------------data_size---------------------------
775 int SuperWord::data_size(Node* s) {
776 int bsize = type2aelembytes(velt_basic_type(s));
777 assert(bsize != 0, "valid size");
778 return bsize;
779 }
780
781 //------------------------------extend_packlist---------------------------
782 // Extend packset by following use->def and def->use links from pack members.
783 void SuperWord::extend_packlist() {
784 bool changed;
785 do {
786 _packset.sort(&packset_eval);
787 changed = false;
788 for (int i = 0; i < _packset.length(); i++) {
789 Node_List* p = _packset.at(i);
790 changed |= follow_use_defs(p);
791 changed |= follow_def_uses(p);
792 }
793 } while (changed);
794
795 if (_race_possible) {
796 for (int i = 0; i < _packset.length(); i++) {
797 Node_List* p = _packset.at(i);
798 order_def_uses(p);
799 }
800 }
801
802 #ifndef PRODUCT
803 if (TraceSuperWord) {
804 tty->print_cr("\nAfter extend_packlist");
805 print_packset();
806 }
807 #endif
808 }
809
810 //------------------------------follow_use_defs---------------------------
811 // Extend the packset by visiting operand definitions of nodes in pack p
812 bool SuperWord::follow_use_defs(Node_List* p) {
813 assert(p->size() == 2, "just checking");
814 Node* s1 = p->at(0);
815 Node* s2 = p->at(1);
816 assert(s1->req() == s2->req(), "just checking");
817 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
818
819 if (s1->is_Load()) return false;
820
821 int align = alignment(s1);
838 }
839 }
840 }
841 return changed;
842 }
843
844 //------------------------------follow_def_uses---------------------------
845 // Extend the packset by visiting uses of nodes in pack p
846 bool SuperWord::follow_def_uses(Node_List* p) {
847 bool changed = false;
848 Node* s1 = p->at(0);
849 Node* s2 = p->at(1);
850 assert(p->size() == 2, "just checking");
851 assert(s1->req() == s2->req(), "just checking");
852 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
853
854 if (s1->is_Store()) return false;
855
856 int align = alignment(s1);
857 int savings = -1;
858 int num_s1_uses = 0;
859 Node* u1 = NULL;
860 Node* u2 = NULL;
861 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
862 Node* t1 = s1->fast_out(i);
863 num_s1_uses++;
864 if (!in_bb(t1)) continue;
865 for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
866 Node* t2 = s2->fast_out(j);
867 if (!in_bb(t2)) continue;
868 if (!opnd_positions_match(s1, t1, s2, t2))
869 continue;
870 if (stmts_can_pack(t1, t2, align)) {
871 int my_savings = est_savings(t1, t2);
872 if (my_savings > savings) {
873 savings = my_savings;
874 u1 = t1;
875 u2 = t2;
876 }
877 }
878 }
879 }
880 if (num_s1_uses > 1) {
881 _race_possible = true;
882 }
883 if (savings >= 0) {
884 Node_List* pair = new Node_List();
885 pair->push(u1);
886 pair->push(u2);
887 _packset.append(pair);
888 set_alignment(u1, u2, align);
889 changed = true;
890 }
891 return changed;
892 }
893
894 //------------------------------order_def_uses---------------------------
895 // For extended packsets, ordinally arrange uses packset by major component
896 void SuperWord::order_def_uses(Node_List* p) {
897 Node* s1 = p->at(0);
898
899 if (s1->is_Store()) return;
900
901 // reductions are always managed beforehand
902 if (is_reduction(s1)) return;
903
904 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
905 Node* t1 = s1->fast_out(i);
906
907 // Only allow operand swap on commuting operations
908 if (!t1->is_Add() && !t1->is_Mul()) {
909 break;
910 }
911
912 // Now find t1's packset
913 Node_List* p2 = NULL;
914 for (int j = 0; j < _packset.length(); j++) {
915 p2 = _packset.at(j);
916 Node* first = p2->at(0);
917 if (t1 == first) {
918 break;
919 }
920 p2 = NULL;
921 }
922 // Arrange all sub components by the major component
923 if (p2 != NULL) {
924 for (uint j = 1; j < p->size(); j++) {
925 Node* d1 = p->at(j);
926 Node* u1 = p2->at(j);
927 opnd_positions_match(s1, t1, d1, u1);
928 }
929 }
930 }
931 }
932
933 //---------------------------opnd_positions_match-------------------------
934 // Is the use of d1 in u1 at the same operand position as d2 in u2?
935 bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
936 // check reductions to see if they are marshalled to represent the reduction
937 // operator in a specified opnd
938 if (is_reduction(u1) && is_reduction(u2)) {
939 // ensure reductions have phis and reduction definitions feeding the 1st operand
940 Node* first = u1->in(2);
941 if (first->is_Phi() || is_reduction(first)) {
942 u1->swap_edges(1, 2);
943 }
944 // ensure reductions have phis and reduction definitions feeding the 1st operand
945 first = u2->in(2);
946 if (first->is_Phi() || is_reduction(first)) {
947 u2->swap_edges(1, 2);
948 }
949 return true;
950 }
951
952 uint ct = u1->req();
953 if (ct != u2->req()) return false;
954 uint i1 = 0;
955 uint i2 = 0;
956 do {
957 for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
958 for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
959 if (i1 != i2) {
960 if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
961 // Further analysis relies on operands position matching.
962 u2->swap_edges(i1, i2);
963 } else {
964 return false;
965 }
966 }
967 } while (i1 < ct);
968 return true;
969 }
970
971 //------------------------------est_savings---------------------------
1013 if (ct < s2->outcnt()) save_use += unpack_cost(1);
1014
1015 return MAX2(save_in, save_use);
1016 }
1017
1018 //------------------------------costs---------------------------
1019 int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
1020 int SuperWord::pack_cost(int ct) { return ct; }
1021 int SuperWord::unpack_cost(int ct) { return ct; }
1022
1023 //------------------------------combine_packs---------------------------
1024 // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
1025 void SuperWord::combine_packs() {
1026 bool changed = true;
1027 // Combine packs regardless max vector size.
1028 while (changed) {
1029 changed = false;
1030 for (int i = 0; i < _packset.length(); i++) {
1031 Node_List* p1 = _packset.at(i);
1032 if (p1 == NULL) continue;
1033 // Because of sorting we can start at i + 1
1034 for (int j = i + 1; j < _packset.length(); j++) {
1035 Node_List* p2 = _packset.at(j);
1036 if (p2 == NULL) continue;
1037 if (i == j) continue;
1038 if (p1->at(p1->size()-1) == p2->at(0)) {
1039 for (uint k = 1; k < p2->size(); k++) {
1040 p1->push(p2->at(k));
1041 }
1042 _packset.at_put(j, NULL);
1043 changed = true;
1044 }
1045 }
1046 }
1047 }
1048
1049 // Split packs which have size greater then max vector size.
1050 for (int i = 0; i < _packset.length(); i++) {
1051 Node_List* p1 = _packset.at(i);
1052 if (p1 != NULL) {
1053 BasicType bt = velt_basic_type(p1->at(0));
1054 uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
1141 }
1142 #endif
1143 remove_pack_at(i);
1144 changed = true;
1145 }
1146 }
1147 } while (changed);
1148
1149 #ifndef PRODUCT
1150 if (TraceSuperWord) {
1151 tty->print_cr("\nAfter filter_packs");
1152 print_packset();
1153 tty->cr();
1154 }
1155 #endif
1156 }
1157
1158 //------------------------------implemented---------------------------
1159 // Can code be generated for pack p?
1160 bool SuperWord::implemented(Node_List* p) {
1161 bool retValue = false;
1162 Node* p0 = p->at(0);
1163 if (p0 != NULL) {
1164 int opc = p0->Opcode();
1165 uint size = p->size();
1166 if (is_reduction(p0)) {
1167 const Type *arith_type = p0->bottom_type();
1168 retValue = ReductionNode::implemented(opc, size, arith_type->basic_type());
1169 } else {
1170 retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
1171 }
1172 }
1173 return retValue;
1174 }
1175
1176 //------------------------------same_inputs--------------------------
1177 // For pack p, are all idx operands the same?
1178 static bool same_inputs(Node_List* p, int idx) {
1179 Node* p0 = p->at(0);
1180 uint vlen = p->size();
1181 Node* p0_def = p0->in(idx);
1182 for (uint i = 1; i < vlen; i++) {
1183 Node* pi = p->at(i);
1184 Node* pi_def = pi->in(idx);
1185 if (p0_def != pi_def)
1186 return false;
1187 }
1188 return true;
1189 }
1190
1191 //------------------------------profitable---------------------------
1192 // For pack p, are all operands and all uses (with in the block) vector?
1193 bool SuperWord::profitable(Node_List* p) {
1194 Node* p0 = p->at(0);
1195 uint start, end;
1196 VectorNode::vector_operands(p0, &start, &end);
1197
1198 // Return false if some inputs are not vectors or vectors with different
1199 // size or alignment.
1200 // Also, for now, return false if not scalar promotion case when inputs are
1201 // the same. Later, implement PackNode and allow differing, non-vector inputs
1202 // (maybe just the ones from outside the block.)
1203 for (uint i = start; i < end; i++) {
1204 if (!is_vector_use(p0, i))
1205 return false;
1206 }
1207 // Check if reductions are connected
1208 if (is_reduction(p0)) {
1209 Node* second_in = p0->in(2);
1210 Node_List* second_pk = my_pack(second_in);
1211 if (second_pk == NULL) {
1212 // Remove reduction flag if no parent pack, it is not profitable
1213 p0->remove_flag(Node::Flag_has_reduction);
1214 return false;
1215 } else if (second_pk->size() != p->size()) {
1216 return false;
1217 }
1218 }
1219 if (VectorNode::is_shift(p0)) {
1220 // For now, return false if shift count is vector or not scalar promotion
1221 // case (different shift counts) because it is not supported yet.
1222 Node* cnt = p0->in(2);
1223 Node_List* cnt_pk = my_pack(cnt);
1224 if (cnt_pk != NULL)
1225 return false;
1226 if (!same_inputs(p, 2))
1227 return false;
1228 }
1229 if (!p0->is_Store()) {
1230 // For now, return false if not all uses are vector.
1231 // Later, implement ExtractNode and allow non-vector uses (maybe
1232 // just the ones outside the block.)
1233 for (uint i = 0; i < p->size(); i++) {
1234 Node* def = p->at(i);
1235 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1236 Node* use = def->fast_out(j);
1237 for (uint k = 0; k < use->req(); k++) {
1238 Node* n = use->in(k);
1239 if (def == n) {
1240 // reductions can be loop carried dependences
1241 if (is_reduction(def) && use->is_Phi())
1242 continue;
1243 if (!is_vector_use(use, k)) {
1244 return false;
1245 }
1246 }
1247 }
1248 }
1249 }
1250 }
1251 return true;
1252 }
1253
1254 //------------------------------schedule---------------------------
1255 // Adjust the memory graph for the packed operations
1256 void SuperWord::schedule() {
1257
1258 // Co-locate in the memory graph the members of each memory pack
1259 for (int i = 0; i < _packset.length(); i++) {
1260 co_locate_pack(_packset.at(i));
1261 }
1262 }
1507 mem = mem->in(MemNode::Memory);
1508 } else {
1509 break; // dependent memory
1510 }
1511 }
1512 Node* adr = low_adr->in(MemNode::Address);
1513 const TypePtr* atyp = n->adr_type();
1514 vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
1515 vlen_in_bytes = vn->as_LoadVector()->memory_size();
1516 } else if (n->is_Store()) {
1517 // Promote value to be stored to vector
1518 Node* val = vector_opd(p, MemNode::ValueIn);
1519 Node* ctl = n->in(MemNode::Control);
1520 Node* mem = first->in(MemNode::Memory);
1521 Node* adr = low_adr->in(MemNode::Address);
1522 const TypePtr* atyp = n->adr_type();
1523 vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
1524 vlen_in_bytes = vn->as_StoreVector()->memory_size();
1525 } else if (n->req() == 3) {
1526 // Promote operands to vector
1527 Node* in1 = NULL;
1528 bool node_isa_reduction = is_reduction(n);
1529 if (node_isa_reduction) {
1530 // the input to the first reduction operation is retained
1531 in1 = low_adr->in(1);
1532 } else {
1533 in1 = vector_opd(p, 1);
1534 }
1535 Node* in2 = vector_opd(p, 2);
1536 if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) {
1537 // Move invariant vector input into second position to avoid register spilling.
1538 Node* tmp = in1;
1539 in1 = in2;
1540 in2 = tmp;
1541 }
1542 if (node_isa_reduction) {
1543 const Type *arith_type = n->bottom_type();
1544 vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type());
1545 if (in2->is_Load()) {
1546 vlen_in_bytes = in2->as_LoadVector()->memory_size();
1547 } else {
1548 vlen_in_bytes = in2->as_Vector()->length_in_bytes();
1549 }
1550 } else {
1551 vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
1552 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
1553 }
1554 } else {
1555 ShouldNotReachHere();
1556 }
1557 assert(vn != NULL, "sanity");
1558 _igvn.register_new_node_with_optimizer(vn);
1559 _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
1560 for (uint j = 0; j < p->size(); j++) {
1561 Node* pm = p->at(j);
1562 _igvn.replace_node(pm, vn);
1563 }
1564 _igvn._worklist.push(vn);
1565
1566 if (vlen_in_bytes > max_vlen_in_bytes) {
1567 max_vlen_in_bytes = vlen_in_bytes;
1568 }
1569 #ifdef ASSERT
1570 if (TraceNewVectors) {
1571 tty->print("new Vector node: ");
1572 vn->dump();
1573 }
1673 Node* def = p->at(i);
1674 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1675 Node* use = def->fast_out(j);
1676 for (uint k = 0; k < use->req(); k++) {
1677 Node* n = use->in(k);
1678 if (def == n) {
1679 if (!is_vector_use(use, k)) {
1680 _n_idx_list.push(use, k);
1681 }
1682 }
1683 }
1684 }
1685 }
1686
1687 while (_n_idx_list.is_nonempty()) {
1688 Node* use = _n_idx_list.node();
1689 int idx = _n_idx_list.index();
1690 _n_idx_list.pop();
1691 Node* def = use->in(idx);
1692
1693 if (is_reduction(def)) continue;
1694
1695 // Insert extract operation
1696 _igvn.hash_delete(def);
1697 int def_pos = alignment(def) / data_size(def);
1698
1699 Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));
1700 _igvn.register_new_node_with_optimizer(ex);
1701 _phase->set_ctrl(ex, _phase->get_ctrl(def));
1702 _igvn.replace_input_of(use, idx, ex);
1703 _igvn._worklist.push(def);
1704
1705 bb_insert_after(ex, bb_idx(def));
1706 set_velt_type(ex, velt_type(def));
1707 }
1708 }
1709
1710 //------------------------------is_vector_use---------------------------
1711 // Is use->in(u_idx) a vector use?
1712 bool SuperWord::is_vector_use(Node* use, int u_idx) {
1713 Node_List* u_pk = my_pack(use);
1714 if (u_pk == NULL) return false;
1715 if (is_reduction(use)) return true;
1716 Node* def = use->in(u_idx);
1717 Node_List* d_pk = my_pack(def);
1718 if (d_pk == NULL) {
1719 // check for scalar promotion
1720 Node* n = u_pk->at(0)->in(u_idx);
1721 for (uint i = 1; i < u_pk->size(); i++) {
1722 if (u_pk->at(i)->in(u_idx) != n) return false;
1723 }
1724 return true;
1725 }
1726 if (u_pk->size() != d_pk->size())
1727 return false;
1728 for (uint i = 0; i < u_pk->size(); i++) {
1729 Node* ui = u_pk->at(i);
1730 Node* di = d_pk->at(i);
1731 if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
1732 return false;
1733 }
1734 return true;
1735 }
1736
1737 //------------------------------is_reduction---------------------------
1738 // Is the current node a reduction node?
1739 bool SuperWord::is_reduction(Node* n) {
1740 uint flags = n->flags();
1741 if ((flags & Node::Flag_has_reduction) == Node::Flag_has_reduction) {
1742 return true;
1743 }
1744 return false;
1745 }
1746
1747 //------------------------------construct_bb---------------------------
1748 // Construct reverse postorder list of block members
1749 bool SuperWord::construct_bb() {
1750 Node* entry = bb();
1751
1752 assert(_stk.length() == 0, "stk is empty");
1753 assert(_block.length() == 0, "block is empty");
1754 assert(_data_entry.length() == 0, "data_entry is empty");
1755 assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
1756 assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");
1757
1758 // Find non-control nodes with no inputs from within block,
1759 // create a temporary map from node _idx to bb_idx for use
1760 // by the visited and post_visited sets,
1761 // and count number of nodes in block.
1762 int bb_ct = 0;
1763 for (uint i = 0; i < lpt()->_body.size(); i++) {
1764 Node *n = lpt()->_body.at(i);
1765 n->set_attr(0);
1766 set_bb_idx(n, i); // Create a temporary map
1767 if (in_bb(n)) {
1768 if (n->is_LoadStore() || n->is_MergeMem() ||
1769 (n->is_Proj() && !n->as_Proj()->is_CFG())) {
1770 // Bailout if the loop has LoadStore, MergeMem or data Proj
1771 // nodes. Superword optimization does not work with them.
1772 return false;
1773 }
1774 bb_ct++;
1775 if (!n->is_CFG()) {
1776 bool found = false;
1777 for (uint j = 0; j < n->req(); j++) {
1778 Node* def = n->in(j);
1779 if (def && in_bb(def)) {
1780 found = true;
1781 break;
1782 }
1783 }
1784 if (!found) {
1785 assert(n != entry, "can't be entry");
1805 }
1806 }
1807
1808 // Create an RPO list of nodes in block
1809
1810 visited_clear();
1811 post_visited_clear();
1812
1813 // Push all non-control nodes with no inputs from within block, then control entry
1814 for (int j = 0; j < _data_entry.length(); j++) {
1815 Node* n = _data_entry.at(j);
1816 visited_set(n);
1817 _stk.push(n);
1818 }
1819 visited_set(entry);
1820 _stk.push(entry);
1821
1822 // Do a depth first walk over out edges
1823 int rpo_idx = bb_ct - 1;
1824 int size;
1825 int reduction_uses = 0;
1826 while ((size = _stk.length()) > 0) {
1827 Node* n = _stk.top(); // Leave node on stack
1828 if (!visited_test_set(n)) {
1829 // forward arc in graph
1830 } else if (!post_visited_test(n)) {
1831 // cross or back arc
1832 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1833 Node *use = n->fast_out(i);
1834 if (in_bb(use) && !visited_test(use) &&
1835 // Don't go around backedge
1836 (!use->is_Phi() || n == entry)) {
1837 if (is_reduction(use)) {
1838 // First see if we can map the reduction on the given system we are on, then
1839 // make a data entry operation for each reduction we see.
1840 const Type *arith_type = use->bottom_type();
1841 int vopc = ReductionNode::opcode(use->Opcode(), arith_type->basic_type());
1842 if (vopc != use->Opcode()) {
1843 if (Matcher::match_rule_supported(vopc)) {
1844 _stk.push(use);
1845 reduction_uses++;
1846 } else {
1847 // failed a support issue
1848 return false;
1849 }
1850 }
1851 } else {
1852 _stk.push(use);
1853 }
1854 }
1855 }
1856 if (_stk.length() == size) {
1857 // There were no additional uses, post visit node now
1858 _stk.pop(); // Remove node from stack
1859 assert(rpo_idx >= 0, "");
1860 _block.at_put_grow(rpo_idx, n);
1861 rpo_idx--;
1862 post_visited_set(n);
1863 assert(rpo_idx >= 0 || _stk.is_empty(), "");
1864 }
1865 } else {
1866 _stk.pop(); // Remove post-visited node from stack
1867 }
1868 }
1869
1870 // Create real map of block indices for nodes
1871 for (int j = 0; j < _block.length(); j++) {
1872 Node* n = _block.at(j);
1873 set_bb_idx(n, j);
1874 }
1875
1876 // Ensure extra info is allocated.
1877 initialize_bb();
1878
1879 #ifndef PRODUCT
1880 if (TraceSuperWord) {
1881 print_bb();
1882 tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
1883 for (int m = 0; m < _data_entry.length(); m++) {
1884 tty->print("%3d ", m);
1885 _data_entry.at(m)->dump();
1886 }
1887 tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");
1888 for (int m = 0; m < _mem_slice_head.length(); m++) {
1889 tty->print("%3d ", m); _mem_slice_head.at(m)->dump();
1890 tty->print(" "); _mem_slice_tail.at(m)->dump();
1891 }
1892 }
1893 #endif
1894 assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
1895 return (_mem_slice_head.length() > 0) || (reduction_uses > 0) || (_data_entry.length() > 0);
1896 }
1897
1898 //------------------------------initialize_bb---------------------------
1899 // Initialize per node info
1900 void SuperWord::initialize_bb() {
1901 Node* last = _block.at(_block.length() - 1);
1902 grow_node_info(bb_idx(last));
1903 }
1904
1905 //------------------------------bb_insert_after---------------------------
1906 // Insert n into block after pos
1907 void SuperWord::bb_insert_after(Node* n, int pos) {
1908 int n_pos = pos + 1;
1909 // Make room
1910 for (int i = _block.length() - 1; i >= n_pos; i--) {
1911 _block.at_put_grow(i+1, _block.at(i));
1912 }
1913 for (int j = _node_info.length() - 1; j >= n_pos; j--) {
1914 _node_info.at_put_grow(j+1, _node_info.at(j));
1915 }
2106 //------------------------------in_pack---------------------------
2107 // Is s in pack p?
2108 Node_List* SuperWord::in_pack(Node* s, Node_List* p) {
2109 for (uint i = 0; i < p->size(); i++) {
2110 if (p->at(i) == s) {
2111 return p;
2112 }
2113 }
2114 return NULL;
2115 }
2116
2117 //------------------------------remove_pack_at---------------------------
2118 // Remove the pack at position pos in the packset
2119 void SuperWord::remove_pack_at(int pos) {
2120 Node_List* p = _packset.at(pos);
2121 for (uint i = 0; i < p->size(); i++) {
2122 Node* s = p->at(i);
2123 set_my_pack(s, NULL);
2124 }
2125 _packset.remove_at(pos);
2126 }
2127
2128 int SuperWord::packset_eval(Node_List** x, Node_List** y) {
2129 Node* a = (*x)->at(0);
2130 Node* b = (*y)->at(0);
2131 int retValue = 0;
2132 if ((a != NULL) && (b != NULL)) {
2133 int align_a = a->attr();
2134 int align_b = b->attr();
2135 if (align_a < align_b) retValue = -1;
2136 if (align_a > align_b) retValue = 1;
2137 }
2138 return retValue;
2139 }
2140
2141 //------------------------------executed_first---------------------------
2142 // Return the node executed first in pack p. Uses the RPO block list
2143 // to determine order.
2144 Node* SuperWord::executed_first(Node_List* p) {
2145 Node* n = p->at(0);
2146 int n_rpo = bb_idx(n);
2147 for (uint i = 1; i < p->size(); i++) {
2148 Node* s = p->at(i);
2149 int s_rpo = bb_idx(s);
2150 if (s_rpo < n_rpo) {
2151 n = s;
2152 n_rpo = s_rpo;
2153 }
2154 }
2155 return n;
2156 }
2157
2158 //------------------------------executed_last---------------------------
|