< prev index next >

src/share/vm/opto/superword.cpp

Print this page




  48   _igvn(phase->_igvn),
  49   _arena(phase->C->comp_arena()),
  50   _packset(arena(), 8,  0, NULL),         // packs for the current block
  51   _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
  52   _block(arena(), 8,  0, NULL),           // nodes in current block
  53   _data_entry(arena(), 8,  0, NULL),      // nodes with all inputs from outside
  54   _mem_slice_head(arena(), 8,  0, NULL),  // memory slice heads
  55   _mem_slice_tail(arena(), 8,  0, NULL),  // memory slice tails
  56   _node_info(arena(), 8,  0, SWNodeInfo::initial), // info needed per node
  57   _align_to_ref(NULL),                    // memory reference to align vectors to
  58   _disjoint_ptrs(arena(), 8,  0, OrderedPair::initial), // runtime disambiguated pointer pairs
  59   _dg(_arena),                            // dependence graph
  60   _visited(arena()),                      // visited node set
  61   _post_visited(arena()),                 // post visited node set
  62   _n_idx_list(arena(), 8),                // scratch list of (node,index) pairs
  63   _stk(arena(), 8, 0, NULL),              // scratch stack of nodes
  64   _nlist(arena(), 8, 0, NULL),            // scratch list of nodes
  65   _lpt(NULL),                             // loop tree node
  66   _lp(NULL),                              // LoopNode
  67   _bb(NULL),                              // basic block
  68   _iv(NULL)                               // induction var

  69 {}
  70 
  71 //------------------------------transform_loop---------------------------
  72 void SuperWord::transform_loop(IdealLoopTree* lpt) {
  73   assert(UseSuperWord, "should be");
  74   // Do vectors exist on this architecture?
  75   if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
  76 
  77   assert(lpt->_head->is_CountedLoop(), "must be");
  78   CountedLoopNode *cl = lpt->_head->as_CountedLoop();
  79 
  80   if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
  81 
  82   if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
  83 
  84   // Check for no control flow in body (other than exit)
  85   Node *cl_exit = cl->loopexit();
  86   if (cl_exit->in(0) != lpt->_head) return;
  87 
  88   // Make sure the are no extra control users of the loop backedge


 128 //
 129 // 5) One of the memory references is picked to be an aligned vector reference.
 130 //    The pre-loop trip count is adjusted to align this reference in the
 131 //    unrolled body.
 132 //
 133 // 6) The initial set of pack pairs is seeded with memory references.
 134 //
 135 // 7) The set of pack pairs is extended by following use->def and def->use links.
 136 //
 137 // 8) The pairs are combined into vector sized packs.
 138 //
 139 // 9) Reorder the memory slices to co-locate members of the memory packs.
 140 //
 141 // 10) Generate ideal vector nodes for the final set of packs and where necessary,
 142 //    inserting scalar promotion, vector creation from multiple scalars, and
 143 //    extraction of scalar values from vectors.
 144 //
 145 void SuperWord::SLP_extract() {
 146 
 147   // Ready the block
 148 
 149   if (!construct_bb())
 150     return; // Exit if no interesting nodes or complex graph.
 151 
 152   dependence_graph();
 153 
 154   compute_max_depth();
 155 
 156   compute_vector_element_type();
 157 
 158   // Attempt vectorization
 159 
 160   find_adjacent_refs();
 161 
 162   extend_packlist();
 163 
 164   combine_packs();
 165 
 166   construct_my_pack_map();
 167 
 168   filter_packs();


 623     assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name()));
 624     n = n->in(MemNode::Memory);
 625   }
 626 }
 627 
 628 //------------------------------stmts_can_pack---------------------------
 629 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and
 630 // s1 aligned at "align"
 631 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
 632 
 633   // Do not use superword for non-primitives
 634   BasicType bt1 = velt_basic_type(s1);
 635   BasicType bt2 = velt_basic_type(s2);
 636   if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
 637     return false;
 638   if (Matcher::max_vector_size(bt1) < 2) {
 639     return false; // No vectors for this type
 640   }
 641 
 642   if (isomorphic(s1, s2)) {
 643     if (independent(s1, s2)) {
 644       if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
 645         if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
 646           int s1_align = alignment(s1);
 647           int s2_align = alignment(s2);
 648           if (s1_align == top_align || s1_align == align) {
 649             if (s2_align == top_align || s2_align == align + data_size(s1)) {
 650               return true;
 651             }
 652           }
 653         }
 654       }
 655     }
 656   }
 657   return false;
 658 }
 659 
 660 //------------------------------exists_at---------------------------
 661 // Does s exist in a pack at position pos?
 662 bool SuperWord::exists_at(Node* s, uint pos) {
 663   for (int i = 0; i < _packset.length(); i++) {


 701   if (s1->in(0) != s2->in(0)) return false;
 702   if (!same_velt_type(s1, s2)) return false;
 703   return true;
 704 }
 705 
 706 //------------------------------independent---------------------------
 707 // Is there no data path from s1 to s2 or s2 to s1?
 708 bool SuperWord::independent(Node* s1, Node* s2) {
 709   //  assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
 710   int d1 = depth(s1);
 711   int d2 = depth(s2);
 712   if (d1 == d2) return s1 != s2;
 713   Node* deep    = d1 > d2 ? s1 : s2;
 714   Node* shallow = d1 > d2 ? s2 : s1;
 715 
 716   visited_clear();
 717 
 718   return independent_path(shallow, deep);
 719 }
 720 






















 721 //------------------------------independent_path------------------------------
 722 // Helper for independent
 723 bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
 724   if (dp >= 1000) return false; // stop deep recursion
 725   visited_set(deep);
 726   int shal_depth = depth(shallow);
 727   assert(shal_depth <= depth(deep), "must be");
 728   for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) {
 729     Node* pred = preds.current();
 730     if (in_bb(pred) && !visited_test(pred)) {
 731       if (shallow == pred) {
 732         return false;
 733       }
 734       if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
 735         return false;
 736       }
 737     }
 738   }
 739   return true;
 740 }


 744   set_alignment(s1, align);
 745   if (align == top_align || align == bottom_align) {
 746     set_alignment(s2, align);
 747   } else {
 748     set_alignment(s2, align + data_size(s1));
 749   }
 750 }
 751 
 752 //------------------------------data_size---------------------------
 753 int SuperWord::data_size(Node* s) {
 754   int bsize = type2aelembytes(velt_basic_type(s));
 755   assert(bsize != 0, "valid size");
 756   return bsize;
 757 }
 758 
 759 //------------------------------extend_packlist---------------------------
 760 // Extend packset by following use->def and def->use links from pack members.
 761 void SuperWord::extend_packlist() {
 762   bool changed;
 763   do {

 764     changed = false;
 765     for (int i = 0; i < _packset.length(); i++) {
 766       Node_List* p = _packset.at(i);
 767       changed |= follow_use_defs(p);
 768       changed |= follow_def_uses(p);
 769     }
 770   } while (changed);
 771 







 772 #ifndef PRODUCT
 773   if (TraceSuperWord) {
 774     tty->print_cr("\nAfter extend_packlist");
 775     print_packset();
 776   }
 777 #endif
 778 }
 779 
 780 //------------------------------follow_use_defs---------------------------
 781 // Extend the packset by visiting operand definitions of nodes in pack p
 782 bool SuperWord::follow_use_defs(Node_List* p) {
 783   assert(p->size() == 2, "just checking");
 784   Node* s1 = p->at(0);
 785   Node* s2 = p->at(1);
 786   assert(s1->req() == s2->req(), "just checking");
 787   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
 788 
 789   if (s1->is_Load()) return false;
 790 
 791   int align = alignment(s1);


 808       }
 809     }
 810   }
 811   return changed;
 812 }
 813 
 814 //------------------------------follow_def_uses---------------------------
 815 // Extend the packset by visiting uses of nodes in pack p
 816 bool SuperWord::follow_def_uses(Node_List* p) {
 817   bool changed = false;
 818   Node* s1 = p->at(0);
 819   Node* s2 = p->at(1);
 820   assert(p->size() == 2, "just checking");
 821   assert(s1->req() == s2->req(), "just checking");
 822   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
 823 
 824   if (s1->is_Store()) return false;
 825 
 826   int align = alignment(s1);
 827   int savings = -1;

 828   Node* u1 = NULL;
 829   Node* u2 = NULL;
 830   for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
 831     Node* t1 = s1->fast_out(i);

 832     if (!in_bb(t1)) continue;
 833     for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
 834       Node* t2 = s2->fast_out(j);
 835       if (!in_bb(t2)) continue;
 836       if (!opnd_positions_match(s1, t1, s2, t2))
 837         continue;
 838       if (stmts_can_pack(t1, t2, align)) {
 839         int my_savings = est_savings(t1, t2);
 840         if (my_savings > savings) {
 841           savings = my_savings;
 842           u1 = t1;
 843           u2 = t2;
 844         }
 845       }
 846     }
 847   }



 848   if (savings >= 0) {
 849     Node_List* pair = new Node_List();
 850     pair->push(u1);
 851     pair->push(u2);
 852     _packset.append(pair);
 853     set_alignment(u1, u2, align);
 854     changed = true;
 855   }
 856   return changed;
 857 }
 858 







































 859 //---------------------------opnd_positions_match-------------------------
 860 // Is the use of d1 in u1 at the same operand position as d2 in u2?
 861 bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
















 862   uint ct = u1->req();
 863   if (ct != u2->req()) return false;
 864   uint i1 = 0;
 865   uint i2 = 0;
 866   do {
 867     for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
 868     for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
 869     if (i1 != i2) {
 870       if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
 871         // Further analysis relies on operands position matching.
 872         u2->swap_edges(i1, i2);
 873       } else {
 874         return false;
 875       }
 876     }
 877   } while (i1 < ct);
 878   return true;
 879 }
 880 
 881 //------------------------------est_savings---------------------------


 923   if (ct < s2->outcnt()) save_use += unpack_cost(1);
 924 
 925   return MAX2(save_in, save_use);
 926 }
 927 
 928 //------------------------------costs---------------------------
 929 int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
 930 int SuperWord::pack_cost(int ct)   { return ct; }
 931 int SuperWord::unpack_cost(int ct) { return ct; }
 932 
 933 //------------------------------combine_packs---------------------------
 934 // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
 935 void SuperWord::combine_packs() {
 936   bool changed = true;
 937   // Combine packs regardless max vector size.
 938   while (changed) {
 939     changed = false;
 940     for (int i = 0; i < _packset.length(); i++) {
 941       Node_List* p1 = _packset.at(i);
 942       if (p1 == NULL) continue;
 943       for (int j = 0; j < _packset.length(); j++) {

 944         Node_List* p2 = _packset.at(j);
 945         if (p2 == NULL) continue;
 946         if (i == j) continue;
 947         if (p1->at(p1->size()-1) == p2->at(0)) {
 948           for (uint k = 1; k < p2->size(); k++) {
 949             p1->push(p2->at(k));
 950           }
 951           _packset.at_put(j, NULL);
 952           changed = true;
 953         }
 954       }
 955     }
 956   }
 957 
 958   // Split packs which have size greater then max vector size.
 959   for (int i = 0; i < _packset.length(); i++) {
 960     Node_List* p1 = _packset.at(i);
 961     if (p1 != NULL) {
 962       BasicType bt = velt_basic_type(p1->at(0));
 963       uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector


1050         }
1051 #endif
1052         remove_pack_at(i);
1053         changed = true;
1054       }
1055     }
1056   } while (changed);
1057 
1058 #ifndef PRODUCT
1059   if (TraceSuperWord) {
1060     tty->print_cr("\nAfter filter_packs");
1061     print_packset();
1062     tty->cr();
1063   }
1064 #endif
1065 }
1066 
1067 //------------------------------implemented---------------------------
1068 // Can code be generated for pack p?
1069 bool SuperWord::implemented(Node_List* p) {

1070   Node* p0 = p->at(0);
1071   return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));










1072 }
1073 
1074 //------------------------------same_inputs--------------------------
1075 // For pack p, are all idx operands the same?
1076 static bool same_inputs(Node_List* p, int idx) {
1077   Node* p0 = p->at(0);
1078   uint vlen = p->size();
1079   Node* p0_def = p0->in(idx);
1080   for (uint i = 1; i < vlen; i++) {
1081     Node* pi = p->at(i);
1082     Node* pi_def = pi->in(idx);
1083     if (p0_def != pi_def)
1084       return false;
1085   }
1086   return true;
1087 }
1088 
1089 //------------------------------profitable---------------------------
1090 // For pack p, are all operands and all uses (with in the block) vector?
1091 bool SuperWord::profitable(Node_List* p) {
1092   Node* p0 = p->at(0);
1093   uint start, end;
1094   VectorNode::vector_operands(p0, &start, &end);
1095 
1096   // Return false if some inputs are not vectors or vectors with different
1097   // size or alignment.
1098   // Also, for now, return false if not scalar promotion case when inputs are
1099   // the same. Later, implement PackNode and allow differing, non-vector inputs
1100   // (maybe just the ones from outside the block.)
1101   for (uint i = start; i < end; i++) {
1102     if (!is_vector_use(p0, i))
1103       return false;
1104   }












1105   if (VectorNode::is_shift(p0)) {
1106     // For now, return false if shift count is vector or not scalar promotion
1107     // case (different shift counts) because it is not supported yet.
1108     Node* cnt = p0->in(2);
1109     Node_List* cnt_pk = my_pack(cnt);
1110     if (cnt_pk != NULL)
1111       return false;
1112     if (!same_inputs(p, 2))
1113       return false;
1114   }
1115   if (!p0->is_Store()) {
1116     // For now, return false if not all uses are vector.
1117     // Later, implement ExtractNode and allow non-vector uses (maybe
1118     // just the ones outside the block.)
1119     for (uint i = 0; i < p->size(); i++) {
1120       Node* def = p->at(i);
1121       for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1122         Node* use = def->fast_out(j);
1123         for (uint k = 0; k < use->req(); k++) {
1124           Node* n = use->in(k);
1125           if (def == n) {



1126             if (!is_vector_use(use, k)) {
1127               return false;
1128             }
1129           }
1130         }
1131       }
1132     }
1133   }
1134   return true;
1135 }
1136 
1137 //------------------------------schedule---------------------------
1138 // Adjust the memory graph for the packed operations
1139 void SuperWord::schedule() {
1140 
1141   // Co-locate in the memory graph the members of each memory pack
1142   for (int i = 0; i < _packset.length(); i++) {
1143     co_locate_pack(_packset.at(i));
1144   }
1145 }


1390             mem = mem->in(MemNode::Memory);
1391           } else {
1392             break; // dependent memory
1393           }
1394         }
1395         Node* adr = low_adr->in(MemNode::Address);
1396         const TypePtr* atyp = n->adr_type();
1397         vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
1398         vlen_in_bytes = vn->as_LoadVector()->memory_size();
1399       } else if (n->is_Store()) {
1400         // Promote value to be stored to vector
1401         Node* val = vector_opd(p, MemNode::ValueIn);
1402         Node* ctl = n->in(MemNode::Control);
1403         Node* mem = first->in(MemNode::Memory);
1404         Node* adr = low_adr->in(MemNode::Address);
1405         const TypePtr* atyp = n->adr_type();
1406         vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
1407         vlen_in_bytes = vn->as_StoreVector()->memory_size();
1408       } else if (n->req() == 3) {
1409         // Promote operands to vector
1410         Node* in1 = vector_opd(p, 1);







1411         Node* in2 = vector_opd(p, 2);
1412         if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {
1413           // Move invariant vector input into second position to avoid register spilling.
1414           Node* tmp = in1;
1415           in1 = in2;
1416           in2 = tmp;
1417         }









1418         vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
1419         vlen_in_bytes = vn->as_Vector()->length_in_bytes();

1420       } else {
1421         ShouldNotReachHere();
1422       }
1423       assert(vn != NULL, "sanity");
1424       _igvn.register_new_node_with_optimizer(vn);
1425       _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
1426       for (uint j = 0; j < p->size(); j++) {
1427         Node* pm = p->at(j);
1428         _igvn.replace_node(pm, vn);
1429       }
1430       _igvn._worklist.push(vn);
1431 
1432       if (vlen_in_bytes > max_vlen_in_bytes) {
1433         max_vlen_in_bytes = vlen_in_bytes;
1434       }
1435 #ifdef ASSERT
1436       if (TraceNewVectors) {
1437         tty->print("new Vector node: ");
1438         vn->dump();
1439       }


1539     Node* def = p->at(i);
1540     for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1541       Node* use = def->fast_out(j);
1542       for (uint k = 0; k < use->req(); k++) {
1543         Node* n = use->in(k);
1544         if (def == n) {
1545           if (!is_vector_use(use, k)) {
1546             _n_idx_list.push(use, k);
1547           }
1548         }
1549       }
1550     }
1551   }
1552 
1553   while (_n_idx_list.is_nonempty()) {
1554     Node* use = _n_idx_list.node();
1555     int   idx = _n_idx_list.index();
1556     _n_idx_list.pop();
1557     Node* def = use->in(idx);
1558 


1559     // Insert extract operation
1560     _igvn.hash_delete(def);
1561     int def_pos = alignment(def) / data_size(def);
1562 
1563     Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));
1564     _igvn.register_new_node_with_optimizer(ex);
1565     _phase->set_ctrl(ex, _phase->get_ctrl(def));
1566     _igvn.replace_input_of(use, idx, ex);
1567     _igvn._worklist.push(def);
1568 
1569     bb_insert_after(ex, bb_idx(def));
1570     set_velt_type(ex, velt_type(def));
1571   }
1572 }
1573 
1574 //------------------------------is_vector_use---------------------------
1575 // Is use->in(u_idx) a vector use?
1576 bool SuperWord::is_vector_use(Node* use, int u_idx) {
1577   Node_List* u_pk = my_pack(use);
1578   if (u_pk == NULL) return false;

1579   Node* def = use->in(u_idx);
1580   Node_List* d_pk = my_pack(def);
1581   if (d_pk == NULL) {
1582     // check for scalar promotion
1583     Node* n = u_pk->at(0)->in(u_idx);
1584     for (uint i = 1; i < u_pk->size(); i++) {
1585       if (u_pk->at(i)->in(u_idx) != n) return false;
1586     }
1587     return true;
1588   }
1589   if (u_pk->size() != d_pk->size())
1590     return false;
1591   for (uint i = 0; i < u_pk->size(); i++) {
1592     Node* ui = u_pk->at(i);
1593     Node* di = d_pk->at(i);
1594     if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
1595       return false;
1596   }
1597   return true;
1598 }
1599 










1600 //------------------------------construct_bb---------------------------
1601 // Construct reverse postorder list of block members
1602 bool SuperWord::construct_bb() {
1603   Node* entry = bb();
1604 
1605   assert(_stk.length() == 0,            "stk is empty");
1606   assert(_block.length() == 0,          "block is empty");
1607   assert(_data_entry.length() == 0,     "data_entry is empty");
1608   assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
1609   assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");
1610 
1611   // Find non-control nodes with no inputs from within block,
1612   // create a temporary map from node _idx to bb_idx for use
1613   // by the visited and post_visited sets,
1614   // and count number of nodes in block.
1615   int bb_ct = 0;
1616   for (uint i = 0; i < lpt()->_body.size(); i++ ) {
1617     Node *n = lpt()->_body.at(i);

1618     set_bb_idx(n, i); // Create a temporary map
1619     if (in_bb(n)) {
1620       if (n->is_LoadStore() || n->is_MergeMem() ||
1621           (n->is_Proj() && !n->as_Proj()->is_CFG())) {
1622         // Bailout if the loop has LoadStore, MergeMem or data Proj
1623         // nodes. Superword optimization does not work with them.
1624         return false;
1625       }
1626       bb_ct++;
1627       if (!n->is_CFG()) {
1628         bool found = false;
1629         for (uint j = 0; j < n->req(); j++) {
1630           Node* def = n->in(j);
1631           if (def && in_bb(def)) {
1632             found = true;
1633             break;
1634           }
1635         }
1636         if (!found) {
1637           assert(n != entry, "can't be entry");


1657     }
1658   }
1659 
1660   // Create an RPO list of nodes in block
1661 
1662   visited_clear();
1663   post_visited_clear();
1664 
1665   // Push all non-control nodes with no inputs from within block, then control entry
1666   for (int j = 0; j < _data_entry.length(); j++) {
1667     Node* n = _data_entry.at(j);
1668     visited_set(n);
1669     _stk.push(n);
1670   }
1671   visited_set(entry);
1672   _stk.push(entry);
1673 
1674   // Do a depth first walk over out edges
1675   int rpo_idx = bb_ct - 1;
1676   int size;

1677   while ((size = _stk.length()) > 0) {
1678     Node* n = _stk.top(); // Leave node on stack
1679     if (!visited_test_set(n)) {
1680       // forward arc in graph
1681     } else if (!post_visited_test(n)) {
1682       // cross or back arc
1683       for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1684         Node *use = n->fast_out(i);
1685         if (in_bb(use) && !visited_test(use) &&
1686             // Don't go around backedge
1687             (!use->is_Phi() || n == entry)) {















1688           _stk.push(use);
1689         }
1690       }

1691       if (_stk.length() == size) {
1692         // There were no additional uses, post visit node now
1693         _stk.pop(); // Remove node from stack
1694         assert(rpo_idx >= 0, "");
1695         _block.at_put_grow(rpo_idx, n);
1696         rpo_idx--;
1697         post_visited_set(n);
1698         assert(rpo_idx >= 0 || _stk.is_empty(), "");
1699       }
1700     } else {
1701       _stk.pop(); // Remove post-visited node from stack
1702     }
1703   }
1704 
1705   // Create real map of block indices for nodes
1706   for (int j = 0; j < _block.length(); j++) {
1707     Node* n = _block.at(j);
1708     set_bb_idx(n, j);
1709   }
1710 
1711   initialize_bb(); // Ensure extra info is allocated.

1712 
1713 #ifndef PRODUCT
1714   if (TraceSuperWord) {
1715     print_bb();
1716     tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
1717     for (int m = 0; m < _data_entry.length(); m++) {
1718       tty->print("%3d ", m);
1719       _data_entry.at(m)->dump();
1720     }
1721     tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");
1722     for (int m = 0; m < _mem_slice_head.length(); m++) {
1723       tty->print("%3d ", m); _mem_slice_head.at(m)->dump();
1724       tty->print("    ");    _mem_slice_tail.at(m)->dump();
1725     }
1726   }
1727 #endif
1728   assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
1729   return (_mem_slice_head.length() > 0) || (_data_entry.length() > 0);
1730 }
1731 
1732 //------------------------------initialize_bb---------------------------
1733 // Initialize per node info
1734 void SuperWord::initialize_bb() {
1735   Node* last = _block.at(_block.length() - 1);
1736   grow_node_info(bb_idx(last));
1737 }
1738 
1739 //------------------------------bb_insert_after---------------------------
1740 // Insert n into block after pos
1741 void SuperWord::bb_insert_after(Node* n, int pos) {
1742   int n_pos = pos + 1;
1743   // Make room
1744   for (int i = _block.length() - 1; i >= n_pos; i--) {
1745     _block.at_put_grow(i+1, _block.at(i));
1746   }
1747   for (int j = _node_info.length() - 1; j >= n_pos; j--) {
1748     _node_info.at_put_grow(j+1, _node_info.at(j));
1749   }


1940 //------------------------------in_pack---------------------------
1941 // Is s in pack p?
1942 Node_List* SuperWord::in_pack(Node* s, Node_List* p) {
1943   for (uint i = 0; i < p->size(); i++) {
1944     if (p->at(i) == s) {
1945       return p;
1946     }
1947   }
1948   return NULL;
1949 }
1950 
1951 //------------------------------remove_pack_at---------------------------
1952 // Remove the pack at position pos in the packset
1953 void SuperWord::remove_pack_at(int pos) {
1954   Node_List* p = _packset.at(pos);
1955   for (uint i = 0; i < p->size(); i++) {
1956     Node* s = p->at(i);
1957     set_my_pack(s, NULL);
1958   }
1959   _packset.remove_at(pos);













1960 }
1961 
1962 //------------------------------executed_first---------------------------
1963 // Return the node executed first in pack p.  Uses the RPO block list
1964 // to determine order.
1965 Node* SuperWord::executed_first(Node_List* p) {
1966   Node* n = p->at(0);
1967   int n_rpo = bb_idx(n);
1968   for (uint i = 1; i < p->size(); i++) {
1969     Node* s = p->at(i);
1970     int s_rpo = bb_idx(s);
1971     if (s_rpo < n_rpo) {
1972       n = s;
1973       n_rpo = s_rpo;
1974     }
1975   }
1976   return n;
1977 }
1978 
1979 //------------------------------executed_last---------------------------




  48   _igvn(phase->_igvn),
  49   _arena(phase->C->comp_arena()),
  50   _packset(arena(), 8,  0, NULL),         // packs for the current block
  51   _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
  52   _block(arena(), 8,  0, NULL),           // nodes in current block
  53   _data_entry(arena(), 8,  0, NULL),      // nodes with all inputs from outside
  54   _mem_slice_head(arena(), 8,  0, NULL),  // memory slice heads
  55   _mem_slice_tail(arena(), 8,  0, NULL),  // memory slice tails
  56   _node_info(arena(), 8,  0, SWNodeInfo::initial), // info needed per node
  57   _align_to_ref(NULL),                    // memory reference to align vectors to
  58   _disjoint_ptrs(arena(), 8,  0, OrderedPair::initial), // runtime disambiguated pointer pairs
  59   _dg(_arena),                            // dependence graph
  60   _visited(arena()),                      // visited node set
  61   _post_visited(arena()),                 // post visited node set
  62   _n_idx_list(arena(), 8),                // scratch list of (node,index) pairs
  63   _stk(arena(), 8, 0, NULL),              // scratch stack of nodes
  64   _nlist(arena(), 8, 0, NULL),            // scratch list of nodes
  65   _lpt(NULL),                             // loop tree node
  66   _lp(NULL),                              // LoopNode
  67   _bb(NULL),                              // basic block
  68   _iv(NULL),                              // induction var
  69   _race_possible(false)                   // cases where SDMU is true
  70 {}
  71 
  72 //------------------------------transform_loop---------------------------
  73 void SuperWord::transform_loop(IdealLoopTree* lpt) {
  74   assert(UseSuperWord, "should be");
  75   // Do vectors exist on this architecture?
  76   if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
  77 
  78   assert(lpt->_head->is_CountedLoop(), "must be");
  79   CountedLoopNode *cl = lpt->_head->as_CountedLoop();
  80 
  81   if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
  82 
  83   if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
  84 
  85   // Check for no control flow in body (other than exit)
  86   Node *cl_exit = cl->loopexit();
  87   if (cl_exit->in(0) != lpt->_head) return;
  88 
  89   // Make sure the are no extra control users of the loop backedge


 129 //
 130 // 5) One of the memory references is picked to be an aligned vector reference.
 131 //    The pre-loop trip count is adjusted to align this reference in the
 132 //    unrolled body.
 133 //
 134 // 6) The initial set of pack pairs is seeded with memory references.
 135 //
 136 // 7) The set of pack pairs is extended by following use->def and def->use links.
 137 //
 138 // 8) The pairs are combined into vector sized packs.
 139 //
 140 // 9) Reorder the memory slices to co-locate members of the memory packs.
 141 //
 142 // 10) Generate ideal vector nodes for the final set of packs and where necessary,
 143 //    inserting scalar promotion, vector creation from multiple scalars, and
 144 //    extraction of scalar values from vectors.
 145 //
 146 void SuperWord::SLP_extract() {
 147 
 148   // Ready the block

 149   if (!construct_bb())
 150     return; // Exit if no interesting nodes or complex graph.
 151 
 152   dependence_graph();
 153 
 154   compute_max_depth();
 155 
 156   compute_vector_element_type();
 157 
 158   // Attempt vectorization
 159 
 160   find_adjacent_refs();
 161 
 162   extend_packlist();
 163 
 164   combine_packs();
 165 
 166   construct_my_pack_map();
 167 
 168   filter_packs();


 623     assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name()));
 624     n = n->in(MemNode::Memory);
 625   }
 626 }
 627 
 628 //------------------------------stmts_can_pack---------------------------
 629 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and
 630 // s1 aligned at "align"
 631 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
 632 
 633   // Do not use superword for non-primitives
 634   BasicType bt1 = velt_basic_type(s1);
 635   BasicType bt2 = velt_basic_type(s2);
 636   if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
 637     return false;
 638   if (Matcher::max_vector_size(bt1) < 2) {
 639     return false; // No vectors for this type
 640   }
 641 
 642   if (isomorphic(s1, s2)) {
 643     if (independent(s1, s2) || reduction(s1, s2)) {
 644       if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
 645         if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
 646           int s1_align = alignment(s1);
 647           int s2_align = alignment(s2);
 648           if (s1_align == top_align || s1_align == align) {
 649             if (s2_align == top_align || s2_align == align + data_size(s1)) {
 650               return true;
 651             }
 652           }
 653         }
 654       }
 655     }
 656   }
 657   return false;
 658 }
 659 
 660 //------------------------------exists_at---------------------------
 661 // Does s exist in a pack at position pos?
 662 bool SuperWord::exists_at(Node* s, uint pos) {
 663   for (int i = 0; i < _packset.length(); i++) {


 701   if (s1->in(0) != s2->in(0)) return false;
 702   if (!same_velt_type(s1, s2)) return false;
 703   return true;
 704 }
 705 
 706 //------------------------------independent---------------------------
 707 // Is there no data path from s1 to s2 or s2 to s1?
 708 bool SuperWord::independent(Node* s1, Node* s2) {
 709   //  assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
 710   int d1 = depth(s1);
 711   int d2 = depth(s2);
 712   if (d1 == d2) return s1 != s2;
 713   Node* deep    = d1 > d2 ? s1 : s2;
 714   Node* shallow = d1 > d2 ? s2 : s1;
 715 
 716   visited_clear();
 717 
 718   return independent_path(shallow, deep);
 719 }
 720 
 721 //------------------------------reduction---------------------------
 722 // Is there a data path between s1 and s2 and the nodes reductions?
 723 bool SuperWord::reduction(Node* s1, Node* s2) {
 724   bool retValue = false;
 725   int d1 = depth(s1);
 726   int d2 = depth(s2);
 727   if (d1 + 1 == d2) {
 728     if (is_reduction(s1) && is_reduction(s2)) {
 729       // This is an ordered set, so s1 should define s2
 730       for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
 731         Node* t1 = s1->fast_out(i);
 732         if (t1 == s2) {
 733           // both nodes are reductions and connected
 734           retValue = true;
 735         }
 736       }
 737     }
 738   }
 739 
 740   return retValue;
 741 }
 742 
 743 //------------------------------independent_path------------------------------
 744 // Helper for independent
 745 bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
 746   if (dp >= 1000) return false; // stop deep recursion
 747   visited_set(deep);
 748   int shal_depth = depth(shallow);
 749   assert(shal_depth <= depth(deep), "must be");
 750   for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) {
 751     Node* pred = preds.current();
 752     if (in_bb(pred) && !visited_test(pred)) {
 753       if (shallow == pred) {
 754         return false;
 755       }
 756       if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
 757         return false;
 758       }
 759     }
 760   }
 761   return true;
 762 }


 766   set_alignment(s1, align);
 767   if (align == top_align || align == bottom_align) {
 768     set_alignment(s2, align);
 769   } else {
 770     set_alignment(s2, align + data_size(s1));
 771   }
 772 }
 773 
 774 //------------------------------data_size---------------------------
 775 int SuperWord::data_size(Node* s) {
 776   int bsize = type2aelembytes(velt_basic_type(s));
 777   assert(bsize != 0, "valid size");
 778   return bsize;
 779 }
 780 
 781 //------------------------------extend_packlist---------------------------
 782 // Extend packset by following use->def and def->use links from pack members.
 783 void SuperWord::extend_packlist() {
 784   bool changed;
 785   do {
 786     _packset.sort(&packset_eval);
 787     changed = false;
 788     for (int i = 0; i < _packset.length(); i++) {
 789       Node_List* p = _packset.at(i);
 790       changed |= follow_use_defs(p);
 791       changed |= follow_def_uses(p);
 792     }
 793   } while (changed);
 794 
 795   if (_race_possible) {
 796     for (int i = 0; i < _packset.length(); i++) {
 797       Node_List* p = _packset.at(i);
 798       order_def_uses(p);
 799     }
 800   }
 801 
 802 #ifndef PRODUCT
 803   if (TraceSuperWord) {
 804     tty->print_cr("\nAfter extend_packlist");
 805     print_packset();
 806   }
 807 #endif
 808 }
 809 
 810 //------------------------------follow_use_defs---------------------------
 811 // Extend the packset by visiting operand definitions of nodes in pack p
 812 bool SuperWord::follow_use_defs(Node_List* p) {
 813   assert(p->size() == 2, "just checking");
 814   Node* s1 = p->at(0);
 815   Node* s2 = p->at(1);
 816   assert(s1->req() == s2->req(), "just checking");
 817   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
 818 
 819   if (s1->is_Load()) return false;
 820 
 821   int align = alignment(s1);


 838       }
 839     }
 840   }
 841   return changed;
 842 }
 843 
 844 //------------------------------follow_def_uses---------------------------
 845 // Extend the packset by visiting uses of nodes in pack p
 846 bool SuperWord::follow_def_uses(Node_List* p) {
 847   bool changed = false;
 848   Node* s1 = p->at(0);
 849   Node* s2 = p->at(1);
 850   assert(p->size() == 2, "just checking");
 851   assert(s1->req() == s2->req(), "just checking");
 852   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
 853 
 854   if (s1->is_Store()) return false;
 855 
 856   int align = alignment(s1);
 857   int savings = -1;
 858   int num_s1_uses = 0;
 859   Node* u1 = NULL;
 860   Node* u2 = NULL;
 861   for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
 862     Node* t1 = s1->fast_out(i);
 863     num_s1_uses++;
 864     if (!in_bb(t1)) continue;
 865     for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
 866       Node* t2 = s2->fast_out(j);
 867       if (!in_bb(t2)) continue;
 868       if (!opnd_positions_match(s1, t1, s2, t2))
 869         continue;
 870       if (stmts_can_pack(t1, t2, align)) {
 871         int my_savings = est_savings(t1, t2);
 872         if (my_savings > savings) {
 873           savings = my_savings;
 874           u1 = t1;
 875           u2 = t2;
 876         }
 877       }
 878     }
 879   }
 880   if (num_s1_uses > 1) {
 881     _race_possible = true;
 882   }
 883   if (savings >= 0) {
 884     Node_List* pair = new Node_List();
 885     pair->push(u1);
 886     pair->push(u2);
 887     _packset.append(pair);
 888     set_alignment(u1, u2, align);
 889     changed = true;
 890   }
 891   return changed;
 892 }
 893 
 894 //------------------------------order_def_uses---------------------------
 895 // For extended packsets, ordinally arrange uses packset by major component
 896 void SuperWord::order_def_uses(Node_List* p) {
 897   Node* s1 = p->at(0);
 898 
 899   if (s1->is_Store()) return;
 900 
 901   // reductions are always managed beforehand
 902   if (is_reduction(s1)) return;
 903 
 904   for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
 905     Node* t1 = s1->fast_out(i);
 906 
 907     // Only allow operand swap on commuting operations
 908     if (!t1->is_Add() && !t1->is_Mul()) {
 909       break;
 910     }
 911 
 912     // Now find t1's packset
 913     Node_List* p2 = NULL;
 914     for (int j = 0; j < _packset.length(); j++) {
 915       p2 = _packset.at(j);
 916       Node* first = p2->at(0);
 917       if (t1 == first) {
 918         break;
 919       }
 920       p2 = NULL;
 921     }
 922     // Arrange all sub components by the major component
 923     if (p2 != NULL) {
 924       for (uint j = 1; j < p->size(); j++) {
 925         Node* d1 = p->at(j);
 926         Node* u1 = p2->at(j);
 927         opnd_positions_match(s1, t1, d1, u1);
 928       }
 929     }
 930   }
 931 }
 932 
 933 //---------------------------opnd_positions_match-------------------------
 934 // Is the use of d1 in u1 at the same operand position as d2 in u2?
 935 bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
 936   // check reductions to see if they are marshalled to represent the reduction 
 937   // operator in a specified opnd
 938   if (is_reduction(u1) && is_reduction(u2)) {
 939     // ensure reductions have phis and reduction definitions feeding the 1st operand
 940     Node* first = u1->in(2);
 941     if (first->is_Phi() || is_reduction(first)) {
 942       u1->swap_edges(1, 2);
 943     }
 944     // ensure reductions have phis and reduction definitions feeding the 1st operand
 945     first = u2->in(2);
 946     if (first->is_Phi() || is_reduction(first)) {
 947       u2->swap_edges(1, 2);
 948     }
 949     return true;
 950   }
 951 
 952   uint ct = u1->req();
 953   if (ct != u2->req()) return false;
 954   uint i1 = 0;
 955   uint i2 = 0;
 956   do {
 957     for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
 958     for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
 959     if (i1 != i2) {
 960       if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
 961         // Further analysis relies on operands position matching.
 962         u2->swap_edges(i1, i2);
 963       } else {
 964         return false;
 965       }
 966     }
 967   } while (i1 < ct);
 968   return true;
 969 }
 970 
 971 //------------------------------est_savings---------------------------


1013   if (ct < s2->outcnt()) save_use += unpack_cost(1);
1014 
1015   return MAX2(save_in, save_use);
1016 }
1017 
1018 //------------------------------costs---------------------------
1019 int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
1020 int SuperWord::pack_cost(int ct)   { return ct; }
1021 int SuperWord::unpack_cost(int ct) { return ct; }
1022 
1023 //------------------------------combine_packs---------------------------
1024 // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
1025 void SuperWord::combine_packs() {
1026   bool changed = true;
1027   // Combine packs regardless max vector size.
1028   while (changed) {
1029     changed = false;
1030     for (int i = 0; i < _packset.length(); i++) {
1031       Node_List* p1 = _packset.at(i);
1032       if (p1 == NULL) continue;
1033       // Because of sorting we can start at i + 1
1034       for (int j = i + 1; j < _packset.length(); j++) {
1035         Node_List* p2 = _packset.at(j);
1036         if (p2 == NULL) continue;
1037         if (i == j) continue;
1038         if (p1->at(p1->size()-1) == p2->at(0)) {
1039           for (uint k = 1; k < p2->size(); k++) {
1040             p1->push(p2->at(k));
1041           }
1042           _packset.at_put(j, NULL);
1043           changed = true;
1044         }
1045       }
1046     }
1047   }
1048 
1049   // Split packs which have size greater then max vector size.
1050   for (int i = 0; i < _packset.length(); i++) {
1051     Node_List* p1 = _packset.at(i);
1052     if (p1 != NULL) {
1053       BasicType bt = velt_basic_type(p1->at(0));
1054       uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector


1141         }
1142 #endif
1143         remove_pack_at(i);
1144         changed = true;
1145       }
1146     }
1147   } while (changed);
1148 
1149 #ifndef PRODUCT
1150   if (TraceSuperWord) {
1151     tty->print_cr("\nAfter filter_packs");
1152     print_packset();
1153     tty->cr();
1154   }
1155 #endif
1156 }
1157 
1158 //------------------------------implemented---------------------------
1159 // Can code be generated for pack p?
1160 bool SuperWord::implemented(Node_List* p) {
1161   bool retValue = false;
1162   Node* p0 = p->at(0);
1163   if (p0 != NULL) {
1164     int opc = p0->Opcode();
1165     uint size = p->size();
1166     if (is_reduction(p0)) {
1167       const Type *arith_type = p0->bottom_type();
1168       retValue = ReductionNode::implemented(opc, size, arith_type->basic_type());
1169     } else {
1170       retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
1171     }
1172   }
1173   return retValue;
1174 }
1175 
1176 //------------------------------same_inputs--------------------------
1177 // For pack p, are all idx operands the same?
1178 static bool same_inputs(Node_List* p, int idx) {
1179   Node* p0 = p->at(0);
1180   uint vlen = p->size();
1181   Node* p0_def = p0->in(idx);
1182   for (uint i = 1; i < vlen; i++) {
1183     Node* pi = p->at(i);
1184     Node* pi_def = pi->in(idx);
1185     if (p0_def != pi_def)
1186       return false;
1187   }
1188   return true;
1189 }
1190 
1191 //------------------------------profitable---------------------------
1192 // For pack p, are all operands and all uses (with in the block) vector?
1193 bool SuperWord::profitable(Node_List* p) {
1194   Node* p0 = p->at(0);
1195   uint start, end;
1196   VectorNode::vector_operands(p0, &start, &end);
1197 
1198   // Return false if some inputs are not vectors or vectors with different
1199   // size or alignment.
1200   // Also, for now, return false if not scalar promotion case when inputs are
1201   // the same. Later, implement PackNode and allow differing, non-vector inputs
1202   // (maybe just the ones from outside the block.)
1203   for (uint i = start; i < end; i++) {
1204     if (!is_vector_use(p0, i))
1205       return false;
1206   }
1207   // Check if reductions are connected
1208   if (is_reduction(p0)) {
1209     Node* second_in = p0->in(2);
1210     Node_List* second_pk = my_pack(second_in);
1211     if (second_pk == NULL) {
1212       // Remove reduction flag if no parent pack, it is not profitable
1213       p0->remove_flag(Node::Flag_has_reduction);
1214       return false;
1215     } else if (second_pk->size() != p->size()) {
1216       return false;
1217     }
1218   }
1219   if (VectorNode::is_shift(p0)) {
1220     // For now, return false if shift count is vector or not scalar promotion
1221     // case (different shift counts) because it is not supported yet.
1222     Node* cnt = p0->in(2);
1223     Node_List* cnt_pk = my_pack(cnt);
1224     if (cnt_pk != NULL)
1225       return false;
1226     if (!same_inputs(p, 2))
1227       return false;
1228   }
1229   if (!p0->is_Store()) {
1230     // For now, return false if not all uses are vector.
1231     // Later, implement ExtractNode and allow non-vector uses (maybe
1232     // just the ones outside the block.)
1233     for (uint i = 0; i < p->size(); i++) {
1234       Node* def = p->at(i);
1235       for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1236         Node* use = def->fast_out(j);
1237         for (uint k = 0; k < use->req(); k++) {
1238           Node* n = use->in(k);
1239           if (def == n) {
1240             // reductions can be loop carried dependences
1241             if (is_reduction(def) && use->is_Phi())
1242               continue;
1243             if (!is_vector_use(use, k)) {
1244               return false;
1245             }
1246           }
1247         }
1248       }
1249     }
1250   }
1251   return true;
1252 }
1253 
1254 //------------------------------schedule---------------------------
1255 // Adjust the memory graph for the packed operations
1256 void SuperWord::schedule() {
1257 
1258   // Co-locate in the memory graph the members of each memory pack
1259   for (int i = 0; i < _packset.length(); i++) {
1260     co_locate_pack(_packset.at(i));
1261   }
1262 }


1507             mem = mem->in(MemNode::Memory);
1508           } else {
1509             break; // dependent memory
1510           }
1511         }
1512         Node* adr = low_adr->in(MemNode::Address);
1513         const TypePtr* atyp = n->adr_type();
1514         vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
1515         vlen_in_bytes = vn->as_LoadVector()->memory_size();
1516       } else if (n->is_Store()) {
1517         // Promote value to be stored to vector
1518         Node* val = vector_opd(p, MemNode::ValueIn);
1519         Node* ctl = n->in(MemNode::Control);
1520         Node* mem = first->in(MemNode::Memory);
1521         Node* adr = low_adr->in(MemNode::Address);
1522         const TypePtr* atyp = n->adr_type();
1523         vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
1524         vlen_in_bytes = vn->as_StoreVector()->memory_size();
1525       } else if (n->req() == 3) {
1526         // Promote operands to vector
1527         Node* in1 = NULL;
1528         bool node_isa_reduction = is_reduction(n);
1529         if (node_isa_reduction) {
1530           // the input to the first reduction operation is retained
1531           in1 = low_adr->in(1);
1532         } else {
1533           in1 = vector_opd(p, 1);
1534         }
1535         Node* in2 = vector_opd(p, 2);
1536         if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) {
1537           // Move invariant vector input into second position to avoid register spilling.
1538           Node* tmp = in1;
1539           in1 = in2;
1540           in2 = tmp;
1541         }
1542         if (node_isa_reduction) {
1543           const Type *arith_type = n->bottom_type();
1544           vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type());
1545           if (in2->is_Load()) {
1546             vlen_in_bytes = in2->as_LoadVector()->memory_size();
1547           } else {
1548             vlen_in_bytes = in2->as_Vector()->length_in_bytes();
1549           }
1550         } else {
1551           vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
1552           vlen_in_bytes = vn->as_Vector()->length_in_bytes();
1553         }
1554       } else {
1555         ShouldNotReachHere();
1556       }
1557       assert(vn != NULL, "sanity");
1558       _igvn.register_new_node_with_optimizer(vn);
1559       _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
1560       for (uint j = 0; j < p->size(); j++) {
1561         Node* pm = p->at(j);
1562         _igvn.replace_node(pm, vn);
1563       }
1564       _igvn._worklist.push(vn);
1565 
1566       if (vlen_in_bytes > max_vlen_in_bytes) {
1567         max_vlen_in_bytes = vlen_in_bytes;
1568       }
1569 #ifdef ASSERT
1570       if (TraceNewVectors) {
1571         tty->print("new Vector node: ");
1572         vn->dump();
1573       }


1673     Node* def = p->at(i);
1674     for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1675       Node* use = def->fast_out(j);
1676       for (uint k = 0; k < use->req(); k++) {
1677         Node* n = use->in(k);
1678         if (def == n) {
1679           if (!is_vector_use(use, k)) {
1680             _n_idx_list.push(use, k);
1681           }
1682         }
1683       }
1684     }
1685   }
1686 
1687   while (_n_idx_list.is_nonempty()) {
1688     Node* use = _n_idx_list.node();
1689     int   idx = _n_idx_list.index();
1690     _n_idx_list.pop();
1691     Node* def = use->in(idx);
1692 
1693     if (is_reduction(def)) continue;
1694 
1695     // Insert extract operation
1696     _igvn.hash_delete(def);
1697     int def_pos = alignment(def) / data_size(def);
1698 
1699     Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));
1700     _igvn.register_new_node_with_optimizer(ex);
1701     _phase->set_ctrl(ex, _phase->get_ctrl(def));
1702     _igvn.replace_input_of(use, idx, ex);
1703     _igvn._worklist.push(def);
1704 
1705     bb_insert_after(ex, bb_idx(def));
1706     set_velt_type(ex, velt_type(def));
1707   }
1708 }
1709 
1710 //------------------------------is_vector_use---------------------------
1711 // Is use->in(u_idx) a vector use?
1712 bool SuperWord::is_vector_use(Node* use, int u_idx) {
1713   Node_List* u_pk = my_pack(use);
1714   if (u_pk == NULL) return false;
1715   if (is_reduction(use)) return true;
1716   Node* def = use->in(u_idx);
1717   Node_List* d_pk = my_pack(def);
1718   if (d_pk == NULL) {
1719     // check for scalar promotion
1720     Node* n = u_pk->at(0)->in(u_idx);
1721     for (uint i = 1; i < u_pk->size(); i++) {
1722       if (u_pk->at(i)->in(u_idx) != n) return false;
1723     }
1724     return true;
1725   }
1726   if (u_pk->size() != d_pk->size())
1727     return false;
1728   for (uint i = 0; i < u_pk->size(); i++) {
1729     Node* ui = u_pk->at(i);
1730     Node* di = d_pk->at(i);
1731     if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
1732       return false;
1733   }
1734   return true;
1735 }
1736 
1737 //------------------------------is_reduction---------------------------
1738 // Is the current node a reduction node?
1739 bool SuperWord::is_reduction(Node* n) {
1740   uint flags = n->flags();
1741   if ((flags & Node::Flag_has_reduction) == Node::Flag_has_reduction) {
1742     return true;
1743   }
1744   return false;
1745 }
1746 
1747 //------------------------------construct_bb---------------------------
1748 // Construct reverse postorder list of block members
1749 bool SuperWord::construct_bb() {
1750   Node* entry = bb();
1751 
1752   assert(_stk.length() == 0,            "stk is empty");
1753   assert(_block.length() == 0,          "block is empty");
1754   assert(_data_entry.length() == 0,     "data_entry is empty");
1755   assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
1756   assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");
1757 
1758   // Find non-control nodes with no inputs from within block,
1759   // create a temporary map from node _idx to bb_idx for use
1760   // by the visited and post_visited sets,
1761   // and count number of nodes in block.
1762   int bb_ct = 0;
1763   for (uint i = 0; i < lpt()->_body.size(); i++) {
1764     Node *n = lpt()->_body.at(i);
1765     n->set_attr(0);
1766     set_bb_idx(n, i); // Create a temporary map
1767     if (in_bb(n)) {
1768       if (n->is_LoadStore() || n->is_MergeMem() ||
1769           (n->is_Proj() && !n->as_Proj()->is_CFG())) {
1770         // Bailout if the loop has LoadStore, MergeMem or data Proj
1771         // nodes. Superword optimization does not work with them.
1772         return false;
1773       }
1774       bb_ct++;
1775       if (!n->is_CFG()) {
1776         bool found = false;
1777         for (uint j = 0; j < n->req(); j++) {
1778           Node* def = n->in(j);
1779           if (def && in_bb(def)) {
1780             found = true;
1781             break;
1782           }
1783         }
1784         if (!found) {
1785           assert(n != entry, "can't be entry");


1805     }
1806   }
1807 
1808   // Create an RPO list of nodes in block
1809 
1810   visited_clear();
1811   post_visited_clear();
1812 
1813   // Push all non-control nodes with no inputs from within block, then control entry
1814   for (int j = 0; j < _data_entry.length(); j++) {
1815     Node* n = _data_entry.at(j);
1816     visited_set(n);
1817     _stk.push(n);
1818   }
1819   visited_set(entry);
1820   _stk.push(entry);
1821 
1822   // Do a depth first walk over out edges
1823   int rpo_idx = bb_ct - 1;
1824   int size;
1825   int reduction_uses = 0;
1826   while ((size = _stk.length()) > 0) {
1827     Node* n = _stk.top(); // Leave node on stack
1828     if (!visited_test_set(n)) {
1829       // forward arc in graph
1830     } else if (!post_visited_test(n)) {
1831       // cross or back arc
1832       for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1833         Node *use = n->fast_out(i);
1834         if (in_bb(use) && !visited_test(use) &&
1835             // Don't go around backedge
1836             (!use->is_Phi() || n == entry)) {
1837           if (is_reduction(use)) {
1838             // First see if we can map the reduction on the given system we are on, then
1839             // make a data entry operation for each reduction we see.
1840             const Type *arith_type = use->bottom_type();
1841             int vopc = ReductionNode::opcode(use->Opcode(), arith_type->basic_type());
1842             if (vopc != use->Opcode()) {
1843               if (Matcher::match_rule_supported(vopc)) {
1844                 _stk.push(use);
1845                 reduction_uses++;
1846               } else {
1847                 // failed a support issue
1848                 return false;
1849               }
1850             }
1851           } else {
1852             _stk.push(use);
1853           }
1854         }
1855       }
1856       if (_stk.length() == size) {
1857         // There were no additional uses, post visit node now
1858         _stk.pop(); // Remove node from stack
1859         assert(rpo_idx >= 0, "");
1860         _block.at_put_grow(rpo_idx, n);
1861         rpo_idx--;
1862         post_visited_set(n);
1863         assert(rpo_idx >= 0 || _stk.is_empty(), "");
1864       }
1865     } else {
1866       _stk.pop(); // Remove post-visited node from stack
1867     }
1868   }
1869 
1870   // Create real map of block indices for nodes
1871   for (int j = 0; j < _block.length(); j++) {
1872     Node* n = _block.at(j);
1873     set_bb_idx(n, j);
1874   }
1875 
1876   // Ensure extra info is allocated.
1877   initialize_bb();
1878 
1879 #ifndef PRODUCT
1880   if (TraceSuperWord) {
1881     print_bb();
1882     tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
1883     for (int m = 0; m < _data_entry.length(); m++) {
1884       tty->print("%3d ", m);
1885       _data_entry.at(m)->dump();
1886     }
1887     tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");
1888     for (int m = 0; m < _mem_slice_head.length(); m++) {
1889       tty->print("%3d ", m); _mem_slice_head.at(m)->dump();
1890       tty->print("    ");    _mem_slice_tail.at(m)->dump();
1891     }
1892   }
1893 #endif
1894   assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
1895   return (_mem_slice_head.length() > 0) || (reduction_uses > 0) || (_data_entry.length() > 0);
1896 }
1897 
1898 //------------------------------initialize_bb---------------------------
1899 // Initialize per node info
1900 void SuperWord::initialize_bb() {
1901   Node* last = _block.at(_block.length() - 1);
1902   grow_node_info(bb_idx(last));
1903 }
1904 
1905 //------------------------------bb_insert_after---------------------------
1906 // Insert n into block after pos
1907 void SuperWord::bb_insert_after(Node* n, int pos) {
1908   int n_pos = pos + 1;
1909   // Make room
1910   for (int i = _block.length() - 1; i >= n_pos; i--) {
1911     _block.at_put_grow(i+1, _block.at(i));
1912   }
1913   for (int j = _node_info.length() - 1; j >= n_pos; j--) {
1914     _node_info.at_put_grow(j+1, _node_info.at(j));
1915   }


2106 //------------------------------in_pack---------------------------
2107 // Is s in pack p?
2108 Node_List* SuperWord::in_pack(Node* s, Node_List* p) {
2109   for (uint i = 0; i < p->size(); i++) {
2110     if (p->at(i) == s) {
2111       return p;
2112     }
2113   }
2114   return NULL;
2115 }
2116 
2117 //------------------------------remove_pack_at---------------------------
2118 // Remove the pack at position pos in the packset
2119 void SuperWord::remove_pack_at(int pos) {
2120   Node_List* p = _packset.at(pos);
2121   for (uint i = 0; i < p->size(); i++) {
2122     Node* s = p->at(i);
2123     set_my_pack(s, NULL);
2124   }
2125   _packset.remove_at(pos);
2126 }
2127 
2128 int SuperWord::packset_eval(Node_List** x, Node_List** y) {
2129   Node* a = (*x)->at(0);
2130   Node* b = (*y)->at(0);
2131   int retValue = 0;
2132   if ((a != NULL) && (b != NULL)) {
2133     int align_a = a->attr();
2134     int align_b = b->attr();
2135     if (align_a < align_b) retValue = -1;
2136     if (align_a > align_b) retValue = 1;
2137   }
2138   return retValue;
2139 }
2140 
2141 //------------------------------executed_first---------------------------
2142 // Return the node executed first in pack p.  Uses the RPO block list
2143 // to determine order.
2144 Node* SuperWord::executed_first(Node_List* p) {
2145   Node* n = p->at(0);
2146   int n_rpo = bb_idx(n);
2147   for (uint i = 1; i < p->size(); i++) {
2148     Node* s = p->at(i);
2149     int s_rpo = bb_idx(s);
2150     if (s_rpo < n_rpo) {
2151       n = s;
2152       n_rpo = s_rpo;
2153     }
2154   }
2155   return n;
2156 }
2157 
2158 //------------------------------executed_last---------------------------


< prev index next >