< prev index next >

src/share/vm/opto/superword.cpp

Print this page




  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 #include "precompiled.hpp"
  25 #include "compiler/compileLog.hpp"
  26 #include "libadt/vectset.hpp"
  27 #include "memory/allocation.inline.hpp"
  28 #include "opto/addnode.hpp"
  29 #include "opto/callnode.hpp"
  30 #include "opto/castnode.hpp"
  31 #include "opto/convertnode.hpp"
  32 #include "opto/divnode.hpp"
  33 #include "opto/matcher.hpp"
  34 #include "opto/memnode.hpp"
  35 #include "opto/mulnode.hpp"
  36 #include "opto/opcodes.hpp"
  37 #include "opto/opaquenode.hpp"
  38 #include "opto/superword.hpp"
  39 #include "opto/vectornode.hpp"

  40 
  41 //
  42 //                  S U P E R W O R D   T R A N S F O R M
  43 //=============================================================================
  44 
  45 //------------------------------SuperWord---------------------------
  46 SuperWord::SuperWord(PhaseIdealLoop* phase) :
  47   _phase(phase),
  48   _igvn(phase->_igvn),
  49   _arena(phase->C->comp_arena()),
  50   _packset(arena(), 8,  0, NULL),         // packs for the current block
  51   _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
  52   _block(arena(), 8,  0, NULL),           // nodes in current block
  53   _data_entry(arena(), 8,  0, NULL),      // nodes with all inputs from outside
  54   _mem_slice_head(arena(), 8,  0, NULL),  // memory slice heads
  55   _mem_slice_tail(arena(), 8,  0, NULL),  // memory slice tails
  56   _node_info(arena(), 8,  0, SWNodeInfo::initial), // info needed per node
  57   _clone_map(phase->C->clone_map()),      // map of nodes created in cloning

  58   _align_to_ref(NULL),                    // memory reference to align vectors to
  59   _disjoint_ptrs(arena(), 8,  0, OrderedPair::initial), // runtime disambiguated pointer pairs
  60   _dg(_arena),                            // dependence graph
  61   _visited(arena()),                      // visited node set
  62   _post_visited(arena()),                 // post visited node set
  63   _n_idx_list(arena(), 8),                // scratch list of (node,index) pairs
  64   _stk(arena(), 8, 0, NULL),              // scratch stack of nodes
  65   _nlist(arena(), 8, 0, NULL),            // scratch list of nodes
  66   _lpt(NULL),                             // loop tree node
  67   _lp(NULL),                              // LoopNode
  68   _bb(NULL),                              // basic block
  69   _iv(NULL),                              // induction var
  70   _race_possible(false),                  // cases where SDMU is true
  71   _early_return(true),                    // analysis evaluations routine
  72   _num_work_vecs(0),                      // amount of vector work we have
  73   _num_reductions(0),                     // amount of reduction work we have
  74   _do_vector_loop(phase->C->do_vector_loop()),  // whether to do vectorization/simd style

  75   _ii_first(-1),                          // first loop generation index - only if do_vector_loop()
  76   _ii_last(-1),                           // last loop generation index - only if do_vector_loop()
  77   _ii_order(arena(), 8, 0, 0)
  78 {
  79 #ifndef PRODUCT
  80   _vector_loop_debug = 0;
  81   if (_phase->C->method() != NULL) {
  82     _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug);
  83   }
  84   _CountedLoopReserveKit_debug = 0;
  85   if (_phase->C->method() != NULL) {
  86     _phase->C->method()->has_option_value("DoReserveCopyInSuperWordDebug", _CountedLoopReserveKit_debug);
  87   }
  88 #endif
  89 }
  90 
  91 //------------------------------transform_loop---------------------------
  92 void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
  93   assert(UseSuperWord, "should be");
  94   // Do vectors exist on this architecture?
  95   if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
  96 
  97   assert(lpt->_head->is_CountedLoop(), "must be");
  98   CountedLoopNode *cl = lpt->_head->as_CountedLoop();
  99 
 100   if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
 101 
 102   if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
 103   // Check for no control flow in body (other than exit)
 104   Node *cl_exit = cl->loopexit();
 105   if (cl_exit->in(0) != lpt->_head) return;











 106 
 107   // Make sure there are no extra control users of the loop backedge
 108   if (cl->back_control()->outcnt() != 1) {
 109     return;
 110   }
 111 
 112   // We only re-enter slp when we vector mapped a queried loop and we want to
 113   // continue unrolling, in this case, slp is not subsequently done.
 114   if (cl->do_unroll_only()) return;
 115 
 116   // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
 117   CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
 118   if (pre_end == NULL) return;
 119   Node *pre_opaq1 = pre_end->limit();
 120   if (pre_opaq1->Opcode() != Op_Opaque1) return;
 121 
 122   init(); // initialize data structures
 123 
 124   set_lpt(lpt);
 125   set_lp(cl);


 374 
 375   find_adjacent_refs();
 376 
 377   extend_packlist();
 378 
 379   if (_do_vector_loop) {
 380     if (_packset.length() == 0) {
 381 #ifndef PRODUCT
 382       if (TraceSuperWord) {
 383         tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
 384       }
 385 #endif
 386       pack_parallel();
 387     }
 388   }
 389 
 390   combine_packs();
 391 
 392   construct_my_pack_map();
 393 




 394   filter_packs();
 395 
 396   schedule();
 397 
 398   output();
 399 }
 400 
 401 //------------------------------find_adjacent_refs---------------------------
 402 // Find the adjacent memory references and create pack pairs for them.
 403 // This is the initial set of packs that will then be extended by
 404 // following use->def and def->use links.  The align positions are
 405 // assigned relative to the reference "align_to_ref"
 406 void SuperWord::find_adjacent_refs() {
 407   // Get list of memory operations
 408   Node_List memops;
 409   for (int i = 0; i < _block.length(); i++) {
 410     Node* n = _block.at(i);
 411     if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
 412         is_java_primitive(n->as_Mem()->memory_type())) {
 413       int align = memory_alignment(n->as_Mem(), 0);


1054       if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
1055         return false;
1056       }
1057     }
1058   }
1059   return true;
1060 }
1061 
1062 //------------------------------set_alignment---------------------------
1063 void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
1064   set_alignment(s1, align);
1065   if (align == top_align || align == bottom_align) {
1066     set_alignment(s2, align);
1067   } else {
1068     set_alignment(s2, align + data_size(s1));
1069   }
1070 }
1071 
1072 //------------------------------data_size---------------------------
1073 int SuperWord::data_size(Node* s) {











1074   int bsize = type2aelembytes(velt_basic_type(s));
1075   assert(bsize != 0, "valid size");
1076   return bsize;
1077 }
1078 
1079 //------------------------------extend_packlist---------------------------
1080 // Extend packset by following use->def and def->use links from pack members.
1081 void SuperWord::extend_packlist() {
1082   bool changed;
1083   do {
1084     packset_sort(_packset.length());
1085     changed = false;
1086     for (int i = 0; i < _packset.length(); i++) {
1087       Node_List* p = _packset.at(i);
1088       changed |= follow_use_defs(p);
1089       changed |= follow_def_uses(p);
1090     }
1091   } while (changed);
1092 
1093   if (_race_possible) {


1100 #ifndef PRODUCT
1101   if (TraceSuperWord) {
1102     tty->print_cr("\nAfter extend_packlist");
1103     print_packset();
1104   }
1105 #endif
1106 }
1107 
1108 //------------------------------follow_use_defs---------------------------
1109 // Extend the packset by visiting operand definitions of nodes in pack p
1110 bool SuperWord::follow_use_defs(Node_List* p) {
1111   assert(p->size() == 2, "just checking");
1112   Node* s1 = p->at(0);
1113   Node* s2 = p->at(1);
1114   assert(s1->req() == s2->req(), "just checking");
1115   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
1116 
1117   if (s1->is_Load()) return false;
1118 
1119   int align = alignment(s1);

1120   bool changed = false;
1121   int start = s1->is_Store() ? MemNode::ValueIn   : 1;
1122   int end   = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
1123   for (int j = start; j < end; j++) {
1124     Node* t1 = s1->in(j);
1125     Node* t2 = s2->in(j);
1126     if (!in_bb(t1) || !in_bb(t2))
1127       continue;
1128     if (stmts_can_pack(t1, t2, align)) {
1129       if (est_savings(t1, t2) >= 0) {
1130         Node_List* pair = new Node_List();
1131         pair->push(t1);
1132         pair->push(t2);
1133         _packset.append(pair);

1134         set_alignment(t1, t2, align);
1135         changed = true;
1136       }
1137     }
1138   }
1139   return changed;
1140 }
1141 
1142 //------------------------------follow_def_uses---------------------------
1143 // Extend the packset by visiting uses of nodes in pack p
1144 bool SuperWord::follow_def_uses(Node_List* p) {
1145   bool changed = false;
1146   Node* s1 = p->at(0);
1147   Node* s2 = p->at(1);
1148   assert(p->size() == 2, "just checking");
1149   assert(s1->req() == s2->req(), "just checking");
1150   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
1151 
1152   if (s1->is_Store()) return false;
1153 
1154   int align = alignment(s1);

1155   int savings = -1;
1156   int num_s1_uses = 0;
1157   Node* u1 = NULL;
1158   Node* u2 = NULL;
1159   for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1160     Node* t1 = s1->fast_out(i);
1161     num_s1_uses++;
1162     if (!in_bb(t1)) continue;
1163     for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
1164       Node* t2 = s2->fast_out(j);
1165       if (!in_bb(t2)) continue;
1166       if (!opnd_positions_match(s1, t1, s2, t2))
1167         continue;
1168       if (stmts_can_pack(t1, t2, align)) {
1169         int my_savings = est_savings(t1, t2);
1170         if (my_savings > savings) {
1171           savings = my_savings;
1172           u1 = t1;
1173           u2 = t2;
1174         }
1175       }
1176     }
1177   }
1178   if (num_s1_uses > 1) {
1179     _race_possible = true;
1180   }
1181   if (savings >= 0) {
1182     Node_List* pair = new Node_List();
1183     pair->push(u1);
1184     pair->push(u2);
1185     _packset.append(pair);

1186     set_alignment(u1, u2, align);
1187     changed = true;
1188   }
1189   return changed;
1190 }
1191 
1192 //------------------------------order_def_uses---------------------------
1193 // For extended packsets, ordinally arrange uses packset by major component
1194 void SuperWord::order_def_uses(Node_List* p) {
1195   Node* s1 = p->at(0);
1196 
1197   if (s1->is_Store()) return;
1198 
1199   // reductions are always managed beforehand
1200   if (s1->is_reduction()) return;
1201 
1202   for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1203     Node* t1 = s1->fast_out(i);
1204 
1205     // Only allow operand swap on commuting operations


1441         if (TraceSuperWord && Verbose) {
1442           tty->print_cr("Unprofitable");
1443           pk->at(0)->dump();
1444         }
1445 #endif
1446         remove_pack_at(i);
1447         changed = true;
1448       }
1449     }
1450   } while (changed);
1451 
1452 #ifndef PRODUCT
1453   if (TraceSuperWord) {
1454     tty->print_cr("\nAfter filter_packs");
1455     print_packset();
1456     tty->cr();
1457   }
1458 #endif
1459 }
1460 






























































































































































































1461 //------------------------------implemented---------------------------
1462 // Can code be generated for pack p?
1463 bool SuperWord::implemented(Node_List* p) {
1464   bool retValue = false;
1465   Node* p0 = p->at(0);
1466   if (p0 != NULL) {
1467     int opc = p0->Opcode();
1468     uint size = p->size();
1469     if (p0->is_reduction()) {
1470       const Type *arith_type = p0->bottom_type();
1471       // Length 2 reductions of INT/LONG do not offer performance benefits
1472       if (((arith_type->basic_type() == T_INT) || (arith_type->basic_type() == T_LONG)) && (size == 2)) {
1473         retValue = false;
1474       } else {
1475         retValue = ReductionNode::implemented(opc, size, arith_type->basic_type());
1476       }
1477     } else {
1478       retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
1479     }






1480   }
1481   return retValue;
1482 }
1483 



1484 //------------------------------same_inputs--------------------------
1485 // For pack p, are all idx operands the same?
1486 static bool same_inputs(Node_List* p, int idx) {
1487   Node* p0 = p->at(0);
1488   uint vlen = p->size();
1489   Node* p0_def = p0->in(idx);
1490   for (uint i = 1; i < vlen; i++) {
1491     Node* pi = p->at(i);
1492     Node* pi_def = pi->in(idx);
1493     if (p0_def != pi_def)
1494       return false;
1495   }

1496   return true;
1497 }
1498 
1499 //------------------------------profitable---------------------------
1500 // For pack p, are all operands and all uses (with in the block) vector?
1501 bool SuperWord::profitable(Node_List* p) {
1502   Node* p0 = p->at(0);
1503   uint start, end;
1504   VectorNode::vector_operands(p0, &start, &end);
1505 
1506   // Return false if some inputs are not vectors or vectors with different
1507   // size or alignment.
1508   // Also, for now, return false if not scalar promotion case when inputs are
1509   // the same. Later, implement PackNode and allow differing, non-vector inputs
1510   // (maybe just the ones from outside the block.)
1511   for (uint i = start; i < end; i++) {
1512     if (!is_vector_use(p0, i))
1513       return false;
1514   }

1515   // Check if reductions are connected
1516   if (p0->is_reduction()) {
1517     Node* second_in = p0->in(2);
1518     Node_List* second_pk = my_pack(second_in);
1519     if ((second_pk == NULL) || (_num_work_vecs == _num_reductions)) {
1520       // Remove reduction flag if no parent pack or if not enough work
1521       // to cover reduction expansion overhead
1522       p0->remove_flag(Node::Flag_is_reduction);
1523       return false;
1524     } else if (second_pk->size() != p->size()) {
1525       return false;
1526     }
1527   }
1528   if (VectorNode::is_shift(p0)) {
1529     // For now, return false if shift count is vector or not scalar promotion
1530     // case (different shift counts) because it is not supported yet.
1531     Node* cnt = p0->in(2);
1532     Node_List* cnt_pk = my_pack(cnt);
1533     if (cnt_pk != NULL)
1534       return false;
1535     if (!same_inputs(p, 2))
1536       return false;
1537   }
1538   if (!p0->is_Store()) {
1539     // For now, return false if not all uses are vector.
1540     // Later, implement ExtractNode and allow non-vector uses (maybe
1541     // just the ones outside the block.)
1542     for (uint i = 0; i < p->size(); i++) {
1543       Node* def = p->at(i);



1544       for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1545         Node* use = def->fast_out(j);
1546         for (uint k = 0; k < use->req(); k++) {
1547           Node* n = use->in(k);
1548           if (def == n) {
1549             // reductions can be loop carried dependences
1550             if (def->is_reduction() && use->is_Phi())
1551               continue;
1552             if (!is_vector_use(use, k)) {
1553               return false;
1554             }
1555           }
1556         }
1557       }
1558     }
1559   }
1560   return true;
1561 }
1562 
1563 //------------------------------schedule---------------------------


1793     tty->print("SuperWord::output    ");
1794     lpt()->dump_head();
1795   }
1796 #endif
1797 
1798   // MUST ENSURE main loop's initial value is properly aligned:
1799   //  (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
1800 
1801   align_initial_loop_index(align_to_ref());
1802 
1803   // Insert extract (unpack) operations for scalar uses
1804   for (int i = 0; i < _packset.length(); i++) {
1805     insert_extracts(_packset.at(i));
1806   }
1807 
1808   Compile* C = _phase->C;
1809   CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
1810   uint max_vlen_in_bytes = 0;
1811   uint max_vlen = 0;
1812 
1813   NOT_PRODUCT(if(_CountedLoopReserveKit_debug > 0) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);})
1814 
1815   CountedLoopReserveKit make_reversable(_phase, _lpt, DoReserveCopyInSuperWord);
1816 
1817   NOT_PRODUCT(if(_CountedLoopReserveKit_debug > 0) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);})
1818 
1819   if (DoReserveCopyInSuperWord && !make_reversable.has_reserved()) {
1820     NOT_PRODUCT({tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");})
1821     return;
1822   }
1823 
1824   for (int i = 0; i < _block.length(); i++) {
1825     Node* n = _block.at(i);
1826     Node_List* p = my_pack(n);
1827     if (p && n == executed_last(p)) {
1828       uint vlen = p->size();
1829       uint vlen_in_bytes = 0;
1830       Node* vn = NULL;
1831       Node* low_adr = p->at(0);
1832       Node* first   = executed_first(p);

1833       int   opc = n->Opcode();
1834       if (n->is_Load()) {
1835         Node* ctl = n->in(MemNode::Control);
1836         Node* mem = first->in(MemNode::Memory);
1837         SWPointer p1(n->as_Mem(), this, NULL, false);
1838         // Identify the memory dependency for the new loadVector node by
1839         // walking up through memory chain.
1840         // This is done to give flexibility to the new loadVector node so that
1841         // it can move above independent storeVector nodes.
1842         while (mem->is_StoreVector()) {
1843           SWPointer p2(mem->as_Mem(), this, NULL, false);
1844           int cmp = p1.cmp(p2);
1845           if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
1846             mem = mem->in(MemNode::Memory);
1847           } else {
1848             break; // dependent memory
1849           }
1850         }
1851         Node* adr = low_adr->in(MemNode::Address);
1852         const TypePtr* atyp = n->adr_type();
1853         vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
1854         vlen_in_bytes = vn->as_LoadVector()->memory_size();
1855       } else if (n->is_Store()) {
1856         // Promote value to be stored to vector
1857         Node* val = vector_opd(p, MemNode::ValueIn);








1858         Node* ctl = n->in(MemNode::Control);
1859         Node* mem = first->in(MemNode::Memory);
1860         Node* adr = low_adr->in(MemNode::Address);
1861         const TypePtr* atyp = n->adr_type();
1862         vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
1863         vlen_in_bytes = vn->as_StoreVector()->memory_size();
1864       } else if (n->req() == 3) {
1865         // Promote operands to vector
1866         Node* in1 = NULL;
1867         bool node_isa_reduction = n->is_reduction();
1868         if (node_isa_reduction) {
1869           // the input to the first reduction operation is retained
1870           in1 = low_adr->in(1);
1871         } else {
1872           in1 = vector_opd(p, 1);







1873         }
1874         Node* in2 = vector_opd(p, 2);







1875         if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) {
1876           // Move invariant vector input into second position to avoid register spilling.
1877           Node* tmp = in1;
1878           in1 = in2;
1879           in2 = tmp;
1880         }
1881         if (node_isa_reduction) {
1882           const Type *arith_type = n->bottom_type();
1883           vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type());
1884           if (in2->is_Load()) {
1885             vlen_in_bytes = in2->as_LoadVector()->memory_size();
1886           } else {
1887             vlen_in_bytes = in2->as_Vector()->length_in_bytes();
1888           }
1889         } else {
1890           vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
1891           vlen_in_bytes = vn->as_Vector()->length_in_bytes();
1892         }
1893       } else if (opc == Op_SqrtD || opc == Op_AbsF || opc == Op_AbsD || opc == Op_NegF || opc == Op_NegD) {
1894         // Promote operand to vector (Sqrt/Abs/Neg are 2 address instructions)
1895         Node* in = vector_opd(p, 1);
1896         vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
1897         vlen_in_bytes = vn->as_Vector()->length_in_bytes();
1898       } else {




















































1899         ShouldNotReachHere();
1900       }

1901       assert(vn != NULL, "sanity");








1902       _igvn.register_new_node_with_optimizer(vn);
1903       _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
1904       for (uint j = 0; j < p->size(); j++) {
1905         Node* pm = p->at(j);
1906         _igvn.replace_node(pm, vn);
1907       }
1908       _igvn._worklist.push(vn);
1909 
1910       if (vlen_in_bytes > max_vlen_in_bytes) {
1911         max_vlen = vlen;
1912         max_vlen_in_bytes = vlen_in_bytes;
1913       }
1914 #ifdef ASSERT
1915       if (TraceNewVectors) {
1916         tty->print("new Vector node: ");
1917         vn->dump();
1918       }
1919 #endif
1920     }
1921   }

1922   C->set_max_vector_size(max_vlen_in_bytes);
1923 
1924   if (SuperWordLoopUnrollAnalysis) {
1925     if (cl->has_passed_slp()) {
1926       uint slp_max_unroll_factor = cl->slp_max_unroll();
1927       if (slp_max_unroll_factor == max_vlen) {
1928         NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte));
1929         // For atomic unrolled loops which are vector mapped, instigate more unrolling.
1930         cl->set_notpassed_slp();
1931         C->set_major_progress();
1932         cl->mark_do_unroll_only();
1933       }
1934     }
1935   }
1936 
1937   if (DoReserveCopyInSuperWord) {
1938     make_reversable.use_new();
1939   }
1940   NOT_PRODUCT(if(_CountedLoopReserveKit_debug > 0) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);})
1941   return;
1942 }
1943 
1944 //------------------------------vector_opd---------------------------
1945 // Create a vector operand for the nodes in pack p for operand: in(opd_idx)
1946 Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
1947   Node* p0 = p->at(0);
1948   uint vlen = p->size();
1949   Node* opd = p0->in(opd_idx);
1950 
1951   if (same_inputs(p, opd_idx)) {
1952     if (opd->is_Vector() || opd->is_LoadVector()) {
1953       assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");




1954       return opd; // input is matching vector
1955     }
1956     if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
1957       Compile* C = _phase->C;
1958       Node* cnt = opd;
1959       // Vector instructions do not mask shift count, do it here.
1960       juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
1961       const TypeInt* t = opd->find_int_type();
1962       if (t != NULL && t->is_con()) {
1963         juint shift = t->get_con();
1964         if (shift > mask) { // Unsigned cmp
1965           cnt = ConNode::make(TypeInt::make(shift & mask));
1966         }
1967       } else {
1968         if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
1969           cnt = ConNode::make(TypeInt::make(mask));
1970           _igvn.register_new_node_with_optimizer(cnt);
1971           cnt = new AndINode(opd, cnt);
1972           _igvn.register_new_node_with_optimizer(cnt);
1973           _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
1974         }
1975         assert(opd->bottom_type()->isa_int(), "int type only");




1976         // Move non constant shift count into vector register.
1977         cnt = VectorNode::shift_count(p0, cnt, vlen, velt_basic_type(p0));
1978       }
1979       if (cnt != opd) {
1980         _igvn.register_new_node_with_optimizer(cnt);
1981         _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
1982       }
1983       return cnt;
1984     }
1985     assert(!opd->is_StoreVector(), "such vector is not expected here");




1986     // Convert scalar input to vector with the same number of elements as
1987     // p0's vector. Use p0's type because size of operand's container in
1988     // vector should match p0's size regardless operand's size.
1989     const Type* p0_t = velt_type(p0);
1990     VectorNode* vn = VectorNode::scalar2vector(opd, vlen, p0_t);
1991 
1992     _igvn.register_new_node_with_optimizer(vn);
1993     _phase->set_ctrl(vn, _phase->get_ctrl(opd));
1994 #ifdef ASSERT
1995     if (TraceNewVectors) {
1996       tty->print("new Vector node: ");
1997       vn->dump();
1998     }
1999 #endif
2000     return vn;
2001   }
2002 
2003   // Insert pack operation
2004   BasicType bt = velt_basic_type(p0);
2005   PackNode* pk = PackNode::make(opd, vlen, bt);
2006   DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )
2007 
2008   for (uint i = 1; i < vlen; i++) {
2009     Node* pi = p->at(i);
2010     Node* in = pi->in(opd_idx);
2011     assert(my_pack(in) == NULL, "Should already have been unpacked");




2012     assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
2013     pk->add_opd(in);
2014   }
2015   _igvn.register_new_node_with_optimizer(pk);
2016   _phase->set_ctrl(pk, _phase->get_ctrl(opd));
2017 #ifdef ASSERT
2018   if (TraceNewVectors) {
2019     tty->print("new Vector node: ");
2020     pk->dump();
2021   }
2022 #endif
2023   return pk;
2024 }
2025 
2026 //------------------------------insert_extracts---------------------------
2027 // If a use of pack p is not a vector use, then replace the
2028 // use with an extract operation.
2029 void SuperWord::insert_extracts(Node_List* p) {
2030   if (p->at(0)->is_Store()) return;
2031   assert(_n_idx_list.is_empty(), "empty (node,index) list");
2032 
2033   // Inspect each use of each pack member.  For each use that is
2034   // not a vector use, replace the use with an extract operation.
2035 
2036   for (uint i = 0; i < p->size(); i++) {
2037     Node* def = p->at(i);
2038     for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2039       Node* use = def->fast_out(j);
2040       for (uint k = 0; k < use->req(); k++) {
2041         Node* n = use->in(k);
2042         if (def == n) {
2043           if (!is_vector_use(use, k)) {

2044             _n_idx_list.push(use, k);
2045           }
2046         }
2047       }
2048     }
2049   }
2050 
2051   while (_n_idx_list.is_nonempty()) {
2052     Node* use = _n_idx_list.node();
2053     int   idx = _n_idx_list.index();
2054     _n_idx_list.pop();
2055     Node* def = use->in(idx);
2056 
2057     if (def->is_reduction()) continue;
2058 
2059     // Insert extract operation
2060     _igvn.hash_delete(def);
2061     int def_pos = alignment(def) / data_size(def);
2062 
2063     Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));




  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 #include "precompiled.hpp"
  25 #include "compiler/compileLog.hpp"
  26 #include "libadt/vectset.hpp"
  27 #include "memory/allocation.inline.hpp"
  28 #include "opto/addnode.hpp"
  29 #include "opto/callnode.hpp"
  30 #include "opto/castnode.hpp"
  31 #include "opto/convertnode.hpp"
  32 #include "opto/divnode.hpp"
  33 #include "opto/matcher.hpp"
  34 #include "opto/memnode.hpp"
  35 #include "opto/mulnode.hpp"
  36 #include "opto/opcodes.hpp"
  37 #include "opto/opaquenode.hpp"
  38 #include "opto/superword.hpp"
  39 #include "opto/vectornode.hpp"
  40 #include "opto/movenode.hpp"
  41 
  42 //
  43 //                  S U P E R W O R D   T R A N S F O R M
  44 //=============================================================================
  45 
  46 //------------------------------SuperWord---------------------------
  47 SuperWord::SuperWord(PhaseIdealLoop* phase) :
  48   _phase(phase),
  49   _igvn(phase->_igvn),
  50   _arena(phase->C->comp_arena()),
  51   _packset(arena(), 8,  0, NULL),         // packs for the current block
  52   _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
  53   _block(arena(), 8,  0, NULL),           // nodes in current block
  54   _data_entry(arena(), 8,  0, NULL),      // nodes with all inputs from outside
  55   _mem_slice_head(arena(), 8,  0, NULL),  // memory slice heads
  56   _mem_slice_tail(arena(), 8,  0, NULL),  // memory slice tails
  57   _node_info(arena(), 8,  0, SWNodeInfo::initial), // info needed per node
  58   _clone_map(phase->C->clone_map()),      // map of nodes created in cloning
  59   _cmovev_kit(_arena, this),              // map to facilitate CMoveVD creation
  60   _align_to_ref(NULL),                    // memory reference to align vectors to
  61   _disjoint_ptrs(arena(), 8,  0, OrderedPair::initial), // runtime disambiguated pointer pairs
  62   _dg(_arena),                            // dependence graph
  63   _visited(arena()),                      // visited node set
  64   _post_visited(arena()),                 // post visited node set
  65   _n_idx_list(arena(), 8),                // scratch list of (node,index) pairs
  66   _stk(arena(), 8, 0, NULL),              // scratch stack of nodes
  67   _nlist(arena(), 8, 0, NULL),            // scratch list of nodes
  68   _lpt(NULL),                             // loop tree node
  69   _lp(NULL),                              // LoopNode
  70   _bb(NULL),                              // basic block
  71   _iv(NULL),                              // induction var
  72   _race_possible(false),                  // cases where SDMU is true
  73   _early_return(true),                    // analysis evaluations routine
  74   _num_work_vecs(0),                      // amount of vector work we have
  75   _num_reductions(0),                     // amount of reduction work we have
  76   _do_vector_loop(phase->C->do_vector_loop()),  // whether to do vectorization/simd style
  77   _do_reserve_copy(DoReserveCopyInSuperWord),
  78   _ii_first(-1),                          // first loop generation index - only if do_vector_loop()
  79   _ii_last(-1),                           // last loop generation index - only if do_vector_loop()
  80   _ii_order(arena(), 8, 0, 0)
  81 {
  82 #ifndef PRODUCT
  83   _vector_loop_debug = 0;
  84   if (_phase->C->method() != NULL) {
  85     _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug);
  86   }




  87 #endif
  88 }
  89 
  90 //------------------------------transform_loop---------------------------
  91 void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
  92   assert(UseSuperWord, "should be");
  93   // Do vectors exist on this architecture?
  94   if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
  95 
  96   assert(lpt->_head->is_CountedLoop(), "must be");
  97   CountedLoopNode *cl = lpt->_head->as_CountedLoop();
  98 
  99   if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
 100 
 101   if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
 102   // Check for no control flow in body (other than exit)
 103   Node *cl_exit = cl->loopexit();
 104   if (cl_exit->in(0) != lpt->_head) {
 105     #ifndef PRODUCT
 106       if (TraceSuperWord) {
 107         tty->print_cr("SuperWord::transform_loop: loop too complicated, cl_exit->in(0) != lpt->_head");
 108         tty->print("cl_exit %d", cl_exit->_idx); cl_exit->dump();
 109         tty->print("cl_exit->in(0) %d", cl_exit->in(0)->_idx); cl_exit->in(0)->dump();
 110         tty->print("lpt->_head %d", lpt->_head->_idx); lpt->_head->dump();
 111         lpt->dump_head();
 112       }
 113     #endif
 114     return;
 115   }
 116 
 117   // Make sure there are no extra control users of the loop backedge
 118   if (cl->back_control()->outcnt() != 1) {
 119     return;
 120   }
 121 
 122   // We only re-enter slp when we vector mapped a queried loop and we want to
 123   // continue unrolling, in this case, slp is not subsequently done.
 124   if (cl->do_unroll_only()) return;
 125 
 126   // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
 127   CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
 128   if (pre_end == NULL) return;
 129   Node *pre_opaq1 = pre_end->limit();
 130   if (pre_opaq1->Opcode() != Op_Opaque1) return;
 131 
 132   init(); // initialize data structures
 133 
 134   set_lpt(lpt);
 135   set_lp(cl);


 384 
 385   find_adjacent_refs();
 386 
 387   extend_packlist();
 388 
 389   if (_do_vector_loop) {
 390     if (_packset.length() == 0) {
 391 #ifndef PRODUCT
 392       if (TraceSuperWord) {
 393         tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
 394       }
 395 #endif
 396       pack_parallel();
 397     }
 398   }
 399 
 400   combine_packs();
 401 
 402   construct_my_pack_map();
 403 
 404   if (_do_vector_loop) {
 405     merge_packs_to_cmovd();
 406   }
 407 
 408   filter_packs();
 409 
 410   schedule();
 411 
 412   output();
 413 }
 414 
 415 //------------------------------find_adjacent_refs---------------------------
 416 // Find the adjacent memory references and create pack pairs for them.
 417 // This is the initial set of packs that will then be extended by
 418 // following use->def and def->use links.  The align positions are
 419 // assigned relative to the reference "align_to_ref"
 420 void SuperWord::find_adjacent_refs() {
 421   // Get list of memory operations
 422   Node_List memops;
 423   for (int i = 0; i < _block.length(); i++) {
 424     Node* n = _block.at(i);
 425     if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
 426         is_java_primitive(n->as_Mem()->memory_type())) {
 427       int align = memory_alignment(n->as_Mem(), 0);


1068       if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
1069         return false;
1070       }
1071     }
1072   }
1073   return true;
1074 }
1075 
1076 //------------------------------set_alignment---------------------------
1077 void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
1078   set_alignment(s1, align);
1079   if (align == top_align || align == bottom_align) {
1080     set_alignment(s2, align);
1081   } else {
1082     set_alignment(s2, align + data_size(s1));
1083   }
1084 }
1085 
1086 //------------------------------data_size---------------------------
1087 int SuperWord::data_size(Node* s) {
1088   Node* use = NULL; //test if the node is a candidate for CMoveVD optimization, then return the size of CMov
1089   if (_do_vector_loop) {
1090     use = _cmovev_kit.is_Bool_candidate(s);
1091     if (use != NULL) {
1092       return data_size(use);
1093     }
1094     use = _cmovev_kit.is_CmpD_candidate(s);
1095     if (use != NULL) {
1096       return data_size(use);
1097     }
1098   }
1099   int bsize = type2aelembytes(velt_basic_type(s));
1100   assert(bsize != 0, "valid size");
1101   return bsize;
1102 }
1103 
1104 //------------------------------extend_packlist---------------------------
1105 // Extend packset by following use->def and def->use links from pack members.
1106 void SuperWord::extend_packlist() {
1107   bool changed;
1108   do {
1109     packset_sort(_packset.length());
1110     changed = false;
1111     for (int i = 0; i < _packset.length(); i++) {
1112       Node_List* p = _packset.at(i);
1113       changed |= follow_use_defs(p);
1114       changed |= follow_def_uses(p);
1115     }
1116   } while (changed);
1117 
1118   if (_race_possible) {


1125 #ifndef PRODUCT
1126   if (TraceSuperWord) {
1127     tty->print_cr("\nAfter extend_packlist");
1128     print_packset();
1129   }
1130 #endif
1131 }
1132 
1133 //------------------------------follow_use_defs---------------------------
1134 // Extend the packset by visiting operand definitions of nodes in pack p
1135 bool SuperWord::follow_use_defs(Node_List* p) {
1136   assert(p->size() == 2, "just checking");
1137   Node* s1 = p->at(0);
1138   Node* s2 = p->at(1);
1139   assert(s1->req() == s2->req(), "just checking");
1140   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
1141 
1142   if (s1->is_Load()) return false;
1143 
1144   int align = alignment(s1);
1145   NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d", s1->_idx, align);)
1146   bool changed = false;
1147   int start = s1->is_Store() ? MemNode::ValueIn   : 1;
1148   int end   = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
1149   for (int j = start; j < end; j++) {
1150     Node* t1 = s1->in(j);
1151     Node* t2 = s2->in(j);
1152     if (!in_bb(t1) || !in_bb(t2))
1153       continue;
1154     if (stmts_can_pack(t1, t2, align)) {
1155       if (est_savings(t1, t2) >= 0) {
1156         Node_List* pair = new Node_List();
1157         pair->push(t1);
1158         pair->push(t2);
1159         _packset.append(pair);
1160         NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: set_alignment(%d, %d, %d)", t1->_idx, t2->_idx, align);)
1161         set_alignment(t1, t2, align);
1162         changed = true;
1163       }
1164     }
1165   }
1166   return changed;
1167 }
1168 
1169 //------------------------------follow_def_uses---------------------------
1170 // Extend the packset by visiting uses of nodes in pack p
1171 bool SuperWord::follow_def_uses(Node_List* p) {
1172   bool changed = false;
1173   Node* s1 = p->at(0);
1174   Node* s2 = p->at(1);
1175   assert(p->size() == 2, "just checking");
1176   assert(s1->req() == s2->req(), "just checking");
1177   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
1178 
1179   if (s1->is_Store()) return false;
1180 
1181   int align = alignment(s1);
1182   NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: s1 %d, align %d", s1->_idx, align);)
1183   int savings = -1;
1184   int num_s1_uses = 0;
1185   Node* u1 = NULL;
1186   Node* u2 = NULL;
1187   for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1188     Node* t1 = s1->fast_out(i);
1189     num_s1_uses++;
1190     if (!in_bb(t1)) continue;
1191     for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
1192       Node* t2 = s2->fast_out(j);
1193       if (!in_bb(t2)) continue;
1194       if (!opnd_positions_match(s1, t1, s2, t2))
1195         continue;
1196       if (stmts_can_pack(t1, t2, align)) {
1197         int my_savings = est_savings(t1, t2);
1198         if (my_savings > savings) {
1199           savings = my_savings;
1200           u1 = t1;
1201           u2 = t2;
1202         }
1203       }
1204     }
1205   }
1206   if (num_s1_uses > 1) {
1207     _race_possible = true;
1208   }
1209   if (savings >= 0) {
1210     Node_List* pair = new Node_List();
1211     pair->push(u1);
1212     pair->push(u2);
1213     _packset.append(pair);
1214     NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: set_alignment(%d, %d, %d)", u1->_idx, u2->_idx, align);)
1215     set_alignment(u1, u2, align);
1216     changed = true;
1217   }
1218   return changed;
1219 }
1220 
1221 //------------------------------order_def_uses---------------------------
1222 // For extended packsets, ordinally arrange uses packset by major component
1223 void SuperWord::order_def_uses(Node_List* p) {
1224   Node* s1 = p->at(0);
1225 
1226   if (s1->is_Store()) return;
1227 
1228   // reductions are always managed beforehand
1229   if (s1->is_reduction()) return;
1230 
1231   for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1232     Node* t1 = s1->fast_out(i);
1233 
1234     // Only allow operand swap on commuting operations


1470         if (TraceSuperWord && Verbose) {
1471           tty->print_cr("Unprofitable");
1472           pk->at(0)->dump();
1473         }
1474 #endif
1475         remove_pack_at(i);
1476         changed = true;
1477       }
1478     }
1479   } while (changed);
1480 
1481 #ifndef PRODUCT
1482   if (TraceSuperWord) {
1483     tty->print_cr("\nAfter filter_packs");
1484     print_packset();
1485     tty->cr();
1486   }
1487 #endif
1488 }
1489 
1490 //------------------------------merge_packs_to_cmovd---------------------------
1491 // Merge CMoveD into new vector-nodes
1492 // We want to catch this pattern and subsume CmpD and Bool into CMoveD
1493 //
1494 //                   SubD             ConD
1495 //                  /  |               /
1496 //                 /   |           /   /
1497 //                /    |       /      /
1498 //               /     |   /         /
1499 //              /      /            /
1500 //             /    /  |           /
1501 //            v /      |          /
1502 //         CmpD        |         /
1503 //          |          |        /
1504 //          v          |       /
1505 //         Bool        |      /
1506 //           \         |     /
1507 //             \       |    /
1508 //               \     |   /
1509 //                 \   |  /
1510 //                   \ v /
1511 //                   CMoveD
1512 //
1513 
1514 void SuperWord::merge_packs_to_cmovd() {
1515   for (int i = _packset.length() - 1; i >= 0; i--) {
1516     _cmovev_kit.make_cmovevd_pack(_packset.at(i));
1517   }
1518   #ifndef PRODUCT
1519     if (TraceSuperWord) {
1520       tty->print_cr("\nSuperWord::merge_packs_to_cmovd(): After merge");
1521       print_packset();
1522       tty->cr();
1523     }
1524   #endif
1525 }
1526 
1527 Node* CMoveKit::is_Bool_candidate(Node* def) const {
1528   Node* use = NULL;
1529   if (!def->is_Bool() || def->in(0) != NULL || def->outcnt() != 1) {
1530     return NULL;
1531   }
1532   for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1533     use = def->fast_out(j);
1534     if (!_sw->same_generation(def, use) || !use->is_CMove()) {
1535       return NULL;
1536     }
1537   }
1538   return use;
1539 }
1540 
1541 Node* CMoveKit::is_CmpD_candidate(Node* def) const {
1542   Node* use = NULL;
1543   if (!def->is_Cmp() || def->in(0) != NULL || def->outcnt() != 1) {
1544     return NULL;
1545   }
1546   for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1547     use = def->fast_out(j);
1548     if (!_sw->same_generation(def, use) || (use = is_Bool_candidate(use)) == NULL || !_sw->same_generation(def, use)) {
1549       return NULL;
1550     }
1551   }
1552   return use;
1553 }
1554 
1555 Node_List* CMoveKit::make_cmovevd_pack(Node_List* cmovd_pk) {
1556   Node *cmovd = cmovd_pk->at(0);
1557   if (!cmovd->is_CMove()) {
1558     return NULL;
1559   }
1560   if (pack(cmovd) != NULL) { // already in the cmov pack
1561     return NULL;
1562   }
1563   if (cmovd->in(0) != NULL) {
1564     NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CMoveD %d has control flow, escaping...", cmovd->_idx); cmovd->dump();})
1565     return NULL;
1566   }
1567 
1568   Node* bol = cmovd->as_CMove()->in(CMoveNode::Condition);
1569   if (!bol->is_Bool()
1570       || bol->outcnt() != 1
1571       || !_sw->same_generation(bol, cmovd)
1572       || bol->in(0) != NULL  // BoolNode has control flow!!
1573       || _sw->my_pack(bol) == NULL) {
1574       NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: Bool %d does not fit CMoveD %d for building vector, escaping...", bol->_idx, cmovd->_idx); bol->dump();})
1575       return NULL;
1576   }
1577   Node_List* bool_pk = _sw->my_pack(bol);
1578   if (bool_pk->size() != cmovd_pk->size() ) {
1579     return NULL;
1580   }
1581 
1582   Node* cmpd = bol->in(1);
1583   if (!cmpd->is_Cmp()
1584       || cmpd->outcnt() != 1
1585       || !_sw->same_generation(cmpd, cmovd)
1586       || cmpd->in(0) != NULL  // CmpDNode has control flow!!
1587       || _sw->my_pack(cmpd) == NULL) {
1588       NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CmpD %d does not fit CMoveD %d for building vector, escaping...", cmpd->_idx, cmovd->_idx); cmpd->dump();})
1589       return NULL;
1590   }
1591   Node_List* cmpd_pk = _sw->my_pack(cmpd);
1592   if (cmpd_pk->size() != cmovd_pk->size() ) {
1593     return NULL;
1594   }
1595 
1596   if (!test_cmpd_pack(cmpd_pk, cmovd_pk)) {
1597     NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: cmpd pack for CmpD %d failed vectorization test", cmpd->_idx); cmpd->dump();})
1598     return NULL;
1599   }
1600 
1601   Node_List* new_cmpd_pk = new Node_List();
1602   uint sz = cmovd_pk->size() - 1;
1603   for (uint i = 0; i <= sz; ++i) {
1604     Node* cmov = cmovd_pk->at(i);
1605     Node* bol  = bool_pk->at(i);
1606     Node* cmp  = cmpd_pk->at(i);
1607 
1608     new_cmpd_pk->insert(i, cmov);
1609 
1610     map(cmov, new_cmpd_pk);
1611     map(bol, new_cmpd_pk);
1612     map(cmp, new_cmpd_pk);
1613 
1614     _sw->set_my_pack(cmov, new_cmpd_pk); // and keep old packs for cmp and bool
1615   }
1616   _sw->_packset.remove(cmovd_pk);
1617   _sw->_packset.remove(bool_pk);
1618   _sw->_packset.remove(cmpd_pk);
1619   _sw->_packset.append(new_cmpd_pk);
1620   NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack"); _sw->print_pack(new_cmpd_pk);})
1621   return new_cmpd_pk;
1622 }
1623 
1624 bool CMoveKit::test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk) {
1625   Node* cmpd0 = cmpd_pk->at(0);
1626   assert(cmpd0->is_Cmp(), "CMoveKit::test_cmpd_pack: should be CmpDNode");
1627   assert(cmovd_pk->at(0)->is_CMove(), "CMoveKit::test_cmpd_pack: should be CMoveD");
1628   assert(cmpd_pk->size() == cmovd_pk->size(), "CMoveKit::test_cmpd_pack: should be same size");
1629   Node* in1 = cmpd0->in(1);
1630   Node* in2 = cmpd0->in(2);
1631   Node_List* in1_pk = _sw->my_pack(in1);
1632   Node_List* in2_pk = _sw->my_pack(in2);
1633 
1634   if (in1_pk != NULL && in1_pk->size() != cmpd_pk->size()
1635     || in2_pk != NULL && in2_pk->size() != cmpd_pk->size() ) {
1636     return false;
1637   }
1638 
1639   // test if "all" in1 are in the same pack or the same node
1640   if (in1_pk == NULL) {
1641     for (uint j = 1; j < cmpd_pk->size(); j++) {
1642       if (cmpd_pk->at(j)->in(1) != in1) {
1643         return false;
1644       }
1645     }//for: in1_pk is not pack but all CmpD nodes in the pack have the same in(1)
1646   }
1647   // test if "all" in2 are in the same pack or the same node
1648   if (in2_pk == NULL) {
1649     for (uint j = 1; j < cmpd_pk->size(); j++) {
1650       if (cmpd_pk->at(j)->in(2) != in2) {
1651         return false;
1652       }
1653     }//for: in2_pk is not pack but all CmpD nodes in the pack have the same in(2)
1654   }
1655   //now check if cmpd_pk may be subsumed in vector built for cmovd_pk
1656   int cmovd_ind1, cmovd_ind2;
1657   if (cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse)
1658    && cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) {
1659       cmovd_ind1 = CMoveNode::IfFalse;
1660       cmovd_ind2 = CMoveNode::IfTrue;
1661   } else if (cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse)
1662           && cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) {
1663       cmovd_ind2 = CMoveNode::IfFalse;
1664       cmovd_ind1 = CMoveNode::IfTrue;
1665   }
1666   else {
1667     return false;
1668   }
1669 
1670   for (uint j = 1; j < cmpd_pk->size(); j++) {
1671     if (cmpd_pk->at(j)->in(1) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind1)
1672         || cmpd_pk->at(j)->in(2) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind2)) {
1673         return false;
1674     }//if
1675   }
1676   NOT_PRODUCT(if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmpd_pack: cmpd pack for 1st CmpD %d is OK for vectorization: ", cmpd0->_idx); cmpd0->dump(); })
1677   return true;
1678 }
1679 
1680 //------------------------------implemented---------------------------
1681 // Can code be generated for pack p?
1682 bool SuperWord::implemented(Node_List* p) {
1683   bool retValue = false;
1684   Node* p0 = p->at(0);
1685   if (p0 != NULL) {
1686     int opc = p0->Opcode();
1687     uint size = p->size();
1688     if (p0->is_reduction()) {
1689       const Type *arith_type = p0->bottom_type();
1690       // Length 2 reductions of INT/LONG do not offer performance benefits
1691       if (((arith_type->basic_type() == T_INT) || (arith_type->basic_type() == T_LONG)) && (size == 2)) {
1692         retValue = false;
1693       } else {
1694         retValue = ReductionNode::implemented(opc, size, arith_type->basic_type());
1695       }
1696     } else {
1697       retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
1698     }
1699     if (!retValue) {
1700       if (is_cmov_pack(p)) {
1701         NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);})
1702         return true;
1703       }
1704     }
1705   }
1706   return retValue;
1707 }
1708 
1709 bool SuperWord::is_cmov_pack(Node_List* p) {
1710   return _cmovev_kit.pack(p->at(0)) != NULL;
1711 }
1712 //------------------------------same_inputs--------------------------
1713 // For pack p, are all idx operands the same?
1714 bool SuperWord::same_inputs(Node_List* p, int idx) {
1715   Node* p0 = p->at(0);
1716   uint vlen = p->size();
1717   Node* p0_def = p0->in(idx);
1718   for (uint i = 1; i < vlen; i++) {
1719     Node* pi = p->at(i);
1720     Node* pi_def = pi->in(idx);
1721     if (p0_def != pi_def) {
1722       return false;
1723     }
1724   }
1725   return true;
1726 }
1727 
1728 //------------------------------profitable---------------------------
1729 // For pack p, are all operands and all uses (with in the block) vector?
1730 bool SuperWord::profitable(Node_List* p) {
1731   Node* p0 = p->at(0);
1732   uint start, end;
1733   VectorNode::vector_operands(p0, &start, &end);
1734 
1735   // Return false if some inputs are not vectors or vectors with different
1736   // size or alignment.
1737   // Also, for now, return false if not scalar promotion case when inputs are
1738   // the same. Later, implement PackNode and allow differing, non-vector inputs
1739   // (maybe just the ones from outside the block.)
1740   for (uint i = start; i < end; i++) {
1741     if (!is_vector_use(p0, i)) {
1742       return false;
1743     }
1744   }
1745   // Check if reductions are connected
1746   if (p0->is_reduction()) {
1747     Node* second_in = p0->in(2);
1748     Node_List* second_pk = my_pack(second_in);
1749     if ((second_pk == NULL) || (_num_work_vecs == _num_reductions)) {
1750       // Remove reduction flag if no parent pack or if not enough work
1751       // to cover reduction expansion overhead
1752       p0->remove_flag(Node::Flag_is_reduction);
1753       return false;
1754     } else if (second_pk->size() != p->size()) {
1755       return false;
1756     }
1757   }
1758   if (VectorNode::is_shift(p0)) {
1759     // For now, return false if shift count is vector or not scalar promotion
1760     // case (different shift counts) because it is not supported yet.
1761     Node* cnt = p0->in(2);
1762     Node_List* cnt_pk = my_pack(cnt);
1763     if (cnt_pk != NULL)
1764       return false;
1765     if (!same_inputs(p, 2))
1766       return false;
1767   }
1768   if (!p0->is_Store()) {
1769     // For now, return false if not all uses are vector.
1770     // Later, implement ExtractNode and allow non-vector uses (maybe
1771     // just the ones outside the block.)
1772     for (uint i = 0; i < p->size(); i++) {
1773       Node* def = p->at(i);
1774       if (is_cmov_pack_internal_node(p, def)) {
1775         continue;
1776       }
1777       for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1778         Node* use = def->fast_out(j);
1779         for (uint k = 0; k < use->req(); k++) {
1780           Node* n = use->in(k);
1781           if (def == n) {
1782             // reductions can be loop carried dependences
1783             if (def->is_reduction() && use->is_Phi())
1784               continue;
1785             if (!is_vector_use(use, k)) {
1786               return false;
1787             }
1788           }
1789         }
1790       }
1791     }
1792   }
1793   return true;
1794 }
1795 
1796 //------------------------------schedule---------------------------


2026     tty->print("SuperWord::output    ");
2027     lpt()->dump_head();
2028   }
2029 #endif
2030 
2031   // MUST ENSURE main loop's initial value is properly aligned:
2032   //  (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
2033 
2034   align_initial_loop_index(align_to_ref());
2035 
2036   // Insert extract (unpack) operations for scalar uses
2037   for (int i = 0; i < _packset.length(); i++) {
2038     insert_extracts(_packset.at(i));
2039   }
2040 
2041   Compile* C = _phase->C;
2042   CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
2043   uint max_vlen_in_bytes = 0;
2044   uint max_vlen = 0;
2045 
2046   NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);})
2047 
2048   CountedLoopReserveKit make_reversable(_phase, _lpt, do_reserve_copy());
2049 
2050   NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);})
2051 
2052   if (do_reserve_copy() && !make_reversable.has_reserved()) {
2053     NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");})
2054     return;
2055   }
2056 
2057   for (int i = 0; i < _block.length(); i++) {
2058     Node* n = _block.at(i);
2059     Node_List* p = my_pack(n);
2060     if (p && n == executed_last(p)) {
2061       uint vlen = p->size();
2062       uint vlen_in_bytes = 0;
2063       Node* vn = NULL;
2064       Node* low_adr = p->at(0);
2065       Node* first   = executed_first(p);
2066       NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack", first->_idx, n->_idx); print_pack(p);})
2067       int   opc = n->Opcode();
2068       if (n->is_Load()) {
2069         Node* ctl = n->in(MemNode::Control);
2070         Node* mem = first->in(MemNode::Memory);
2071         SWPointer p1(n->as_Mem(), this, NULL, false);
2072         // Identify the memory dependency for the new loadVector node by
2073         // walking up through memory chain.
2074         // This is done to give flexibility to the new loadVector node so that
2075         // it can move above independent storeVector nodes.
2076         while (mem->is_StoreVector()) {
2077           SWPointer p2(mem->as_Mem(), this, NULL, false);
2078           int cmp = p1.cmp(p2);
2079           if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
2080             mem = mem->in(MemNode::Memory);
2081           } else {
2082             break; // dependent memory
2083           }
2084         }
2085         Node* adr = low_adr->in(MemNode::Address);
2086         const TypePtr* atyp = n->adr_type();
2087         vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
2088         vlen_in_bytes = vn->as_LoadVector()->memory_size();
2089       } else if (n->is_Store()) {
2090         // Promote value to be stored to vector
2091         Node* val = vector_opd(p, MemNode::ValueIn);
2092         if (val == NULL) {
2093           if (do_reserve_copy()) {
2094             NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: val should not be NULL, exiting SuperWord");})
2095             return; //and reverse to backup IG
2096           }
2097           ShouldNotReachHere();
2098         }
2099 
2100         Node* ctl = n->in(MemNode::Control);
2101         Node* mem = first->in(MemNode::Memory);
2102         Node* adr = low_adr->in(MemNode::Address);
2103         const TypePtr* atyp = n->adr_type();
2104         vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
2105         vlen_in_bytes = vn->as_StoreVector()->memory_size();
2106       } else if (n->req() == 3 && !is_cmov_pack(p)) {
2107         // Promote operands to vector
2108         Node* in1 = NULL;
2109         bool node_isa_reduction = n->is_reduction();
2110         if (node_isa_reduction) {
2111           // the input to the first reduction operation is retained
2112           in1 = low_adr->in(1);
2113         } else {
2114           in1 = vector_opd(p, 1);
2115           if (in1 == NULL) {
2116             if (do_reserve_copy()) {
2117               NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in1 should not be NULL, exiting SuperWord");})
2118               return; //and reverse to backup IG
2119             }
2120             ShouldNotReachHere();
2121           }
2122         }
2123         Node* in2 = vector_opd(p, 2);
2124         if (in2 == NULL) {
2125           if (do_reserve_copy()) {
2126             NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in2 should not be NULL, exiting SuperWord");})
2127             return; //and reverse to backup IG
2128           }
2129           ShouldNotReachHere();
2130         }
2131         if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) {
2132           // Move invariant vector input into second position to avoid register spilling.
2133           Node* tmp = in1;
2134           in1 = in2;
2135           in2 = tmp;
2136         }
2137         if (node_isa_reduction) {
2138           const Type *arith_type = n->bottom_type();
2139           vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type());
2140           if (in2->is_Load()) {
2141             vlen_in_bytes = in2->as_LoadVector()->memory_size();
2142           } else {
2143             vlen_in_bytes = in2->as_Vector()->length_in_bytes();
2144           }
2145         } else {
2146           vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
2147           vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2148         }
2149       } else if (opc == Op_SqrtD || opc == Op_AbsF || opc == Op_AbsD || opc == Op_NegF || opc == Op_NegD) {
2150         // Promote operand to vector (Sqrt/Abs/Neg are 2 address instructions)
2151         Node* in = vector_opd(p, 1);
2152         vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
2153         vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2154       } else if (is_cmov_pack(p)) {
2155         if (!n->is_CMove()) {
2156           continue;
2157         }
2158         // place here CMoveVDNode
2159         NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization"); print_loop(false);})
2160         Node* bol = n->in(CMoveNode::Condition);
2161         if (!bol->is_Bool() && bol->Opcode() == Op_ExtractI && bol->req() > 1 ) {
2162           NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d", bol->_idx, bol->in(1)->_idx); bol->dump(); bol->in(1)->dump();})
2163           bol = bol->in(1); //may be ExtractNode
2164         }
2165 
2166         assert(bol->is_Bool(), "should be BoolNode - too late to bail out!");
2167         if (!bol->is_Bool()) {
2168           if (do_reserve_copy()) {
2169             NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();})
2170             return; //and reverse to backup IG
2171           }
2172           ShouldNotReachHere();
2173         }
2174 
2175         int cond = (int)bol->as_Bool()->_test._test;
2176         Node* in_cc  = _igvn.intcon(cond);
2177         NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", in_cc->_idx); in_cc->dump();})
2178         Node* cc = bol->clone();
2179         cc->set_req(1, in_cc);
2180         NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d", cc->_idx); cc->dump();})
2181 
2182         Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse
2183         if (src1 == NULL) {
2184           if (do_reserve_copy()) {
2185             NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be NULL, exiting SuperWord");})
2186             return; //and reverse to backup IG
2187           }
2188           ShouldNotReachHere();
2189         }
2190         Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue
2191         if (src2 == NULL) {
2192           if (do_reserve_copy()) {
2193             NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be NULL, exiting SuperWord");})
2194             return; //and reverse to backup IG
2195           }
2196           ShouldNotReachHere();
2197         }
2198         BasicType bt = velt_basic_type(n);
2199         const TypeVect* vt = TypeVect::make(bt, vlen);
2200         vn = new CMoveVDNode(cc, src1, src2, vt);
2201         NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();})
2202       } else {
2203         if (do_reserve_copy()) {
2204           NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: ShouldNotReachHere, exiting SuperWord");})
2205           return; //and reverse to backup IG
2206         }
2207         ShouldNotReachHere();
2208       }
2209 
2210       assert(vn != NULL, "sanity");
2211       if (vn == NULL) {
2212         if (do_reserve_copy()){
2213           NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: got NULL node, cannot proceed, exiting SuperWord");})
2214           return; //and reverse to backup IG
2215         }
2216         ShouldNotReachHere();
2217       }
2218 
2219       _igvn.register_new_node_with_optimizer(vn);
2220       _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
2221       for (uint j = 0; j < p->size(); j++) {
2222         Node* pm = p->at(j);
2223         _igvn.replace_node(pm, vn);
2224       }
2225       _igvn._worklist.push(vn);
2226 
2227       if (vlen_in_bytes > max_vlen_in_bytes) {
2228         max_vlen = vlen;
2229         max_vlen_in_bytes = vlen_in_bytes;
2230       }
2231 #ifdef ASSERT
2232       if (TraceNewVectors) {
2233         tty->print("new Vector node: ");
2234         vn->dump();
2235       }
2236 #endif
2237     }
2238   }//for (int i = 0; i < _block.length(); i++)
2239 
2240   C->set_max_vector_size(max_vlen_in_bytes);
2241 
2242   if (SuperWordLoopUnrollAnalysis) {
2243     if (cl->has_passed_slp()) {
2244       uint slp_max_unroll_factor = cl->slp_max_unroll();
2245       if (slp_max_unroll_factor == max_vlen) {
2246         NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte));
2247         // For atomic unrolled loops which are vector mapped, instigate more unrolling.
2248         cl->set_notpassed_slp();
2249         C->set_major_progress();
2250         cl->mark_do_unroll_only();
2251       }
2252     }
2253   }
2254 
2255   if (do_reserve_copy()) {
2256     make_reversable.use_new();
2257   }
2258   NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);})
2259   return;
2260 }
2261 
2262 //------------------------------vector_opd---------------------------
2263 // Create a vector operand for the nodes in pack p for operand: in(opd_idx)
     //
     // p       - pack whose members consume the operand; p->at(0) ("p0") is used
     //           as the representative member and p->size() as the vector length.
     // opd_idx - input index of the operand on each pack member.
     //
     // Depending on the operand, the result is one of:
     //  - p0's input itself, when same_inputs() holds and that input is already
     //    a Vector or LoadVector node;
     //  - a shift count (masked if needed), when opd_idx == 2 on a shift pack;
     //  - a scalar2vector node built from p0's input, when same_inputs() holds;
     //  - otherwise a PackNode collecting in(opd_idx) of every pack member.
     // Result nodes created here are registered with _igvn and placed at the
     // control of p0's operand.
     //
     // Returns NULL on the bail-out paths below. Each bail-out tests the same
     // condition as the assert directly above it (presumably so that builds in
     // which the assert is compiled out give up instead of continuing).
2264 Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
2265   Node* p0 = p->at(0);            // representative pack member
2266   uint vlen = p->size();          // one vector element per pack member
2267   Node* opd = p0->in(opd_idx);    // p0's operand; also supplies the control for new nodes
2268 
2269   if (same_inputs(p, opd_idx)) {  // presumably: all members use the same node at opd_idx -- see same_inputs()
2270     if (opd->is_Vector() || opd->is_LoadVector()) {
2271       assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
2272       if (opd_idx == 2 && VectorNode::is_shift(p0)) { // same condition as the assert; bail out with NULL
2273         NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("shift's count can't be vector");})
2274         return NULL;
2275       }
2276       return opd; // input is matching vector
2277     }
2278     if ((opd_idx == 2) && VectorNode::is_shift(p0)) { // operand is the (scalar) shift count
2279       Compile* C = _phase->C;
2280       Node* cnt = opd;  // stays equal to opd unless a replacement node is built below
2281       // Vector instructions do not mask shift count, do it here.
2282       juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
2283       const TypeInt* t = opd->find_int_type();
2284       if (t != NULL && t->is_con()) { // constant count: replaced only when it exceeds the mask
2285         juint shift = t->get_con();   // held as juint, so a negative constant compares as > mask
2286         if (shift > mask) { // Unsigned cmp
2287           cnt = ConNode::make(TypeInt::make(shift & mask));
2288         }
2289       } else {
2290         if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) { // range not known to lie in [0, mask]: AND with mask
2291           cnt = ConNode::make(TypeInt::make(mask));
2292           _igvn.register_new_node_with_optimizer(cnt);
2293           cnt = new AndINode(opd, cnt);
2294           _igvn.register_new_node_with_optimizer(cnt);
2295           _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
2296         }
2297         assert(opd->bottom_type()->isa_int(), "int type only");
2298         if (!opd->bottom_type()->isa_int()) { // same condition as the assert; bail out with NULL
2299           NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should be int type only");})
2300           return NULL;
2301         }
2302         // Move non constant shift count into vector register.
2303         cnt = VectorNode::shift_count(p0, cnt, vlen, velt_basic_type(p0));
2304       }
2305       if (cnt != opd) { // a replacement count node was created above
2306         _igvn.register_new_node_with_optimizer(cnt);
2307         _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
2308       }
2309       return cnt;
2310     }
2311     assert(!opd->is_StoreVector(), "such vector is not expected here");
2312     if (opd->is_StoreVector()) { // same condition as the assert; bail out with NULL
2313       NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("StoreVector is not expected here");})
2314       return NULL;
2315     }
2316     // Convert scalar input to vector with the same number of elements as
2317     // p0's vector. Use p0's type because size of operand's container in
2318     // vector should match p0's size regardless of the operand's size.
2319     const Type* p0_t = velt_type(p0);
2320     VectorNode* vn = VectorNode::scalar2vector(opd, vlen, p0_t);
2321 
2322     _igvn.register_new_node_with_optimizer(vn);
2323     _phase->set_ctrl(vn, _phase->get_ctrl(opd));
2324 #ifdef ASSERT
2325     if (TraceNewVectors) {
2326       tty->print("new Vector node: ");
2327       vn->dump();
2328     }
2329 #endif
2330     return vn;
2331   }
2332 
2333   // Insert pack operation
2334   BasicType bt = velt_basic_type(p0);
2335   PackNode* pk = PackNode::make(opd, vlen, bt); // made from p0's input; the loop adds the remaining members' inputs
2336   DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); ) // read only by the type assert in the loop
2337 
2338   for (uint i = 1; i < vlen; i++) { // starts at 1: p0's input was already passed to PackNode::make
2339     Node* pi = p->at(i);
2340     Node* in = pi->in(opd_idx);
2341     assert(my_pack(in) == NULL, "Should already have been unpacked");
2342     if (my_pack(in) != NULL) { // same condition as the assert; bail out with NULL
2343       NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})
2344       return NULL;
2345     }
2346     assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
2347     pk->add_opd(in);
2348   }
2349   _igvn.register_new_node_with_optimizer(pk);
2350   _phase->set_ctrl(pk, _phase->get_ctrl(opd));
2351 #ifdef ASSERT
2352   if (TraceNewVectors) {
2353     tty->print("new Vector node: ");
2354     pk->dump();
2355   }
2356 #endif
2357   return pk;
2358 }
2359 
2360 //------------------------------insert_extracts---------------------------
2361 // If a use of pack p is not a vector use, then replace the
2362 // use with an extract operation.
2363 void SuperWord::insert_extracts(Node_List* p) {
2364   if (p->at(0)->is_Store()) return;
2365   assert(_n_idx_list.is_empty(), "empty (node,index) list");
2366 
2367   // Inspect each use of each pack member.  For each use that is
2368   // not a vector use, replace the use with an extract operation.
2369 
2370   for (uint i = 0; i < p->size(); i++) {
2371     Node* def = p->at(i);
2372     for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2373       Node* use = def->fast_out(j);
2374       for (uint k = 0; k < use->req(); k++) {
2375         Node* n = use->in(k);
2376         if (def == n) {
2377           Node_List* u_pk = my_pack(use);
2378           if ((u_pk == NULL || !is_cmov_pack(u_pk) || use->is_CMove()) && !is_vector_use(use, k)) {
2379               _n_idx_list.push(use, k);
2380           }
2381         }
2382       }
2383     }
2384   }
2385 
2386   while (_n_idx_list.is_nonempty()) {
2387     Node* use = _n_idx_list.node();
2388     int   idx = _n_idx_list.index();
2389     _n_idx_list.pop();
2390     Node* def = use->in(idx);
2391 
2392     if (def->is_reduction()) continue;
2393 
2394     // Insert extract operation
2395     _igvn.hash_delete(def);
2396     int def_pos = alignment(def) / data_size(def);
2397 
2398     Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));


< prev index next >