20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 #include "precompiled.hpp"
25 #include "compiler/compileLog.hpp"
26 #include "libadt/vectset.hpp"
27 #include "memory/allocation.inline.hpp"
28 #include "opto/addnode.hpp"
29 #include "opto/callnode.hpp"
30 #include "opto/castnode.hpp"
31 #include "opto/convertnode.hpp"
32 #include "opto/divnode.hpp"
33 #include "opto/matcher.hpp"
34 #include "opto/memnode.hpp"
35 #include "opto/mulnode.hpp"
36 #include "opto/opcodes.hpp"
37 #include "opto/opaquenode.hpp"
38 #include "opto/superword.hpp"
39 #include "opto/vectornode.hpp"
40
41 //
42 // S U P E R W O R D T R A N S F O R M
43 //=============================================================================
44
45 //------------------------------SuperWord---------------------------
46 SuperWord::SuperWord(PhaseIdealLoop* phase) :
47 _phase(phase),
48 _igvn(phase->_igvn),
49 _arena(phase->C->comp_arena()),
50 _packset(arena(), 8, 0, NULL), // packs for the current block
51 _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
52 _block(arena(), 8, 0, NULL), // nodes in current block
53 _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
54 _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
55 _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
56 _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
57 _clone_map(phase->C->clone_map()), // map of nodes created in cloning
58 _align_to_ref(NULL), // memory reference to align vectors to
59 _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
60 _dg(_arena), // dependence graph
61 _visited(arena()), // visited node set
62 _post_visited(arena()), // post visited node set
63 _n_idx_list(arena(), 8), // scratch list of (node,index) pairs
64 _stk(arena(), 8, 0, NULL), // scratch stack of nodes
65 _nlist(arena(), 8, 0, NULL), // scratch list of nodes
66 _lpt(NULL), // loop tree node
67 _lp(NULL), // LoopNode
68 _bb(NULL), // basic block
69 _iv(NULL), // induction var
70 _race_possible(false), // cases where SDMU is true
71 _early_return(true), // analysis evaluations routine
72 _num_work_vecs(0), // amount of vector work we have
73 _num_reductions(0), // amount of reduction work we have
74 _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style
75 _ii_first(-1), // first loop generation index - only if do_vector_loop()
76 _ii_last(-1), // last loop generation index - only if do_vector_loop()
77 _ii_order(arena(), 8, 0, 0)
78 {
79 #ifndef PRODUCT
80 _vector_loop_debug = 0;
81 if (_phase->C->method() != NULL) {
82 _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug);
83 }
84 #endif
85 }
86
87 //------------------------------transform_loop---------------------------
88 void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
89 assert(UseSuperWord, "should be");
90 // Do vectors exist on this architecture?
91 if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
92
93 assert(lpt->_head->is_CountedLoop(), "must be");
94 CountedLoopNode *cl = lpt->_head->as_CountedLoop();
95
96 if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
97
98 if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
99 // Check for no control flow in body (other than exit)
100 Node *cl_exit = cl->loopexit();
101 if (cl_exit->in(0) != lpt->_head) return;
102
103 // Make sure the are no extra control users of the loop backedge
104 if (cl->back_control()->outcnt() != 1) {
105 return;
106 }
107
108 // We only re-enter slp when we vector mapped a queried loop and we want to
109 // continue unrolling, in this case, slp is not subsequently done.
110 if (cl->do_unroll_only()) return;
111
112 // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
113 CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
114 if (pre_end == NULL) return;
115 Node *pre_opaq1 = pre_end->limit();
116 if (pre_opaq1->Opcode() != Op_Opaque1) return;
117
118 init(); // initialize data structures
119
120 set_lpt(lpt);
121 set_lp(cl);
370
371 find_adjacent_refs();
372
373 extend_packlist();
374
375 if (_do_vector_loop) {
376 if (_packset.length() == 0) {
377 #ifndef PRODUCT
378 if (TraceSuperWord) {
379 tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
380 }
381 #endif
382 pack_parallel();
383 }
384 }
385
386 combine_packs();
387
388 construct_my_pack_map();
389
390 filter_packs();
391
392 schedule();
393
394 output();
395 }
396
397 //------------------------------find_adjacent_refs---------------------------
398 // Find the adjacent memory references and create pack pairs for them.
399 // This is the initial set of packs that will then be extended by
400 // following use->def and def->use links. The align positions are
401 // assigned relative to the reference "align_to_ref"
402 void SuperWord::find_adjacent_refs() {
403 // Get list of memory operations
404 Node_List memops;
405 for (int i = 0; i < _block.length(); i++) {
406 Node* n = _block.at(i);
407 if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
408 is_java_primitive(n->as_Mem()->memory_type())) {
409 int align = memory_alignment(n->as_Mem(), 0);
1050 if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
1051 return false;
1052 }
1053 }
1054 }
1055 return true;
1056 }
1057
1058 //------------------------------set_alignment---------------------------
1059 void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
1060 set_alignment(s1, align);
1061 if (align == top_align || align == bottom_align) {
1062 set_alignment(s2, align);
1063 } else {
1064 set_alignment(s2, align + data_size(s1));
1065 }
1066 }
1067
1068 //------------------------------data_size---------------------------
1069 int SuperWord::data_size(Node* s) {
1070 int bsize = type2aelembytes(velt_basic_type(s));
1071 assert(bsize != 0, "valid size");
1072 return bsize;
1073 }
1074
1075 //------------------------------extend_packlist---------------------------
1076 // Extend packset by following use->def and def->use links from pack members.
1077 void SuperWord::extend_packlist() {
1078 bool changed;
1079 do {
1080 packset_sort(_packset.length());
1081 changed = false;
1082 for (int i = 0; i < _packset.length(); i++) {
1083 Node_List* p = _packset.at(i);
1084 changed |= follow_use_defs(p);
1085 changed |= follow_def_uses(p);
1086 }
1087 } while (changed);
1088
1089 if (_race_possible) {
1096 #ifndef PRODUCT
1097 if (TraceSuperWord) {
1098 tty->print_cr("\nAfter extend_packlist");
1099 print_packset();
1100 }
1101 #endif
1102 }
1103
1104 //------------------------------follow_use_defs---------------------------
1105 // Extend the packset by visiting operand definitions of nodes in pack p
1106 bool SuperWord::follow_use_defs(Node_List* p) {
1107 assert(p->size() == 2, "just checking");
1108 Node* s1 = p->at(0);
1109 Node* s2 = p->at(1);
1110 assert(s1->req() == s2->req(), "just checking");
1111 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
1112
1113 if (s1->is_Load()) return false;
1114
1115 int align = alignment(s1);
1116 bool changed = false;
1117 int start = s1->is_Store() ? MemNode::ValueIn : 1;
1118 int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
1119 for (int j = start; j < end; j++) {
1120 Node* t1 = s1->in(j);
1121 Node* t2 = s2->in(j);
1122 if (!in_bb(t1) || !in_bb(t2))
1123 continue;
1124 if (stmts_can_pack(t1, t2, align)) {
1125 if (est_savings(t1, t2) >= 0) {
1126 Node_List* pair = new Node_List();
1127 pair->push(t1);
1128 pair->push(t2);
1129 _packset.append(pair);
1130 set_alignment(t1, t2, align);
1131 changed = true;
1132 }
1133 }
1134 }
1135 return changed;
1136 }
1137
1138 //------------------------------follow_def_uses---------------------------
1139 // Extend the packset by visiting uses of nodes in pack p
1140 bool SuperWord::follow_def_uses(Node_List* p) {
1141 bool changed = false;
1142 Node* s1 = p->at(0);
1143 Node* s2 = p->at(1);
1144 assert(p->size() == 2, "just checking");
1145 assert(s1->req() == s2->req(), "just checking");
1146 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
1147
1148 if (s1->is_Store()) return false;
1149
1150 int align = alignment(s1);
1151 int savings = -1;
1152 int num_s1_uses = 0;
1153 Node* u1 = NULL;
1154 Node* u2 = NULL;
1155 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1156 Node* t1 = s1->fast_out(i);
1157 num_s1_uses++;
1158 if (!in_bb(t1)) continue;
1159 for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
1160 Node* t2 = s2->fast_out(j);
1161 if (!in_bb(t2)) continue;
1162 if (!opnd_positions_match(s1, t1, s2, t2))
1163 continue;
1164 if (stmts_can_pack(t1, t2, align)) {
1165 int my_savings = est_savings(t1, t2);
1166 if (my_savings > savings) {
1167 savings = my_savings;
1168 u1 = t1;
1169 u2 = t2;
1170 }
1171 }
1172 }
1173 }
1174 if (num_s1_uses > 1) {
1175 _race_possible = true;
1176 }
1177 if (savings >= 0) {
1178 Node_List* pair = new Node_List();
1179 pair->push(u1);
1180 pair->push(u2);
1181 _packset.append(pair);
1182 set_alignment(u1, u2, align);
1183 changed = true;
1184 }
1185 return changed;
1186 }
1187
1188 //------------------------------order_def_uses---------------------------
1189 // For extended packsets, ordinally arrange uses packset by major component
1190 void SuperWord::order_def_uses(Node_List* p) {
1191 Node* s1 = p->at(0);
1192
1193 if (s1->is_Store()) return;
1194
1195 // reductions are always managed beforehand
1196 if (s1->is_reduction()) return;
1197
1198 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1199 Node* t1 = s1->fast_out(i);
1200
1201 // Only allow operand swap on commuting operations
1437 if (TraceSuperWord && Verbose) {
1438 tty->print_cr("Unprofitable");
1439 pk->at(0)->dump();
1440 }
1441 #endif
1442 remove_pack_at(i);
1443 changed = true;
1444 }
1445 }
1446 } while (changed);
1447
1448 #ifndef PRODUCT
1449 if (TraceSuperWord) {
1450 tty->print_cr("\nAfter filter_packs");
1451 print_packset();
1452 tty->cr();
1453 }
1454 #endif
1455 }
1456
1457 //------------------------------implemented---------------------------
1458 // Can code be generated for pack p?
1459 bool SuperWord::implemented(Node_List* p) {
1460 bool retValue = false;
1461 Node* p0 = p->at(0);
1462 if (p0 != NULL) {
1463 int opc = p0->Opcode();
1464 uint size = p->size();
1465 if (p0->is_reduction()) {
1466 const Type *arith_type = p0->bottom_type();
1467 // Length 2 reductions of INT/LONG do not offer performance benefits
1468 if (((arith_type->basic_type() == T_INT) || (arith_type->basic_type() == T_LONG)) && (size == 2)) {
1469 retValue = false;
1470 } else {
1471 retValue = ReductionNode::implemented(opc, size, arith_type->basic_type());
1472 }
1473 } else {
1474 retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
1475 }
1476 }
1477 return retValue;
1478 }
1479
1480 //------------------------------same_inputs--------------------------
1481 // For pack p, are all idx operands the same?
1482 static bool same_inputs(Node_List* p, int idx) {
1483 Node* p0 = p->at(0);
1484 uint vlen = p->size();
1485 Node* p0_def = p0->in(idx);
1486 for (uint i = 1; i < vlen; i++) {
1487 Node* pi = p->at(i);
1488 Node* pi_def = pi->in(idx);
1489 if (p0_def != pi_def)
1490 return false;
1491 }
1492 return true;
1493 }
1494
1495 //------------------------------profitable---------------------------
1496 // For pack p, are all operands and all uses (with in the block) vector?
1497 bool SuperWord::profitable(Node_List* p) {
1498 Node* p0 = p->at(0);
1499 uint start, end;
1500 VectorNode::vector_operands(p0, &start, &end);
1501
1502 // Return false if some inputs are not vectors or vectors with different
1503 // size or alignment.
1504 // Also, for now, return false if not scalar promotion case when inputs are
1505 // the same. Later, implement PackNode and allow differing, non-vector inputs
1506 // (maybe just the ones from outside the block.)
1507 for (uint i = start; i < end; i++) {
1508 if (!is_vector_use(p0, i))
1509 return false;
1510 }
1511 // Check if reductions are connected
1512 if (p0->is_reduction()) {
1513 Node* second_in = p0->in(2);
1514 Node_List* second_pk = my_pack(second_in);
1515 if ((second_pk == NULL) || (_num_work_vecs == _num_reductions)) {
1516 // Remove reduction flag if no parent pack or if not enough work
1517 // to cover reduction expansion overhead
1518 p0->remove_flag(Node::Flag_is_reduction);
1519 return false;
1520 } else if (second_pk->size() != p->size()) {
1521 return false;
1522 }
1523 }
1524 if (VectorNode::is_shift(p0)) {
1525 // For now, return false if shift count is vector or not scalar promotion
1526 // case (different shift counts) because it is not supported yet.
1527 Node* cnt = p0->in(2);
1528 Node_List* cnt_pk = my_pack(cnt);
1529 if (cnt_pk != NULL)
1530 return false;
1531 if (!same_inputs(p, 2))
1532 return false;
1533 }
1534 if (!p0->is_Store()) {
1535 // For now, return false if not all uses are vector.
1536 // Later, implement ExtractNode and allow non-vector uses (maybe
1537 // just the ones outside the block.)
1538 for (uint i = 0; i < p->size(); i++) {
1539 Node* def = p->at(i);
1540 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1541 Node* use = def->fast_out(j);
1542 for (uint k = 0; k < use->req(); k++) {
1543 Node* n = use->in(k);
1544 if (def == n) {
1545 // reductions can be loop carried dependences
1546 if (def->is_reduction() && use->is_Phi())
1547 continue;
1548 if (!is_vector_use(use, k)) {
1549 return false;
1550 }
1551 }
1552 }
1553 }
1554 }
1555 }
1556 return true;
1557 }
1558
1559 //------------------------------schedule---------------------------
1746 for (Node* current = last_mem; current != ld->in(MemNode::Memory);
1747 current=current->in(MemNode::Memory)) {
1748 assert(current != first_mem, "corrupted memory graph");
1749 if(current->is_Mem() && !independent(current, ld)){
1750 schedule_last = false; // a later store depends on this load
1751 break;
1752 }
1753 }
1754 }
1755
1756 Node* mem_input = schedule_last ? last_mem : first_mem;
1757 _igvn.hash_delete(mem_input);
1758 // Give each load the same memory state
1759 for (uint i = 0; i < pk->size(); i++) {
1760 LoadNode* ld = pk->at(i)->as_Load();
1761 _igvn.replace_input_of(ld, MemNode::Memory, mem_input);
1762 }
1763 }
1764 }
1765
1766 //------------------------------output---------------------------
1767 // Convert packs into vector node operations
1768 void SuperWord::output() {
1769 if (_packset.length() == 0) return;
1770
1771 #ifndef PRODUCT
1772 if (TraceLoopOpts) {
1773 tty->print("SuperWord ");
1774 lpt()->dump_head();
1775 }
1776 #endif
1777
1778 // MUST ENSURE main loop's initial value is properly aligned:
1779 // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
1780
1781 align_initial_loop_index(align_to_ref());
1782
1783 // Insert extract (unpack) operations for scalar uses
1784 for (int i = 0; i < _packset.length(); i++) {
1785 insert_extracts(_packset.at(i));
1786 }
1787
1788 Compile* C = _phase->C;
1789 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
1790 uint max_vlen_in_bytes = 0;
1791 uint max_vlen = 0;
1792 for (int i = 0; i < _block.length(); i++) {
1793 Node* n = _block.at(i);
1794 Node_List* p = my_pack(n);
1795 if (p && n == executed_last(p)) {
1796 uint vlen = p->size();
1797 uint vlen_in_bytes = 0;
1798 Node* vn = NULL;
1799 Node* low_adr = p->at(0);
1800 Node* first = executed_first(p);
1801 int opc = n->Opcode();
1802 if (n->is_Load()) {
1803 Node* ctl = n->in(MemNode::Control);
1804 Node* mem = first->in(MemNode::Memory);
1805 SWPointer p1(n->as_Mem(), this, NULL, false);
1806 // Identify the memory dependency for the new loadVector node by
1807 // walking up through memory chain.
1808 // This is done to give flexibility to the new loadVector node so that
1809 // it can move above independent storeVector nodes.
1810 while (mem->is_StoreVector()) {
1811 SWPointer p2(mem->as_Mem(), this, NULL, false);
1812 int cmp = p1.cmp(p2);
1813 if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
1814 mem = mem->in(MemNode::Memory);
1815 } else {
1816 break; // dependent memory
1817 }
1818 }
1819 Node* adr = low_adr->in(MemNode::Address);
1820 const TypePtr* atyp = n->adr_type();
1821 vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
1822 vlen_in_bytes = vn->as_LoadVector()->memory_size();
1823 } else if (n->is_Store()) {
1824 // Promote value to be stored to vector
1825 Node* val = vector_opd(p, MemNode::ValueIn);
1826 Node* ctl = n->in(MemNode::Control);
1827 Node* mem = first->in(MemNode::Memory);
1828 Node* adr = low_adr->in(MemNode::Address);
1829 const TypePtr* atyp = n->adr_type();
1830 vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
1831 vlen_in_bytes = vn->as_StoreVector()->memory_size();
1832 } else if (n->req() == 3) {
1833 // Promote operands to vector
1834 Node* in1 = NULL;
1835 bool node_isa_reduction = n->is_reduction();
1836 if (node_isa_reduction) {
1837 // the input to the first reduction operation is retained
1838 in1 = low_adr->in(1);
1839 } else {
1840 in1 = vector_opd(p, 1);
1841 }
1842 Node* in2 = vector_opd(p, 2);
1843 if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) {
1844 // Move invariant vector input into second position to avoid register spilling.
1845 Node* tmp = in1;
1846 in1 = in2;
1847 in2 = tmp;
1848 }
1849 if (node_isa_reduction) {
1850 const Type *arith_type = n->bottom_type();
1851 vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type());
1852 if (in2->is_Load()) {
1853 vlen_in_bytes = in2->as_LoadVector()->memory_size();
1854 } else {
1855 vlen_in_bytes = in2->as_Vector()->length_in_bytes();
1856 }
1857 } else {
1858 vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
1859 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
1860 }
1861 } else if (opc == Op_SqrtD || opc == Op_AbsF || opc == Op_AbsD || opc == Op_NegF || opc == Op_NegD) {
1862 // Promote operand to vector (Sqrt/Abs/Neg are 2 address instructions)
1863 Node* in = vector_opd(p, 1);
1864 vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
1865 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
1866 } else {
1867 ShouldNotReachHere();
1868 }
1869 assert(vn != NULL, "sanity");
1870 _igvn.register_new_node_with_optimizer(vn);
1871 _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
1872 for (uint j = 0; j < p->size(); j++) {
1873 Node* pm = p->at(j);
1874 _igvn.replace_node(pm, vn);
1875 }
1876 _igvn._worklist.push(vn);
1877
1878 if (vlen_in_bytes > max_vlen_in_bytes) {
1879 max_vlen = vlen;
1880 max_vlen_in_bytes = vlen_in_bytes;
1881 }
1882 #ifdef ASSERT
1883 if (TraceNewVectors) {
1884 tty->print("new Vector node: ");
1885 vn->dump();
1886 }
1887 #endif
1888 }
1889 }
1890 C->set_max_vector_size(max_vlen_in_bytes);
1891 if (SuperWordLoopUnrollAnalysis) {
1892 if (cl->has_passed_slp()) {
1893 uint slp_max_unroll_factor = cl->slp_max_unroll();
1894 if (slp_max_unroll_factor == max_vlen) {
1895 NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte));
1896 // For atomic unrolled loops which are vector mapped, instigate more unrolling.
1897 cl->set_notpassed_slp();
1898 C->set_major_progress();
1899 cl->mark_do_unroll_only();
1900 }
1901 }
1902 }
1903 }
1904
1905 //------------------------------vector_opd---------------------------
1906 // Create a vector operand for the nodes in pack p for operand: in(opd_idx)
1907 Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
1908 Node* p0 = p->at(0);
1909 uint vlen = p->size();
1910 Node* opd = p0->in(opd_idx);
1911
1912 if (same_inputs(p, opd_idx)) {
1913 if (opd->is_Vector() || opd->is_LoadVector()) {
1914 assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
1915 return opd; // input is matching vector
1916 }
1917 if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
1918 Compile* C = _phase->C;
1919 Node* cnt = opd;
1920 // Vector instructions do not mask shift count, do it here.
1921 juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
1922 const TypeInt* t = opd->find_int_type();
1923 if (t != NULL && t->is_con()) {
1924 juint shift = t->get_con();
1925 if (shift > mask) { // Unsigned cmp
1926 cnt = ConNode::make(TypeInt::make(shift & mask));
1927 }
1928 } else {
1929 if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
1930 cnt = ConNode::make(TypeInt::make(mask));
1931 _igvn.register_new_node_with_optimizer(cnt);
1932 cnt = new AndINode(opd, cnt);
1933 _igvn.register_new_node_with_optimizer(cnt);
1934 _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
1935 }
1936 assert(opd->bottom_type()->isa_int(), "int type only");
1937 // Move non constant shift count into vector register.
1938 cnt = VectorNode::shift_count(p0, cnt, vlen, velt_basic_type(p0));
1939 }
1940 if (cnt != opd) {
1941 _igvn.register_new_node_with_optimizer(cnt);
1942 _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
1943 }
1944 return cnt;
1945 }
1946 assert(!opd->is_StoreVector(), "such vector is not expected here");
1947 // Convert scalar input to vector with the same number of elements as
1948 // p0's vector. Use p0's type because size of operand's container in
1949 // vector should match p0's size regardless operand's size.
1950 const Type* p0_t = velt_type(p0);
1951 VectorNode* vn = VectorNode::scalar2vector(opd, vlen, p0_t);
1952
1953 _igvn.register_new_node_with_optimizer(vn);
1954 _phase->set_ctrl(vn, _phase->get_ctrl(opd));
1955 #ifdef ASSERT
1956 if (TraceNewVectors) {
1957 tty->print("new Vector node: ");
1958 vn->dump();
1959 }
1960 #endif
1961 return vn;
1962 }
1963
1964 // Insert pack operation
1965 BasicType bt = velt_basic_type(p0);
1966 PackNode* pk = PackNode::make(opd, vlen, bt);
1967 DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )
1968
1969 for (uint i = 1; i < vlen; i++) {
1970 Node* pi = p->at(i);
1971 Node* in = pi->in(opd_idx);
1972 assert(my_pack(in) == NULL, "Should already have been unpacked");
1973 assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
1974 pk->add_opd(in);
1975 }
1976 _igvn.register_new_node_with_optimizer(pk);
1977 _phase->set_ctrl(pk, _phase->get_ctrl(opd));
1978 #ifdef ASSERT
1979 if (TraceNewVectors) {
1980 tty->print("new Vector node: ");
1981 pk->dump();
1982 }
1983 #endif
1984 return pk;
1985 }
1986
1987 //------------------------------insert_extracts---------------------------
1988 // If a use of pack p is not a vector use, then replace the
1989 // use with an extract operation.
1990 void SuperWord::insert_extracts(Node_List* p) {
1991 if (p->at(0)->is_Store()) return;
1992 assert(_n_idx_list.is_empty(), "empty (node,index) list");
1993
1994 // Inspect each use of each pack member. For each use that is
1995 // not a vector use, replace the use with an extract operation.
1996
1997 for (uint i = 0; i < p->size(); i++) {
1998 Node* def = p->at(i);
1999 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2000 Node* use = def->fast_out(j);
2001 for (uint k = 0; k < use->req(); k++) {
2002 Node* n = use->in(k);
2003 if (def == n) {
2004 if (!is_vector_use(use, k)) {
2005 _n_idx_list.push(use, k);
2006 }
2007 }
2008 }
2009 }
2010 }
2011
2012 while (_n_idx_list.is_nonempty()) {
2013 Node* use = _n_idx_list.node();
2014 int idx = _n_idx_list.index();
2015 _n_idx_list.pop();
2016 Node* def = use->in(idx);
2017
2018 if (def->is_reduction()) continue;
2019
2020 // Insert extract operation
2021 _igvn.hash_delete(def);
2022 int def_pos = alignment(def) / data_size(def);
2023
2024 Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));
|
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 #include "precompiled.hpp"
25 #include "compiler/compileLog.hpp"
26 #include "libadt/vectset.hpp"
27 #include "memory/allocation.inline.hpp"
28 #include "opto/addnode.hpp"
29 #include "opto/callnode.hpp"
30 #include "opto/castnode.hpp"
31 #include "opto/convertnode.hpp"
32 #include "opto/divnode.hpp"
33 #include "opto/matcher.hpp"
34 #include "opto/memnode.hpp"
35 #include "opto/mulnode.hpp"
36 #include "opto/opcodes.hpp"
37 #include "opto/opaquenode.hpp"
38 #include "opto/superword.hpp"
39 #include "opto/vectornode.hpp"
40 #include "opto/movenode.hpp"
41
42 //
43 // S U P E R W O R D T R A N S F O R M
44 //=============================================================================
45
46 //------------------------------SuperWord---------------------------
47 SuperWord::SuperWord(PhaseIdealLoop* phase) :
48 _phase(phase),
49 _igvn(phase->_igvn),
50 _arena(phase->C->comp_arena()),
51 _packset(arena(), 8, 0, NULL), // packs for the current block
52 _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
53 _block(arena(), 8, 0, NULL), // nodes in current block
54 _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
55 _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
56 _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
57 _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
58 _clone_map(phase->C->clone_map()), // map of nodes created in cloning
59 _cmovev_kit(_arena, this), // map to facilitate CMoveVD creation
60 _align_to_ref(NULL), // memory reference to align vectors to
61 _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
62 _dg(_arena), // dependence graph
63 _visited(arena()), // visited node set
64 _post_visited(arena()), // post visited node set
65 _n_idx_list(arena(), 8), // scratch list of (node,index) pairs
66 _stk(arena(), 8, 0, NULL), // scratch stack of nodes
67 _nlist(arena(), 8, 0, NULL), // scratch list of nodes
68 _lpt(NULL), // loop tree node
69 _lp(NULL), // LoopNode
70 _bb(NULL), // basic block
71 _iv(NULL), // induction var
72 _race_possible(false), // cases where SDMU is true
73 _early_return(true), // analysis evaluations routine
74 _num_work_vecs(0), // amount of vector work we have
75 _num_reductions(0), // amount of reduction work we have
76 _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style
77 _do_reserve_copy(DoReserveCopyInSuperWord),
78 _ii_first(-1), // first loop generation index - only if do_vector_loop()
79 _ii_last(-1), // last loop generation index - only if do_vector_loop()
80 _ii_order(arena(), 8, 0, 0)
81 {
82 #ifndef PRODUCT
83 _vector_loop_debug = 0;
84 if (_phase->C->method() != NULL) {
85 _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug);
86 }
87 #endif
88 }
89
90 //------------------------------transform_loop---------------------------
91 void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
92 assert(UseSuperWord, "should be");
93 // Do vectors exist on this architecture?
94 if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
95
96 assert(lpt->_head->is_CountedLoop(), "must be");
97 CountedLoopNode *cl = lpt->_head->as_CountedLoop();
98
99 if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
100
101 if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
102 // Check for no control flow in body (other than exit)
103 Node *cl_exit = cl->loopexit();
104 if (cl_exit->in(0) != lpt->_head) {
105 #ifndef PRODUCT
106 if (TraceSuperWord) {
107 tty->print_cr("SuperWord::transform_loop: loop too complicated, cl_exit->in(0) != lpt->_head");
108 tty->print("cl_exit %d", cl_exit->_idx); cl_exit->dump();
109 tty->print("cl_exit->in(0) %d", cl_exit->in(0)->_idx); cl_exit->in(0)->dump();
110 tty->print("lpt->_head %d", lpt->_head->_idx); lpt->_head->dump();
111 lpt->dump_head();
112 }
113 #endif
114 return;
115 }
116
117 // Make sure the are no extra control users of the loop backedge
118 if (cl->back_control()->outcnt() != 1) {
119 return;
120 }
121
122 // We only re-enter slp when we vector mapped a queried loop and we want to
123 // continue unrolling, in this case, slp is not subsequently done.
124 if (cl->do_unroll_only()) return;
125
126 // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
127 CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
128 if (pre_end == NULL) return;
129 Node *pre_opaq1 = pre_end->limit();
130 if (pre_opaq1->Opcode() != Op_Opaque1) return;
131
132 init(); // initialize data structures
133
134 set_lpt(lpt);
135 set_lp(cl);
384
385 find_adjacent_refs();
386
387 extend_packlist();
388
389 if (_do_vector_loop) {
390 if (_packset.length() == 0) {
391 #ifndef PRODUCT
392 if (TraceSuperWord) {
393 tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
394 }
395 #endif
396 pack_parallel();
397 }
398 }
399
400 combine_packs();
401
402 construct_my_pack_map();
403
404 if (_do_vector_loop) {
405 merge_packs_to_cmovd();
406 }
407
408 filter_packs();
409
410 schedule();
411
412 output();
413 }
414
415 //------------------------------find_adjacent_refs---------------------------
416 // Find the adjacent memory references and create pack pairs for them.
417 // This is the initial set of packs that will then be extended by
418 // following use->def and def->use links. The align positions are
419 // assigned relative to the reference "align_to_ref"
420 void SuperWord::find_adjacent_refs() {
421 // Get list of memory operations
422 Node_List memops;
423 for (int i = 0; i < _block.length(); i++) {
424 Node* n = _block.at(i);
425 if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
426 is_java_primitive(n->as_Mem()->memory_type())) {
427 int align = memory_alignment(n->as_Mem(), 0);
1068 if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
1069 return false;
1070 }
1071 }
1072 }
1073 return true;
1074 }
1075
1076 //------------------------------set_alignment---------------------------
1077 void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
1078 set_alignment(s1, align);
1079 if (align == top_align || align == bottom_align) {
1080 set_alignment(s2, align);
1081 } else {
1082 set_alignment(s2, align + data_size(s1));
1083 }
1084 }
1085
1086 //------------------------------data_size---------------------------
1087 int SuperWord::data_size(Node* s) {
1088 Node* use = NULL; //test if the node is a candidate for CMoveVD optimization, then return the size of CMov
1089 if (_do_vector_loop) {
1090 use = _cmovev_kit.is_Bool_candidate(s);
1091 if (use != NULL) {
1092 return data_size(use);
1093 }
1094 use = _cmovev_kit.is_CmpD_candidate(s);
1095 if (use != NULL) {
1096 return data_size(use);
1097 }
1098 }
1099 int bsize = type2aelembytes(velt_basic_type(s));
1100 assert(bsize != 0, "valid size");
1101 return bsize;
1102 }
1103
1104 //------------------------------extend_packlist---------------------------
1105 // Extend packset by following use->def and def->use links from pack members.
1106 void SuperWord::extend_packlist() {
1107 bool changed;
1108 do {
1109 packset_sort(_packset.length());
1110 changed = false;
1111 for (int i = 0; i < _packset.length(); i++) {
1112 Node_List* p = _packset.at(i);
1113 changed |= follow_use_defs(p);
1114 changed |= follow_def_uses(p);
1115 }
1116 } while (changed);
1117
1118 if (_race_possible) {
1125 #ifndef PRODUCT
1126 if (TraceSuperWord) {
1127 tty->print_cr("\nAfter extend_packlist");
1128 print_packset();
1129 }
1130 #endif
1131 }
1132
1133 //------------------------------follow_use_defs---------------------------
1134 // Extend the packset by visiting operand definitions of nodes in pack p
1135 bool SuperWord::follow_use_defs(Node_List* p) {
1136 assert(p->size() == 2, "just checking");
1137 Node* s1 = p->at(0);
1138 Node* s2 = p->at(1);
1139 assert(s1->req() == s2->req(), "just checking");
1140 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
1141
1142 if (s1->is_Load()) return false;
1143
1144 int align = alignment(s1);
1145 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d", s1->_idx, align);)
1146 bool changed = false;
1147 int start = s1->is_Store() ? MemNode::ValueIn : 1;
1148 int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
1149 for (int j = start; j < end; j++) {
1150 Node* t1 = s1->in(j);
1151 Node* t2 = s2->in(j);
1152 if (!in_bb(t1) || !in_bb(t2))
1153 continue;
1154 if (stmts_can_pack(t1, t2, align)) {
1155 if (est_savings(t1, t2) >= 0) {
1156 Node_List* pair = new Node_List();
1157 pair->push(t1);
1158 pair->push(t2);
1159 _packset.append(pair);
1160 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: set_alignment(%d, %d, %d)", t1->_idx, t2->_idx, align);)
1161 set_alignment(t1, t2, align);
1162 changed = true;
1163 }
1164 }
1165 }
1166 return changed;
1167 }
1168
1169 //------------------------------follow_def_uses---------------------------
1170 // Extend the packset by visiting uses of nodes in pack p
1171 bool SuperWord::follow_def_uses(Node_List* p) {
1172 bool changed = false;
1173 Node* s1 = p->at(0);
1174 Node* s2 = p->at(1);
1175 assert(p->size() == 2, "just checking");
1176 assert(s1->req() == s2->req(), "just checking");
1177 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
1178
1179 if (s1->is_Store()) return false;
1180
1181 int align = alignment(s1);
1182 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: s1 %d, align %d", s1->_idx, align);)
1183 int savings = -1;
1184 int num_s1_uses = 0;
1185 Node* u1 = NULL;
1186 Node* u2 = NULL;
1187 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1188 Node* t1 = s1->fast_out(i);
1189 num_s1_uses++;
1190 if (!in_bb(t1)) continue;
1191 for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
1192 Node* t2 = s2->fast_out(j);
1193 if (!in_bb(t2)) continue;
1194 if (!opnd_positions_match(s1, t1, s2, t2))
1195 continue;
1196 if (stmts_can_pack(t1, t2, align)) {
1197 int my_savings = est_savings(t1, t2);
1198 if (my_savings > savings) {
1199 savings = my_savings;
1200 u1 = t1;
1201 u2 = t2;
1202 }
1203 }
1204 }
1205 }
1206 if (num_s1_uses > 1) {
1207 _race_possible = true;
1208 }
1209 if (savings >= 0) {
1210 Node_List* pair = new Node_List();
1211 pair->push(u1);
1212 pair->push(u2);
1213 _packset.append(pair);
1214 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: set_alignment(%d, %d, %d)", u1->_idx, u2->_idx, align);)
1215 set_alignment(u1, u2, align);
1216 changed = true;
1217 }
1218 return changed;
1219 }
1220
1221 //------------------------------order_def_uses---------------------------
1222 // For extended packsets, ordinally arrange uses packset by major component
1223 void SuperWord::order_def_uses(Node_List* p) {
1224 Node* s1 = p->at(0);
1225
1226 if (s1->is_Store()) return;
1227
1228 // reductions are always managed beforehand
1229 if (s1->is_reduction()) return;
1230
1231 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
1232 Node* t1 = s1->fast_out(i);
1233
1234 // Only allow operand swap on commuting operations
1470 if (TraceSuperWord && Verbose) {
1471 tty->print_cr("Unprofitable");
1472 pk->at(0)->dump();
1473 }
1474 #endif
1475 remove_pack_at(i);
1476 changed = true;
1477 }
1478 }
1479 } while (changed);
1480
1481 #ifndef PRODUCT
1482 if (TraceSuperWord) {
1483 tty->print_cr("\nAfter filter_packs");
1484 print_packset();
1485 tty->cr();
1486 }
1487 #endif
1488 }
1489
1490 //------------------------------merge_packs_to_cmovd---------------------------
1491 // Merge CMoveD into new vector-nodes
1492 // We want to catch this pattern and subsume CmpD and Bool into CMoveD
1493 //
1494 // SubD ConD
1495 // / | /
1496 // / | / /
1497 // / | / /
1498 // / | / /
1499 // / / /
1500 // / / | /
1501 // v / | /
1502 // CmpD | /
1503 // | | /
1504 // v | /
1505 // Bool | /
1506 // \ | /
1507 // \ | /
1508 // \ | /
1509 // \ | /
1510 // \ v /
1511 // CMoveD
1512 //
1513
1514 void SuperWord::merge_packs_to_cmovd() {
1515 for (int i = _packset.length() - 1; i >= 0; i--) {
1516 _cmovev_kit.make_cmovevd_pack(_packset.at(i));
1517 }
1518 #ifndef PRODUCT
1519 if (TraceSuperWord) {
1520 tty->print_cr("\nSuperWord::merge_packs_to_cmovd(): After merge");
1521 print_packset();
1522 tty->cr();
1523 }
1524 #endif
1525 }
1526
1527 Node* CMoveKit::is_Bool_candidate(Node* def) const {
1528 Node* use = NULL;
1529 if (!def->is_Bool() || def->in(0) != NULL || def->outcnt() != 1) {
1530 return NULL;
1531 }
1532 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1533 use = def->fast_out(j);
1534 if (!_sw->same_generation(def, use) || !use->is_CMove()) {
1535 return NULL;
1536 }
1537 }
1538 return use;
1539 }
1540
1541 Node* CMoveKit::is_CmpD_candidate(Node* def) const {
1542 Node* use = NULL;
1543 if (!def->is_Cmp() || def->in(0) != NULL || def->outcnt() != 1) {
1544 return NULL;
1545 }
1546 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1547 use = def->fast_out(j);
1548 if (!_sw->same_generation(def, use) || (use = is_Bool_candidate(use)) == NULL || !_sw->same_generation(def, use)) {
1549 return NULL;
1550 }
1551 }
1552 return use;
1553 }
1554
1555 Node_List* CMoveKit::make_cmovevd_pack(Node_List* cmovd_pk) {
1556 Node *cmovd = cmovd_pk->at(0);
1557 if (!cmovd->is_CMove()) {
1558 return NULL;
1559 }
1560 if (pack(cmovd) != NULL) { // already in the cmov pack
1561 return NULL;
1562 }
1563 if (cmovd->in(0) != NULL) {
1564 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CMoveD %d has control flow, escaping...", cmovd->_idx); cmovd->dump();})
1565 return NULL;
1566 }
1567
1568 Node* bol = cmovd->as_CMove()->in(CMoveNode::Condition);
1569 if (!bol->is_Bool()
1570 || bol->outcnt() != 1
1571 || !_sw->same_generation(bol, cmovd)
1572 || bol->in(0) != NULL // BoolNode has control flow!!
1573 || _sw->my_pack(bol) == NULL) {
1574 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: Bool %d does not fit CMoveD %d for building vector, escaping...", bol->_idx, cmovd->_idx); bol->dump();})
1575 return NULL;
1576 }
1577 Node_List* bool_pk = _sw->my_pack(bol);
1578 if (bool_pk->size() != cmovd_pk->size() ) {
1579 return NULL;
1580 }
1581
1582 Node* cmpd = bol->in(1);
1583 if (!cmpd->is_Cmp()
1584 || cmpd->outcnt() != 1
1585 || !_sw->same_generation(cmpd, cmovd)
1586 || cmpd->in(0) != NULL // CmpDNode has control flow!!
1587 || _sw->my_pack(cmpd) == NULL) {
1588 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CmpD %d does not fit CMoveD %d for building vector, escaping...", cmpd->_idx, cmovd->_idx); cmpd->dump();})
1589 return NULL;
1590 }
1591 Node_List* cmpd_pk = _sw->my_pack(cmpd);
1592 if (cmpd_pk->size() != cmovd_pk->size() ) {
1593 return NULL;
1594 }
1595
1596 if (!test_cmpd_pack(cmpd_pk, cmovd_pk)) {
1597 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: cmpd pack for CmpD %d failed vectorization test", cmpd->_idx); cmpd->dump();})
1598 return NULL;
1599 }
1600
1601 Node_List* new_cmpd_pk = new Node_List();
1602 uint sz = cmovd_pk->size() - 1;
1603 for (uint i = 0; i <= sz; ++i) {
1604 Node* cmov = cmovd_pk->at(i);
1605 Node* bol = bool_pk->at(i);
1606 Node* cmp = cmpd_pk->at(i);
1607
1608 new_cmpd_pk->insert(i, cmov);
1609
1610 map(cmov, new_cmpd_pk);
1611 map(bol, new_cmpd_pk);
1612 map(cmp, new_cmpd_pk);
1613
1614 _sw->set_my_pack(cmov, new_cmpd_pk); // and keep old packs for cmp and bool
1615 }
1616 _sw->_packset.remove(cmovd_pk);
1617 _sw->_packset.remove(bool_pk);
1618 _sw->_packset.remove(cmpd_pk);
1619 _sw->_packset.append(new_cmpd_pk);
1620 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack"); _sw->print_pack(new_cmpd_pk);})
1621 return new_cmpd_pk;
1622 }
1623
1624 bool CMoveKit::test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk) {
1625 Node* cmpd0 = cmpd_pk->at(0);
1626 assert(cmpd0->is_Cmp(), "CMoveKit::test_cmpd_pack: should be CmpDNode");
1627 assert(cmovd_pk->at(0)->is_CMove(), "CMoveKit::test_cmpd_pack: should be CMoveD");
1628 assert(cmpd_pk->size() == cmovd_pk->size(), "CMoveKit::test_cmpd_pack: should be same size");
1629 Node* in1 = cmpd0->in(1);
1630 Node* in2 = cmpd0->in(2);
1631 Node_List* in1_pk = _sw->my_pack(in1);
1632 Node_List* in2_pk = _sw->my_pack(in2);
1633
1634 if (in1_pk != NULL && in1_pk->size() != cmpd_pk->size()
1635 || in2_pk != NULL && in2_pk->size() != cmpd_pk->size() ) {
1636 return false;
1637 }
1638
1639 // test if "all" in1 are in the same pack or the same node
1640 if (in1_pk == NULL) {
1641 for (uint j = 1; j < cmpd_pk->size(); j++) {
1642 if (cmpd_pk->at(j)->in(1) != in1) {
1643 return false;
1644 }
1645 }//for: in1_pk is not pack but all CmpD nodes in the pack have the same in(1)
1646 }
1647 // test if "all" in2 are in the same pack or the same node
1648 if (in2_pk == NULL) {
1649 for (uint j = 1; j < cmpd_pk->size(); j++) {
1650 if (cmpd_pk->at(j)->in(2) != in2) {
1651 return false;
1652 }
1653 }//for: in2_pk is not pack but all CmpD nodes in the pack have the same in(2)
1654 }
1655 //now check if cmpd_pk may be subsumed in vector built for cmovd_pk
1656 int cmovd_ind1, cmovd_ind2;
1657 if (cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse)
1658 && cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) {
1659 cmovd_ind1 = CMoveNode::IfFalse;
1660 cmovd_ind2 = CMoveNode::IfTrue;
1661 } else if (cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse)
1662 && cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) {
1663 cmovd_ind2 = CMoveNode::IfFalse;
1664 cmovd_ind1 = CMoveNode::IfTrue;
1665 }
1666 else {
1667 return false;
1668 }
1669
1670 for (uint j = 1; j < cmpd_pk->size(); j++) {
1671 if (cmpd_pk->at(j)->in(1) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind1)
1672 || cmpd_pk->at(j)->in(2) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind2)) {
1673 return false;
1674 }//if
1675 }
1676 NOT_PRODUCT(if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmpd_pack: cmpd pack for 1st CmpD %d is OK for vectorization: ", cmpd0->_idx); cmpd0->dump(); })
1677 return true;
1678 }
1679
1680 //------------------------------implemented---------------------------
1681 // Can code be generated for pack p?
1682 bool SuperWord::implemented(Node_List* p) {
1683 bool retValue = false;
1684 Node* p0 = p->at(0);
1685 if (p0 != NULL) {
1686 int opc = p0->Opcode();
1687 uint size = p->size();
1688 if (p0->is_reduction()) {
1689 const Type *arith_type = p0->bottom_type();
1690 // Length 2 reductions of INT/LONG do not offer performance benefits
1691 if (((arith_type->basic_type() == T_INT) || (arith_type->basic_type() == T_LONG)) && (size == 2)) {
1692 retValue = false;
1693 } else {
1694 retValue = ReductionNode::implemented(opc, size, arith_type->basic_type());
1695 }
1696 } else {
1697 retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
1698 }
1699 if (!retValue) {
1700 if (is_cmov_pack(p)) {
1701 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);})
1702 return true;
1703 }
1704 }
1705 }
1706 return retValue;
1707 }
1708
1709 bool SuperWord::is_cmov_pack(Node_List* p) {
1710 return _cmovev_kit.pack(p->at(0)) != NULL;
1711 }
1712 //------------------------------same_inputs--------------------------
1713 // For pack p, are all idx operands the same?
1714 bool SuperWord::same_inputs(Node_List* p, int idx) {
1715 Node* p0 = p->at(0);
1716 uint vlen = p->size();
1717 Node* p0_def = p0->in(idx);
1718 for (uint i = 1; i < vlen; i++) {
1719 Node* pi = p->at(i);
1720 Node* pi_def = pi->in(idx);
1721 if (p0_def != pi_def) {
1722 return false;
1723 }
1724 }
1725 return true;
1726 }
1727
1728 //------------------------------profitable---------------------------
1729 // For pack p, are all operands and all uses (with in the block) vector?
1730 bool SuperWord::profitable(Node_List* p) {
1731 Node* p0 = p->at(0);
1732 uint start, end;
1733 VectorNode::vector_operands(p0, &start, &end);
1734
1735 // Return false if some inputs are not vectors or vectors with different
1736 // size or alignment.
1737 // Also, for now, return false if not scalar promotion case when inputs are
1738 // the same. Later, implement PackNode and allow differing, non-vector inputs
1739 // (maybe just the ones from outside the block.)
1740 for (uint i = start; i < end; i++) {
1741 if (!is_vector_use(p0, i)) {
1742 return false;
1743 }
1744 }
1745 // Check if reductions are connected
1746 if (p0->is_reduction()) {
1747 Node* second_in = p0->in(2);
1748 Node_List* second_pk = my_pack(second_in);
1749 if ((second_pk == NULL) || (_num_work_vecs == _num_reductions)) {
1750 // Remove reduction flag if no parent pack or if not enough work
1751 // to cover reduction expansion overhead
1752 p0->remove_flag(Node::Flag_is_reduction);
1753 return false;
1754 } else if (second_pk->size() != p->size()) {
1755 return false;
1756 }
1757 }
1758 if (VectorNode::is_shift(p0)) {
1759 // For now, return false if shift count is vector or not scalar promotion
1760 // case (different shift counts) because it is not supported yet.
1761 Node* cnt = p0->in(2);
1762 Node_List* cnt_pk = my_pack(cnt);
1763 if (cnt_pk != NULL)
1764 return false;
1765 if (!same_inputs(p, 2))
1766 return false;
1767 }
1768 if (!p0->is_Store()) {
1769 // For now, return false if not all uses are vector.
1770 // Later, implement ExtractNode and allow non-vector uses (maybe
1771 // just the ones outside the block.)
1772 for (uint i = 0; i < p->size(); i++) {
1773 Node* def = p->at(i);
1774 if (is_cmov_pack_internal_node(p, def)) {
1775 continue;
1776 }
1777 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
1778 Node* use = def->fast_out(j);
1779 for (uint k = 0; k < use->req(); k++) {
1780 Node* n = use->in(k);
1781 if (def == n) {
1782 // reductions can be loop carried dependences
1783 if (def->is_reduction() && use->is_Phi())
1784 continue;
1785 if (!is_vector_use(use, k)) {
1786 return false;
1787 }
1788 }
1789 }
1790 }
1791 }
1792 }
1793 return true;
1794 }
1795
1796 //------------------------------schedule---------------------------
1983 for (Node* current = last_mem; current != ld->in(MemNode::Memory);
1984 current=current->in(MemNode::Memory)) {
1985 assert(current != first_mem, "corrupted memory graph");
1986 if(current->is_Mem() && !independent(current, ld)){
1987 schedule_last = false; // a later store depends on this load
1988 break;
1989 }
1990 }
1991 }
1992
1993 Node* mem_input = schedule_last ? last_mem : first_mem;
1994 _igvn.hash_delete(mem_input);
1995 // Give each load the same memory state
1996 for (uint i = 0; i < pk->size(); i++) {
1997 LoadNode* ld = pk->at(i)->as_Load();
1998 _igvn.replace_input_of(ld, MemNode::Memory, mem_input);
1999 }
2000 }
2001 }
2002
2003 #ifndef PRODUCT
2004 void SuperWord::print_loop(bool whole) {
2005 Node_Stack stack(_arena, _phase->C->unique() >> 2);
2006 Node_List rpo_list;
2007 VectorSet visited(_arena);
2008 visited.set(lpt()->_head->_idx);
2009 _phase->rpo(lpt()->_head, stack, visited, rpo_list);
2010 _phase->dump(lpt(), rpo_list.size(), rpo_list );
2011 if(whole) {
2012 tty->print_cr("\n Whole loop tree");
2013 _phase->dump();
2014 tty->print_cr(" End of whole loop tree\n");
2015 }
2016 }
2017 #endif
2018
2019 //------------------------------output---------------------------
2020 // Convert packs into vector node operations
2021 void SuperWord::output() {
2022 if (_packset.length() == 0) return;
2023
2024 #ifndef PRODUCT
2025 if (TraceLoopOpts) {
2026 tty->print("SuperWord::output ");
2027 lpt()->dump_head();
2028 }
2029 #endif
2030
2031 // MUST ENSURE main loop's initial value is properly aligned:
2032 // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
2033
2034 align_initial_loop_index(align_to_ref());
2035
2036 // Insert extract (unpack) operations for scalar uses
2037 for (int i = 0; i < _packset.length(); i++) {
2038 insert_extracts(_packset.at(i));
2039 }
2040
2041 Compile* C = _phase->C;
2042 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
2043 uint max_vlen_in_bytes = 0;
2044 uint max_vlen = 0;
2045
2046 NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);})
2047
2048 CountedLoopReserveKit make_reversable(_phase, _lpt, do_reserve_copy());
2049
2050 NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);})
2051
2052 if (do_reserve_copy() && !make_reversable.has_reserved()) {
2053 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");})
2054 return;
2055 }
2056
2057 for (int i = 0; i < _block.length(); i++) {
2058 Node* n = _block.at(i);
2059 Node_List* p = my_pack(n);
2060 if (p && n == executed_last(p)) {
2061 uint vlen = p->size();
2062 uint vlen_in_bytes = 0;
2063 Node* vn = NULL;
2064 Node* low_adr = p->at(0);
2065 Node* first = executed_first(p);
2066 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack", first->_idx, n->_idx); print_pack(p);})
2067 int opc = n->Opcode();
2068 if (n->is_Load()) {
2069 Node* ctl = n->in(MemNode::Control);
2070 Node* mem = first->in(MemNode::Memory);
2071 SWPointer p1(n->as_Mem(), this, NULL, false);
2072 // Identify the memory dependency for the new loadVector node by
2073 // walking up through memory chain.
2074 // This is done to give flexibility to the new loadVector node so that
2075 // it can move above independent storeVector nodes.
2076 while (mem->is_StoreVector()) {
2077 SWPointer p2(mem->as_Mem(), this, NULL, false);
2078 int cmp = p1.cmp(p2);
2079 if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
2080 mem = mem->in(MemNode::Memory);
2081 } else {
2082 break; // dependent memory
2083 }
2084 }
2085 Node* adr = low_adr->in(MemNode::Address);
2086 const TypePtr* atyp = n->adr_type();
2087 vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
2088 vlen_in_bytes = vn->as_LoadVector()->memory_size();
2089 } else if (n->is_Store()) {
2090 // Promote value to be stored to vector
2091 Node* val = vector_opd(p, MemNode::ValueIn);
2092 if (val == NULL) {
2093 if (do_reserve_copy()) {
2094 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: val should not be NULL, exiting SuperWord");})
2095 return; //and reverse to backup IG
2096 }
2097 ShouldNotReachHere();
2098 }
2099
2100 Node* ctl = n->in(MemNode::Control);
2101 Node* mem = first->in(MemNode::Memory);
2102 Node* adr = low_adr->in(MemNode::Address);
2103 const TypePtr* atyp = n->adr_type();
2104 vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
2105 vlen_in_bytes = vn->as_StoreVector()->memory_size();
2106 } else if (n->req() == 3 && !is_cmov_pack(p)) {
2107 // Promote operands to vector
2108 Node* in1 = NULL;
2109 bool node_isa_reduction = n->is_reduction();
2110 if (node_isa_reduction) {
2111 // the input to the first reduction operation is retained
2112 in1 = low_adr->in(1);
2113 } else {
2114 in1 = vector_opd(p, 1);
2115 if (in1 == NULL) {
2116 if (do_reserve_copy()) {
2117 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in1 should not be NULL, exiting SuperWord");})
2118 return; //and reverse to backup IG
2119 }
2120 ShouldNotReachHere();
2121 }
2122 }
2123 Node* in2 = vector_opd(p, 2);
2124 if (in2 == NULL) {
2125 if (do_reserve_copy()) {
2126 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in2 should not be NULL, exiting SuperWord");})
2127 return; //and reverse to backup IG
2128 }
2129 ShouldNotReachHere();
2130 }
2131 if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) {
2132 // Move invariant vector input into second position to avoid register spilling.
2133 Node* tmp = in1;
2134 in1 = in2;
2135 in2 = tmp;
2136 }
2137 if (node_isa_reduction) {
2138 const Type *arith_type = n->bottom_type();
2139 vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type());
2140 if (in2->is_Load()) {
2141 vlen_in_bytes = in2->as_LoadVector()->memory_size();
2142 } else {
2143 vlen_in_bytes = in2->as_Vector()->length_in_bytes();
2144 }
2145 } else {
2146 vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
2147 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2148 }
2149 } else if (opc == Op_SqrtD || opc == Op_AbsF || opc == Op_AbsD || opc == Op_NegF || opc == Op_NegD) {
2150 // Promote operand to vector (Sqrt/Abs/Neg are 2 address instructions)
2151 Node* in = vector_opd(p, 1);
2152 vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
2153 vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2154 } else if (is_cmov_pack(p)) {
2155 if (!n->is_CMove()) {
2156 continue;
2157 }
2158 // place here CMoveVDNode
2159 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization"); print_loop(false);})
2160 Node* bol = n->in(CMoveNode::Condition);
2161 if (!bol->is_Bool() && bol->Opcode() == Op_ExtractI && bol->req() > 1 ) {
2162 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d", bol->_idx, bol->in(1)->_idx); bol->dump(); bol->in(1)->dump();})
2163 bol = bol->in(1); //may be ExtractNode
2164 }
2165
2166 assert(bol->is_Bool(), "should be BoolNode - too late to bail out!");
2167 if (!bol->is_Bool()) {
2168 if (do_reserve_copy()) {
2169 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();})
2170 return; //and reverse to backup IG
2171 }
2172 ShouldNotReachHere();
2173 }
2174
2175 int cond = (int)bol->as_Bool()->_test._test;
2176 Node* in_cc = _igvn.intcon(cond);
2177 NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", in_cc->_idx); in_cc->dump();})
2178 Node* cc = bol->clone();
2179 cc->set_req(1, in_cc);
2180 NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d", cc->_idx); cc->dump();})
2181
2182 Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse
2183 if (src1 == NULL) {
2184 if (do_reserve_copy()) {
2185 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be NULL, exiting SuperWord");})
2186 return; //and reverse to backup IG
2187 }
2188 ShouldNotReachHere();
2189 }
2190 Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue
2191 if (src2 == NULL) {
2192 if (do_reserve_copy()) {
2193 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be NULL, exiting SuperWord");})
2194 return; //and reverse to backup IG
2195 }
2196 ShouldNotReachHere();
2197 }
2198 BasicType bt = velt_basic_type(n);
2199 const TypeVect* vt = TypeVect::make(bt, vlen);
2200 vn = new CMoveVDNode(cc, src1, src2, vt);
2201 NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();})
2202 } else {
2203 if (do_reserve_copy()) {
2204 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: ShouldNotReachHere, exiting SuperWord");})
2205 return; //and reverse to backup IG
2206 }
2207 ShouldNotReachHere();
2208 }
2209
2210 assert(vn != NULL, "sanity");
2211 if (vn == NULL) {
2212 if (do_reserve_copy()){
2213 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: got NULL node, cannot proceed, exiting SuperWord");})
2214 return; //and reverse to backup IG
2215 }
2216 ShouldNotReachHere();
2217 }
2218
2219 _igvn.register_new_node_with_optimizer(vn);
2220 _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
2221 for (uint j = 0; j < p->size(); j++) {
2222 Node* pm = p->at(j);
2223 _igvn.replace_node(pm, vn);
2224 }
2225 _igvn._worklist.push(vn);
2226
2227 if (vlen_in_bytes > max_vlen_in_bytes) {
2228 max_vlen = vlen;
2229 max_vlen_in_bytes = vlen_in_bytes;
2230 }
2231 #ifdef ASSERT
2232 if (TraceNewVectors) {
2233 tty->print("new Vector node: ");
2234 vn->dump();
2235 }
2236 #endif
2237 }
2238 }//for (int i = 0; i < _block.length(); i++)
2239
2240 C->set_max_vector_size(max_vlen_in_bytes);
2241
2242 if (SuperWordLoopUnrollAnalysis) {
2243 if (cl->has_passed_slp()) {
2244 uint slp_max_unroll_factor = cl->slp_max_unroll();
2245 if (slp_max_unroll_factor == max_vlen) {
2246 NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte));
2247 // For atomic unrolled loops which are vector mapped, instigate more unrolling.
2248 cl->set_notpassed_slp();
2249 C->set_major_progress();
2250 cl->mark_do_unroll_only();
2251 }
2252 }
2253 }
2254
2255 if (do_reserve_copy()) {
2256 make_reversable.use_new();
2257 }
2258 NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);})
2259 return;
2260 }
2261
2262 //------------------------------vector_opd---------------------------
2263 // Create a vector operand for the nodes in pack p for operand: in(opd_idx)
2264 Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
2265 Node* p0 = p->at(0);
2266 uint vlen = p->size();
2267 Node* opd = p0->in(opd_idx);
2268
2269 if (same_inputs(p, opd_idx)) {
2270 if (opd->is_Vector() || opd->is_LoadVector()) {
2271 assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
2272 if (opd_idx == 2 && VectorNode::is_shift(p0)) {
2273 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("shift's count can't be vector");})
2274 return NULL;
2275 }
2276 return opd; // input is matching vector
2277 }
2278 if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
2279 Compile* C = _phase->C;
2280 Node* cnt = opd;
2281 // Vector instructions do not mask shift count, do it here.
2282 juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
2283 const TypeInt* t = opd->find_int_type();
2284 if (t != NULL && t->is_con()) {
2285 juint shift = t->get_con();
2286 if (shift > mask) { // Unsigned cmp
2287 cnt = ConNode::make(TypeInt::make(shift & mask));
2288 }
2289 } else {
2290 if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
2291 cnt = ConNode::make(TypeInt::make(mask));
2292 _igvn.register_new_node_with_optimizer(cnt);
2293 cnt = new AndINode(opd, cnt);
2294 _igvn.register_new_node_with_optimizer(cnt);
2295 _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
2296 }
2297 assert(opd->bottom_type()->isa_int(), "int type only");
2298 if (!opd->bottom_type()->isa_int()) {
2299 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should be int type only");})
2300 return NULL;
2301 }
2302 // Move non constant shift count into vector register.
2303 cnt = VectorNode::shift_count(p0, cnt, vlen, velt_basic_type(p0));
2304 }
2305 if (cnt != opd) {
2306 _igvn.register_new_node_with_optimizer(cnt);
2307 _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
2308 }
2309 return cnt;
2310 }
2311 assert(!opd->is_StoreVector(), "such vector is not expected here");
2312 if (opd->is_StoreVector()) {
2313 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("StoreVector is not expected here");})
2314 return NULL;
2315 }
2316 // Convert scalar input to vector with the same number of elements as
2317 // p0's vector. Use p0's type because size of operand's container in
2318 // vector should match p0's size regardless operand's size.
2319 const Type* p0_t = velt_type(p0);
2320 VectorNode* vn = VectorNode::scalar2vector(opd, vlen, p0_t);
2321
2322 _igvn.register_new_node_with_optimizer(vn);
2323 _phase->set_ctrl(vn, _phase->get_ctrl(opd));
2324 #ifdef ASSERT
2325 if (TraceNewVectors) {
2326 tty->print("new Vector node: ");
2327 vn->dump();
2328 }
2329 #endif
2330 return vn;
2331 }
2332
2333 // Insert pack operation
2334 BasicType bt = velt_basic_type(p0);
2335 PackNode* pk = PackNode::make(opd, vlen, bt);
2336 DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )
2337
2338 for (uint i = 1; i < vlen; i++) {
2339 Node* pi = p->at(i);
2340 Node* in = pi->in(opd_idx);
2341 assert(my_pack(in) == NULL, "Should already have been unpacked");
2342 if (my_pack(in) != NULL) {
2343 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})
2344 return NULL;
2345 }
2346 assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
2347 pk->add_opd(in);
2348 }
2349 _igvn.register_new_node_with_optimizer(pk);
2350 _phase->set_ctrl(pk, _phase->get_ctrl(opd));
2351 #ifdef ASSERT
2352 if (TraceNewVectors) {
2353 tty->print("new Vector node: ");
2354 pk->dump();
2355 }
2356 #endif
2357 return pk;
2358 }
2359
2360 //------------------------------insert_extracts---------------------------
2361 // If a use of pack p is not a vector use, then replace the
2362 // use with an extract operation.
2363 void SuperWord::insert_extracts(Node_List* p) {
2364 if (p->at(0)->is_Store()) return;
2365 assert(_n_idx_list.is_empty(), "empty (node,index) list");
2366
2367 // Inspect each use of each pack member. For each use that is
2368 // not a vector use, replace the use with an extract operation.
2369
2370 for (uint i = 0; i < p->size(); i++) {
2371 Node* def = p->at(i);
2372 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2373 Node* use = def->fast_out(j);
2374 for (uint k = 0; k < use->req(); k++) {
2375 Node* n = use->in(k);
2376 if (def == n) {
2377 Node_List* u_pk = my_pack(use);
2378 if ((u_pk == NULL || !is_cmov_pack(u_pk) || use->is_CMove()) && !is_vector_use(use, k)) {
2379 _n_idx_list.push(use, k);
2380 }
2381 }
2382 }
2383 }
2384 }
2385
2386 while (_n_idx_list.is_nonempty()) {
2387 Node* use = _n_idx_list.node();
2388 int idx = _n_idx_list.index();
2389 _n_idx_list.pop();
2390 Node* def = use->in(idx);
2391
2392 if (def->is_reduction()) continue;
2393
2394 // Insert extract operation
2395 _igvn.hash_delete(def);
2396 int def_pos = alignment(def) / data_size(def);
2397
2398 Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));
|