20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 #include "precompiled.hpp" 25 #include "compiler/compileLog.hpp" 26 #include "libadt/vectset.hpp" 27 #include "memory/allocation.inline.hpp" 28 #include "opto/addnode.hpp" 29 #include "opto/callnode.hpp" 30 #include "opto/castnode.hpp" 31 #include "opto/convertnode.hpp" 32 #include "opto/divnode.hpp" 33 #include "opto/matcher.hpp" 34 #include "opto/memnode.hpp" 35 #include "opto/mulnode.hpp" 36 #include "opto/opcodes.hpp" 37 #include "opto/opaquenode.hpp" 38 #include "opto/superword.hpp" 39 #include "opto/vectornode.hpp" 40 41 // 42 // S U P E R W O R D T R A N S F O R M 43 //============================================================================= 44 45 //------------------------------SuperWord--------------------------- 46 SuperWord::SuperWord(PhaseIdealLoop* phase) : 47 _phase(phase), 48 _igvn(phase->_igvn), 49 _arena(phase->C->comp_arena()), 50 _packset(arena(), 8, 0, NULL), // packs for the current block 51 _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb 52 _block(arena(), 8, 0, NULL), // nodes in current block 53 _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside 54 _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads 55 _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails 56 _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node 57 _clone_map(phase->C->clone_map()), // map of nodes created in cloning 58 _align_to_ref(NULL), // memory reference to align vectors to 59 _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs 60 _dg(_arena), // dependence graph 61 _visited(arena()), // visited node set 62 _post_visited(arena()), // post visited node set 63 _n_idx_list(arena(), 8), // scratch list of (node,index) pairs 64 _stk(arena(), 8, 0, NULL), // scratch stack of nodes 65 _nlist(arena(), 8, 0, NULL), // scratch list of nodes 66 _lpt(NULL), // loop tree node 67 _lp(NULL), // LoopNode 68 _bb(NULL), // basic block 69 _iv(NULL), // induction var 70 _race_possible(false), // cases where SDMU is true 71 _early_return(true), // analysis evaluations routine 72 _num_work_vecs(0), // amount of vector work we have 73 _num_reductions(0), // amount of reduction work we have 74 _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style 75 _ii_first(-1), // first loop generation index - only if do_vector_loop() 76 _ii_last(-1), // last loop generation index - only if do_vector_loop() 77 _ii_order(arena(), 8, 0, 0) 78 { 79 #ifndef PRODUCT 80 _vector_loop_debug = 0; 81 if (_phase->C->method() != NULL) { 82 _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug); 83 } 84 _CountedLoopReserveKit_debug = 0; 85 if (_phase->C->method() != NULL) { 86 _phase->C->method()->has_option_value("DoReserveCopyInSuperWordDebug", _CountedLoopReserveKit_debug); 87 } 88 #endif 89 } 90 91 //------------------------------transform_loop--------------------------- 92 void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { 93 assert(UseSuperWord, "should be"); 94 // Do vectors exist on this architecture? 95 if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; 96 97 assert(lpt->_head->is_CountedLoop(), "must be"); 98 CountedLoopNode *cl = lpt->_head->as_CountedLoop(); 99 100 if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop 101 102 if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops 103 // Check for no control flow in body (other than exit) 104 Node *cl_exit = cl->loopexit(); 105 if (cl_exit->in(0) != lpt->_head) return; 106 107 // Make sure the are no extra control users of the loop backedge 108 if (cl->back_control()->outcnt() != 1) { 109 return; 110 } 111 112 // We only re-enter slp when we vector mapped a queried loop and we want to 113 // continue unrolling, in this case, slp is not subsequently done. 114 if (cl->do_unroll_only()) return; 115 116 // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) 117 CountedLoopEndNode* pre_end = get_pre_loop_end(cl); 118 if (pre_end == NULL) return; 119 Node *pre_opaq1 = pre_end->limit(); 120 if (pre_opaq1->Opcode() != Op_Opaque1) return; 121 122 init(); // initialize data structures 123 124 set_lpt(lpt); 125 set_lp(cl); 374 375 find_adjacent_refs(); 376 377 extend_packlist(); 378 379 if (_do_vector_loop) { 380 if (_packset.length() == 0) { 381 #ifndef PRODUCT 382 if (TraceSuperWord) { 383 tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway"); 384 } 385 #endif 386 pack_parallel(); 387 } 388 } 389 390 combine_packs(); 391 392 construct_my_pack_map(); 393 394 filter_packs(); 395 396 schedule(); 397 398 output(); 399 } 400 401 //------------------------------find_adjacent_refs--------------------------- 402 // Find the adjacent memory references and create pack pairs for them. 403 // This is the initial set of packs that will then be extended by 404 // following use->def and def->use links. The align positions are 405 // assigned relative to the reference "align_to_ref" 406 void SuperWord::find_adjacent_refs() { 407 // Get list of memory operations 408 Node_List memops; 409 for (int i = 0; i < _block.length(); i++) { 410 Node* n = _block.at(i); 411 if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) && 412 is_java_primitive(n->as_Mem()->memory_type())) { 413 int align = memory_alignment(n->as_Mem(), 0); 1054 if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) { 1055 return false; 1056 } 1057 } 1058 } 1059 return true; 1060 } 1061 1062 //------------------------------set_alignment--------------------------- 1063 void SuperWord::set_alignment(Node* s1, Node* s2, int align) { 1064 set_alignment(s1, align); 1065 if (align == top_align || align == bottom_align) { 1066 set_alignment(s2, align); 1067 } else { 1068 set_alignment(s2, align + data_size(s1)); 1069 } 1070 } 1071 1072 //------------------------------data_size--------------------------- 1073 int SuperWord::data_size(Node* s) { 1074 int bsize = type2aelembytes(velt_basic_type(s)); 1075 assert(bsize != 0, "valid size"); 1076 return bsize; 1077 } 1078 1079 //------------------------------extend_packlist--------------------------- 1080 // Extend packset by following use->def and def->use links from pack members. 1081 void SuperWord::extend_packlist() { 1082 bool changed; 1083 do { 1084 packset_sort(_packset.length()); 1085 changed = false; 1086 for (int i = 0; i < _packset.length(); i++) { 1087 Node_List* p = _packset.at(i); 1088 changed |= follow_use_defs(p); 1089 changed |= follow_def_uses(p); 1090 } 1091 } while (changed); 1092 1093 if (_race_possible) { 1100 #ifndef PRODUCT 1101 if (TraceSuperWord) { 1102 tty->print_cr("\nAfter extend_packlist"); 1103 print_packset(); 1104 } 1105 #endif 1106 } 1107 1108 //------------------------------follow_use_defs--------------------------- 1109 // Extend the packset by visiting operand definitions of nodes in pack p 1110 bool SuperWord::follow_use_defs(Node_List* p) { 1111 assert(p->size() == 2, "just checking"); 1112 Node* s1 = p->at(0); 1113 Node* s2 = p->at(1); 1114 assert(s1->req() == s2->req(), "just checking"); 1115 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); 1116 1117 if (s1->is_Load()) return false; 1118 1119 int align = alignment(s1); 1120 bool changed = false; 1121 int start = s1->is_Store() ? MemNode::ValueIn : 1; 1122 int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req(); 1123 for (int j = start; j < end; j++) { 1124 Node* t1 = s1->in(j); 1125 Node* t2 = s2->in(j); 1126 if (!in_bb(t1) || !in_bb(t2)) 1127 continue; 1128 if (stmts_can_pack(t1, t2, align)) { 1129 if (est_savings(t1, t2) >= 0) { 1130 Node_List* pair = new Node_List(); 1131 pair->push(t1); 1132 pair->push(t2); 1133 _packset.append(pair); 1134 set_alignment(t1, t2, align); 1135 changed = true; 1136 } 1137 } 1138 } 1139 return changed; 1140 } 1141 1142 //------------------------------follow_def_uses--------------------------- 1143 // Extend the packset by visiting uses of nodes in pack p 1144 bool SuperWord::follow_def_uses(Node_List* p) { 1145 bool changed = false; 1146 Node* s1 = p->at(0); 1147 Node* s2 = p->at(1); 1148 assert(p->size() == 2, "just checking"); 1149 assert(s1->req() == s2->req(), "just checking"); 1150 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); 1151 1152 if (s1->is_Store()) return false; 1153 1154 int align = alignment(s1); 1155 int savings = -1; 1156 int num_s1_uses = 0; 1157 Node* u1 = NULL; 1158 Node* u2 = NULL; 1159 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { 1160 Node* t1 = s1->fast_out(i); 1161 num_s1_uses++; 1162 if (!in_bb(t1)) continue; 1163 for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { 1164 Node* t2 = s2->fast_out(j); 1165 if (!in_bb(t2)) continue; 1166 if (!opnd_positions_match(s1, t1, s2, t2)) 1167 continue; 1168 if (stmts_can_pack(t1, t2, align)) { 1169 int my_savings = est_savings(t1, t2); 1170 if (my_savings > savings) { 1171 savings = my_savings; 1172 u1 = t1; 1173 u2 = t2; 1174 } 1175 } 1176 } 1177 } 1178 if (num_s1_uses > 1) { 1179 _race_possible = true; 1180 } 1181 if (savings >= 0) { 1182 Node_List* pair = new Node_List(); 1183 pair->push(u1); 1184 pair->push(u2); 1185 _packset.append(pair); 1186 set_alignment(u1, u2, align); 1187 changed = true; 1188 } 1189 return changed; 1190 } 1191 1192 //------------------------------order_def_uses--------------------------- 1193 // For extended packsets, ordinally arrange uses packset by major component 1194 void SuperWord::order_def_uses(Node_List* p) { 1195 Node* s1 = p->at(0); 1196 1197 if (s1->is_Store()) return; 1198 1199 // reductions are always managed beforehand 1200 if (s1->is_reduction()) return; 1201 1202 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { 1203 Node* t1 = s1->fast_out(i); 1204 1205 // Only allow operand swap on commuting operations 1441 if (TraceSuperWord && Verbose) { 1442 tty->print_cr("Unprofitable"); 1443 pk->at(0)->dump(); 1444 } 1445 #endif 1446 remove_pack_at(i); 1447 changed = true; 1448 } 1449 } 1450 } while (changed); 1451 1452 #ifndef PRODUCT 1453 if (TraceSuperWord) { 1454 tty->print_cr("\nAfter filter_packs"); 1455 print_packset(); 1456 tty->cr(); 1457 } 1458 #endif 1459 } 1460 1461 //------------------------------implemented--------------------------- 1462 // Can code be generated for pack p? 1463 bool SuperWord::implemented(Node_List* p) { 1464 bool retValue = false; 1465 Node* p0 = p->at(0); 1466 if (p0 != NULL) { 1467 int opc = p0->Opcode(); 1468 uint size = p->size(); 1469 if (p0->is_reduction()) { 1470 const Type *arith_type = p0->bottom_type(); 1471 // Length 2 reductions of INT/LONG do not offer performance benefits 1472 if (((arith_type->basic_type() == T_INT) || (arith_type->basic_type() == T_LONG)) && (size == 2)) { 1473 retValue = false; 1474 } else { 1475 retValue = ReductionNode::implemented(opc, size, arith_type->basic_type()); 1476 } 1477 } else { 1478 retValue = VectorNode::implemented(opc, size, velt_basic_type(p0)); 1479 } 1480 } 1481 return retValue; 1482 } 1483 1484 //------------------------------same_inputs-------------------------- 1485 // For pack p, are all idx operands the same? 1486 static bool same_inputs(Node_List* p, int idx) { 1487 Node* p0 = p->at(0); 1488 uint vlen = p->size(); 1489 Node* p0_def = p0->in(idx); 1490 for (uint i = 1; i < vlen; i++) { 1491 Node* pi = p->at(i); 1492 Node* pi_def = pi->in(idx); 1493 if (p0_def != pi_def) 1494 return false; 1495 } 1496 return true; 1497 } 1498 1499 //------------------------------profitable--------------------------- 1500 // For pack p, are all operands and all uses (with in the block) vector? 1501 bool SuperWord::profitable(Node_List* p) { 1502 Node* p0 = p->at(0); 1503 uint start, end; 1504 VectorNode::vector_operands(p0, &start, &end); 1505 1506 // Return false if some inputs are not vectors or vectors with different 1507 // size or alignment. 1508 // Also, for now, return false if not scalar promotion case when inputs are 1509 // the same. Later, implement PackNode and allow differing, non-vector inputs 1510 // (maybe just the ones from outside the block.) 1511 for (uint i = start; i < end; i++) { 1512 if (!is_vector_use(p0, i)) 1513 return false; 1514 } 1515 // Check if reductions are connected 1516 if (p0->is_reduction()) { 1517 Node* second_in = p0->in(2); 1518 Node_List* second_pk = my_pack(second_in); 1519 if ((second_pk == NULL) || (_num_work_vecs == _num_reductions)) { 1520 // Remove reduction flag if no parent pack or if not enough work 1521 // to cover reduction expansion overhead 1522 p0->remove_flag(Node::Flag_is_reduction); 1523 return false; 1524 } else if (second_pk->size() != p->size()) { 1525 return false; 1526 } 1527 } 1528 if (VectorNode::is_shift(p0)) { 1529 // For now, return false if shift count is vector or not scalar promotion 1530 // case (different shift counts) because it is not supported yet. 1531 Node* cnt = p0->in(2); 1532 Node_List* cnt_pk = my_pack(cnt); 1533 if (cnt_pk != NULL) 1534 return false; 1535 if (!same_inputs(p, 2)) 1536 return false; 1537 } 1538 if (!p0->is_Store()) { 1539 // For now, return false if not all uses are vector. 1540 // Later, implement ExtractNode and allow non-vector uses (maybe 1541 // just the ones outside the block.) 1542 for (uint i = 0; i < p->size(); i++) { 1543 Node* def = p->at(i); 1544 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { 1545 Node* use = def->fast_out(j); 1546 for (uint k = 0; k < use->req(); k++) { 1547 Node* n = use->in(k); 1548 if (def == n) { 1549 // reductions can be loop carried dependences 1550 if (def->is_reduction() && use->is_Phi()) 1551 continue; 1552 if (!is_vector_use(use, k)) { 1553 return false; 1554 } 1555 } 1556 } 1557 } 1558 } 1559 } 1560 return true; 1561 } 1562 1563 //------------------------------schedule--------------------------- 1793 tty->print("SuperWord::output "); 1794 lpt()->dump_head(); 1795 } 1796 #endif 1797 1798 // MUST ENSURE main loop's initial value is properly aligned: 1799 // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0 1800 1801 align_initial_loop_index(align_to_ref()); 1802 1803 // Insert extract (unpack) operations for scalar uses 1804 for (int i = 0; i < _packset.length(); i++) { 1805 insert_extracts(_packset.at(i)); 1806 } 1807 1808 Compile* C = _phase->C; 1809 CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); 1810 uint max_vlen_in_bytes = 0; 1811 uint max_vlen = 0; 1812 1813 NOT_PRODUCT(if(_CountedLoopReserveKit_debug > 0) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);}) 1814 1815 CountedLoopReserveKit make_reversable(_phase, _lpt, DoReserveCopyInSuperWord); 1816 1817 NOT_PRODUCT(if(_CountedLoopReserveKit_debug > 0) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);}) 1818 1819 if (DoReserveCopyInSuperWord && !make_reversable.has_reserved()) { 1820 NOT_PRODUCT({tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");}) 1821 return; 1822 } 1823 1824 for (int i = 0; i < _block.length(); i++) { 1825 Node* n = _block.at(i); 1826 Node_List* p = my_pack(n); 1827 if (p && n == executed_last(p)) { 1828 uint vlen = p->size(); 1829 uint vlen_in_bytes = 0; 1830 Node* vn = NULL; 1831 Node* low_adr = p->at(0); 1832 Node* first = executed_first(p); 1833 int opc = n->Opcode(); 1834 if (n->is_Load()) { 1835 Node* ctl = n->in(MemNode::Control); 1836 Node* mem = first->in(MemNode::Memory); 1837 SWPointer p1(n->as_Mem(), this, NULL, false); 1838 // Identify the memory dependency for the new loadVector node by 1839 // walking up through memory chain. 1840 // This is done to give flexibility to the new loadVector node so that 1841 // it can move above independent storeVector nodes. 1842 while (mem->is_StoreVector()) { 1843 SWPointer p2(mem->as_Mem(), this, NULL, false); 1844 int cmp = p1.cmp(p2); 1845 if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) { 1846 mem = mem->in(MemNode::Memory); 1847 } else { 1848 break; // dependent memory 1849 } 1850 } 1851 Node* adr = low_adr->in(MemNode::Address); 1852 const TypePtr* atyp = n->adr_type(); 1853 vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p)); 1854 vlen_in_bytes = vn->as_LoadVector()->memory_size(); 1855 } else if (n->is_Store()) { 1856 // Promote value to be stored to vector 1857 Node* val = vector_opd(p, MemNode::ValueIn); 1858 Node* ctl = n->in(MemNode::Control); 1859 Node* mem = first->in(MemNode::Memory); 1860 Node* adr = low_adr->in(MemNode::Address); 1861 const TypePtr* atyp = n->adr_type(); 1862 vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen); 1863 vlen_in_bytes = vn->as_StoreVector()->memory_size(); 1864 } else if (n->req() == 3) { 1865 // Promote operands to vector 1866 Node* in1 = NULL; 1867 bool node_isa_reduction = n->is_reduction(); 1868 if (node_isa_reduction) { 1869 // the input to the first reduction operation is retained 1870 in1 = low_adr->in(1); 1871 } else { 1872 in1 = vector_opd(p, 1); 1873 } 1874 Node* in2 = vector_opd(p, 2); 1875 if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) { 1876 // Move invariant vector input into second position to avoid register spilling. 1877 Node* tmp = in1; 1878 in1 = in2; 1879 in2 = tmp; 1880 } 1881 if (node_isa_reduction) { 1882 const Type *arith_type = n->bottom_type(); 1883 vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type()); 1884 if (in2->is_Load()) { 1885 vlen_in_bytes = in2->as_LoadVector()->memory_size(); 1886 } else { 1887 vlen_in_bytes = in2->as_Vector()->length_in_bytes(); 1888 } 1889 } else { 1890 vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); 1891 vlen_in_bytes = vn->as_Vector()->length_in_bytes(); 1892 } 1893 } else if (opc == Op_SqrtD || opc == Op_AbsF || opc == Op_AbsD || opc == Op_NegF || opc == Op_NegD) { 1894 // Promote operand to vector (Sqrt/Abs/Neg are 2 address instructions) 1895 Node* in = vector_opd(p, 1); 1896 vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n)); 1897 vlen_in_bytes = vn->as_Vector()->length_in_bytes(); 1898 } else { 1899 ShouldNotReachHere(); 1900 } 1901 assert(vn != NULL, "sanity"); 1902 _igvn.register_new_node_with_optimizer(vn); 1903 _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); 1904 for (uint j = 0; j < p->size(); j++) { 1905 Node* pm = p->at(j); 1906 _igvn.replace_node(pm, vn); 1907 } 1908 _igvn._worklist.push(vn); 1909 1910 if (vlen_in_bytes > max_vlen_in_bytes) { 1911 max_vlen = vlen; 1912 max_vlen_in_bytes = vlen_in_bytes; 1913 } 1914 #ifdef ASSERT 1915 if (TraceNewVectors) { 1916 tty->print("new Vector node: "); 1917 vn->dump(); 1918 } 1919 #endif 1920 } 1921 } 1922 C->set_max_vector_size(max_vlen_in_bytes); 1923 1924 if (SuperWordLoopUnrollAnalysis) { 1925 if (cl->has_passed_slp()) { 1926 uint slp_max_unroll_factor = cl->slp_max_unroll(); 1927 if (slp_max_unroll_factor == max_vlen) { 1928 NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte)); 1929 // For atomic unrolled loops which are vector mapped, instigate more unrolling. 1930 cl->set_notpassed_slp(); 1931 C->set_major_progress(); 1932 cl->mark_do_unroll_only(); 1933 } 1934 } 1935 } 1936 1937 if (DoReserveCopyInSuperWord) { 1938 make_reversable.use_new(); 1939 } 1940 NOT_PRODUCT(if(_CountedLoopReserveKit_debug > 0) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);}) 1941 return; 1942 } 1943 1944 //------------------------------vector_opd--------------------------- 1945 // Create a vector operand for the nodes in pack p for operand: in(opd_idx) 1946 Node* SuperWord::vector_opd(Node_List* p, int opd_idx) { 1947 Node* p0 = p->at(0); 1948 uint vlen = p->size(); 1949 Node* opd = p0->in(opd_idx); 1950 1951 if (same_inputs(p, opd_idx)) { 1952 if (opd->is_Vector() || opd->is_LoadVector()) { 1953 assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector"); 1954 return opd; // input is matching vector 1955 } 1956 if ((opd_idx == 2) && VectorNode::is_shift(p0)) { 1957 Compile* C = _phase->C; 1958 Node* cnt = opd; 1959 // Vector instructions do not mask shift count, do it here. 1960 juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1); 1961 const TypeInt* t = opd->find_int_type(); 1962 if (t != NULL && t->is_con()) { 1963 juint shift = t->get_con(); 1964 if (shift > mask) { // Unsigned cmp 1965 cnt = ConNode::make(TypeInt::make(shift & mask)); 1966 } 1967 } else { 1968 if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) { 1969 cnt = ConNode::make(TypeInt::make(mask)); 1970 _igvn.register_new_node_with_optimizer(cnt); 1971 cnt = new AndINode(opd, cnt); 1972 _igvn.register_new_node_with_optimizer(cnt); 1973 _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); 1974 } 1975 assert(opd->bottom_type()->isa_int(), "int type only"); 1976 // Move non constant shift count into vector register. 1977 cnt = VectorNode::shift_count(p0, cnt, vlen, velt_basic_type(p0)); 1978 } 1979 if (cnt != opd) { 1980 _igvn.register_new_node_with_optimizer(cnt); 1981 _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); 1982 } 1983 return cnt; 1984 } 1985 assert(!opd->is_StoreVector(), "such vector is not expected here"); 1986 // Convert scalar input to vector with the same number of elements as 1987 // p0's vector. Use p0's type because size of operand's container in 1988 // vector should match p0's size regardless operand's size. 1989 const Type* p0_t = velt_type(p0); 1990 VectorNode* vn = VectorNode::scalar2vector(opd, vlen, p0_t); 1991 1992 _igvn.register_new_node_with_optimizer(vn); 1993 _phase->set_ctrl(vn, _phase->get_ctrl(opd)); 1994 #ifdef ASSERT 1995 if (TraceNewVectors) { 1996 tty->print("new Vector node: "); 1997 vn->dump(); 1998 } 1999 #endif 2000 return vn; 2001 } 2002 2003 // Insert pack operation 2004 BasicType bt = velt_basic_type(p0); 2005 PackNode* pk = PackNode::make(opd, vlen, bt); 2006 DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); ) 2007 2008 for (uint i = 1; i < vlen; i++) { 2009 Node* pi = p->at(i); 2010 Node* in = pi->in(opd_idx); 2011 assert(my_pack(in) == NULL, "Should already have been unpacked"); 2012 assert(opd_bt == in->bottom_type()->basic_type(), "all same type"); 2013 pk->add_opd(in); 2014 } 2015 _igvn.register_new_node_with_optimizer(pk); 2016 _phase->set_ctrl(pk, _phase->get_ctrl(opd)); 2017 #ifdef ASSERT 2018 if (TraceNewVectors) { 2019 tty->print("new Vector node: "); 2020 pk->dump(); 2021 } 2022 #endif 2023 return pk; 2024 } 2025 2026 //------------------------------insert_extracts--------------------------- 2027 // If a use of pack p is not a vector use, then replace the 2028 // use with an extract operation. 2029 void SuperWord::insert_extracts(Node_List* p) { 2030 if (p->at(0)->is_Store()) return; 2031 assert(_n_idx_list.is_empty(), "empty (node,index) list"); 2032 2033 // Inspect each use of each pack member. For each use that is 2034 // not a vector use, replace the use with an extract operation. 2035 2036 for (uint i = 0; i < p->size(); i++) { 2037 Node* def = p->at(i); 2038 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { 2039 Node* use = def->fast_out(j); 2040 for (uint k = 0; k < use->req(); k++) { 2041 Node* n = use->in(k); 2042 if (def == n) { 2043 if (!is_vector_use(use, k)) { 2044 _n_idx_list.push(use, k); 2045 } 2046 } 2047 } 2048 } 2049 } 2050 2051 while (_n_idx_list.is_nonempty()) { 2052 Node* use = _n_idx_list.node(); 2053 int idx = _n_idx_list.index(); 2054 _n_idx_list.pop(); 2055 Node* def = use->in(idx); 2056 2057 if (def->is_reduction()) continue; 2058 2059 // Insert extract operation 2060 _igvn.hash_delete(def); 2061 int def_pos = alignment(def) / data_size(def); 2062 2063 Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def)); | 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 #include "precompiled.hpp" 25 #include "compiler/compileLog.hpp" 26 #include "libadt/vectset.hpp" 27 #include "memory/allocation.inline.hpp" 28 #include "opto/addnode.hpp" 29 #include "opto/callnode.hpp" 30 #include "opto/castnode.hpp" 31 #include "opto/convertnode.hpp" 32 #include "opto/divnode.hpp" 33 #include "opto/matcher.hpp" 34 #include "opto/memnode.hpp" 35 #include "opto/mulnode.hpp" 36 #include "opto/opcodes.hpp" 37 #include "opto/opaquenode.hpp" 38 #include "opto/superword.hpp" 39 #include "opto/vectornode.hpp" 40 #include "opto/movenode.hpp" 41 42 // 43 // S U P E R W O R D T R A N S F O R M 44 //============================================================================= 45 46 //------------------------------SuperWord--------------------------- 47 SuperWord::SuperWord(PhaseIdealLoop* phase) : 48 _phase(phase), 49 _igvn(phase->_igvn), 50 _arena(phase->C->comp_arena()), 51 _packset(arena(), 8, 0, NULL), // packs for the current block 52 _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb 53 _block(arena(), 8, 0, NULL), // nodes in current block 54 _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside 55 _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads 56 _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails 57 _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node 58 _clone_map(phase->C->clone_map()), // map of nodes created in cloning 59 _cmovev_kit(_arena, this), // map to facilitate CMoveVD creation 60 _align_to_ref(NULL), // memory reference to align vectors to 61 _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs 62 _dg(_arena), // dependence graph 63 _visited(arena()), // visited node set 64 _post_visited(arena()), // post visited node set 65 _n_idx_list(arena(), 8), // scratch list of (node,index) pairs 66 _stk(arena(), 8, 0, NULL), // scratch stack of nodes 67 _nlist(arena(), 8, 0, NULL), // scratch list of nodes 68 _lpt(NULL), // loop tree node 69 _lp(NULL), // LoopNode 70 _bb(NULL), // basic block 71 _iv(NULL), // induction var 72 _race_possible(false), // cases where SDMU is true 73 _early_return(true), // analysis evaluations routine 74 _num_work_vecs(0), // amount of vector work we have 75 _num_reductions(0), // amount of reduction work we have 76 _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style 77 _do_reserve_copy(DoReserveCopyInSuperWord), 78 _ii_first(-1), // first loop generation index - only if do_vector_loop() 79 _ii_last(-1), // last loop generation index - only if do_vector_loop() 80 _ii_order(arena(), 8, 0, 0) 81 { 82 #ifndef PRODUCT 83 _vector_loop_debug = 0; 84 if (_phase->C->method() != NULL) { 85 _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug); 86 } 87 #endif 88 } 89 90 //------------------------------transform_loop--------------------------- 91 void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { 92 assert(UseSuperWord, "should be"); 93 // Do vectors exist on this architecture? 94 if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; 95 96 assert(lpt->_head->is_CountedLoop(), "must be"); 97 CountedLoopNode *cl = lpt->_head->as_CountedLoop(); 98 99 if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop 100 101 if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops 102 // Check for no control flow in body (other than exit) 103 Node *cl_exit = cl->loopexit(); 104 if (cl_exit->in(0) != lpt->_head) { 105 #ifndef PRODUCT 106 if (TraceSuperWord) { 107 tty->print_cr("SuperWord::transform_loop: loop too complicated, cl_exit->in(0) != lpt->_head"); 108 tty->print("cl_exit %d", cl_exit->_idx); cl_exit->dump(); 109 tty->print("cl_exit->in(0) %d", cl_exit->in(0)->_idx); cl_exit->in(0)->dump(); 110 tty->print("lpt->_head %d", lpt->_head->_idx); lpt->_head->dump(); 111 lpt->dump_head(); 112 } 113 #endif 114 return; 115 } 116 117 // Make sure the are no extra control users of the loop backedge 118 if (cl->back_control()->outcnt() != 1) { 119 return; 120 } 121 122 // We only re-enter slp when we vector mapped a queried loop and we want to 123 // continue unrolling, in this case, slp is not subsequently done. 124 if (cl->do_unroll_only()) return; 125 126 // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) 127 CountedLoopEndNode* pre_end = get_pre_loop_end(cl); 128 if (pre_end == NULL) return; 129 Node *pre_opaq1 = pre_end->limit(); 130 if (pre_opaq1->Opcode() != Op_Opaque1) return; 131 132 init(); // initialize data structures 133 134 set_lpt(lpt); 135 set_lp(cl); 384 385 find_adjacent_refs(); 386 387 extend_packlist(); 388 389 if (_do_vector_loop) { 390 if (_packset.length() == 0) { 391 #ifndef PRODUCT 392 if (TraceSuperWord) { 393 tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway"); 394 } 395 #endif 396 pack_parallel(); 397 } 398 } 399 400 combine_packs(); 401 402 construct_my_pack_map(); 403 404 if (_do_vector_loop) { 405 merge_packs_to_cmovd(); 406 } 407 408 filter_packs(); 409 410 schedule(); 411 412 output(); 413 } 414 415 //------------------------------find_adjacent_refs--------------------------- 416 // Find the adjacent memory references and create pack pairs for them. 417 // This is the initial set of packs that will then be extended by 418 // following use->def and def->use links. The align positions are 419 // assigned relative to the reference "align_to_ref" 420 void SuperWord::find_adjacent_refs() { 421 // Get list of memory operations 422 Node_List memops; 423 for (int i = 0; i < _block.length(); i++) { 424 Node* n = _block.at(i); 425 if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) && 426 is_java_primitive(n->as_Mem()->memory_type())) { 427 int align = memory_alignment(n->as_Mem(), 0); 1068 if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) { 1069 return false; 1070 } 1071 } 1072 } 1073 return true; 1074 } 1075 1076 //------------------------------set_alignment--------------------------- 1077 void SuperWord::set_alignment(Node* s1, Node* s2, int align) { 1078 set_alignment(s1, align); 1079 if (align == top_align || align == bottom_align) { 1080 set_alignment(s2, align); 1081 } else { 1082 set_alignment(s2, align + data_size(s1)); 1083 } 1084 } 1085 1086 //------------------------------data_size--------------------------- 1087 int SuperWord::data_size(Node* s) { 1088 Node* use = NULL; //test if the node is a candidate for CMoveVD optimization, then return the size of CMov 1089 if (_do_vector_loop) { 1090 use = _cmovev_kit.is_Bool_candidate(s); 1091 if (use != NULL) { 1092 return data_size(use); 1093 } 1094 use = _cmovev_kit.is_CmpD_candidate(s); 1095 if (use != NULL) { 1096 return data_size(use); 1097 } 1098 } 1099 int bsize = type2aelembytes(velt_basic_type(s)); 1100 assert(bsize != 0, "valid size"); 1101 return bsize; 1102 } 1103 1104 //------------------------------extend_packlist--------------------------- 1105 // Extend packset by following use->def and def->use links from pack members. 1106 void SuperWord::extend_packlist() { 1107 bool changed; 1108 do { 1109 packset_sort(_packset.length()); 1110 changed = false; 1111 for (int i = 0; i < _packset.length(); i++) { 1112 Node_List* p = _packset.at(i); 1113 changed |= follow_use_defs(p); 1114 changed |= follow_def_uses(p); 1115 } 1116 } while (changed); 1117 1118 if (_race_possible) { 1125 #ifndef PRODUCT 1126 if (TraceSuperWord) { 1127 tty->print_cr("\nAfter extend_packlist"); 1128 print_packset(); 1129 } 1130 #endif 1131 } 1132 1133 //------------------------------follow_use_defs--------------------------- 1134 // Extend the packset by visiting operand definitions of nodes in pack p 1135 bool SuperWord::follow_use_defs(Node_List* p) { 1136 assert(p->size() == 2, "just checking"); 1137 Node* s1 = p->at(0); 1138 Node* s2 = p->at(1); 1139 assert(s1->req() == s2->req(), "just checking"); 1140 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); 1141 1142 if (s1->is_Load()) return false; 1143 1144 int align = alignment(s1); 1145 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d", s1->_idx, align);) 1146 bool changed = false; 1147 int start = s1->is_Store() ? MemNode::ValueIn : 1; 1148 int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req(); 1149 for (int j = start; j < end; j++) { 1150 Node* t1 = s1->in(j); 1151 Node* t2 = s2->in(j); 1152 if (!in_bb(t1) || !in_bb(t2)) 1153 continue; 1154 if (stmts_can_pack(t1, t2, align)) { 1155 if (est_savings(t1, t2) >= 0) { 1156 Node_List* pair = new Node_List(); 1157 pair->push(t1); 1158 pair->push(t2); 1159 _packset.append(pair); 1160 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: set_alignment(%d, %d, %d)", t1->_idx, t2->_idx, align);) 1161 set_alignment(t1, t2, align); 1162 changed = true; 1163 } 1164 } 1165 } 1166 return changed; 1167 } 1168 1169 //------------------------------follow_def_uses--------------------------- 1170 // Extend the packset by visiting uses of nodes in pack p 1171 bool SuperWord::follow_def_uses(Node_List* p) { 1172 bool changed = false; 1173 Node* s1 = p->at(0); 1174 Node* s2 = p->at(1); 1175 assert(p->size() == 2, "just checking"); 1176 assert(s1->req() == s2->req(), "just checking"); 1177 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); 1178 1179 if (s1->is_Store()) return false; 1180 1181 int align = alignment(s1); 1182 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: s1 %d, align %d", s1->_idx, align);) 1183 int savings = -1; 1184 int num_s1_uses = 0; 1185 Node* u1 = NULL; 1186 Node* u2 = NULL; 1187 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { 1188 Node* t1 = s1->fast_out(i); 1189 num_s1_uses++; 1190 if (!in_bb(t1)) continue; 1191 for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { 1192 Node* t2 = s2->fast_out(j); 1193 if (!in_bb(t2)) continue; 1194 if (!opnd_positions_match(s1, t1, s2, t2)) 1195 continue; 1196 if (stmts_can_pack(t1, t2, align)) { 1197 int my_savings = est_savings(t1, t2); 1198 if (my_savings > savings) { 1199 savings = my_savings; 1200 u1 = t1; 1201 u2 = t2; 1202 } 1203 } 1204 } 1205 } 1206 if (num_s1_uses > 1) { 1207 _race_possible = true; 1208 } 1209 if (savings >= 0) { 1210 Node_List* pair = new Node_List(); 1211 pair->push(u1); 1212 pair->push(u2); 1213 _packset.append(pair); 1214 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: set_alignment(%d, %d, %d)", u1->_idx, u2->_idx, align);) 1215 set_alignment(u1, u2, align); 1216 changed = true; 1217 } 1218 return changed; 1219 } 1220 1221 //------------------------------order_def_uses--------------------------- 1222 // For extended packsets, ordinally arrange uses packset by major component 1223 void SuperWord::order_def_uses(Node_List* p) { 1224 Node* s1 = p->at(0); 1225 1226 if (s1->is_Store()) return; 1227 1228 // reductions are always managed beforehand 1229 if (s1->is_reduction()) return; 1230 1231 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { 1232 Node* t1 = s1->fast_out(i); 1233 1234 // Only allow operand swap on commuting operations 1470 if (TraceSuperWord && Verbose) { 1471 tty->print_cr("Unprofitable"); 1472 pk->at(0)->dump(); 1473 } 1474 #endif 1475 remove_pack_at(i); 1476 changed = true; 1477 } 1478 } 1479 } while (changed); 1480 1481 #ifndef PRODUCT 1482 if (TraceSuperWord) { 1483 tty->print_cr("\nAfter filter_packs"); 1484 print_packset(); 1485 tty->cr(); 1486 } 1487 #endif 1488 } 1489 1490 //------------------------------merge_packs_to_cmovd--------------------------- 1491 // Merge CMoveD into new vector-nodes 1492 // We want to catch this pattern and subsume CmpD and Bool into CMoveD 1493 // 1494 // SubD ConD 1495 // / | / 1496 // / | / / 1497 // / | / / 1498 // / | / / 1499 // / / / 1500 // / / | / 1501 // v / | / 1502 // CmpD | / 1503 // | | / 1504 // v | / 1505 // Bool | / 1506 // \ | / 1507 // \ | / 1508 // \ | / 1509 // \ | / 1510 // \ v / 1511 // CMoveD 1512 // 1513 1514 void SuperWord::merge_packs_to_cmovd() { 1515 for (int i = _packset.length() - 1; i >= 0; i--) { 1516 _cmovev_kit.make_cmovevd_pack(_packset.at(i)); 1517 } 1518 #ifndef PRODUCT 1519 if (TraceSuperWord) { 1520 tty->print_cr("\nSuperWord::merge_packs_to_cmovd(): After merge"); 1521 print_packset(); 1522 tty->cr(); 1523 } 1524 #endif 1525 } 1526 1527 Node* CMoveKit::is_Bool_candidate(Node* def) const { 1528 Node* use = NULL; 1529 if (!def->is_Bool() || def->in(0) != NULL || def->outcnt() != 1) { 1530 return NULL; 1531 } 1532 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { 1533 use = def->fast_out(j); 1534 if (!_sw->same_generation(def, use) || !use->is_CMove()) { 1535 return NULL; 1536 } 1537 } 1538 return use; 1539 } 1540 1541 Node* CMoveKit::is_CmpD_candidate(Node* def) const { 1542 Node* use = NULL; 1543 if (!def->is_Cmp() || def->in(0) != NULL || def->outcnt() != 1) { 1544 return NULL; 1545 } 1546 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { 1547 use = def->fast_out(j); 1548 if (!_sw->same_generation(def, use) || (use = is_Bool_candidate(use)) == NULL || !_sw->same_generation(def, use)) { 1549 return NULL; 1550 } 1551 } 1552 return use; 1553 } 1554 1555 Node_List* CMoveKit::make_cmovevd_pack(Node_List* cmovd_pk) { 1556 Node *cmovd = cmovd_pk->at(0); 1557 if (!cmovd->is_CMove()) { 1558 return NULL; 1559 } 1560 if (pack(cmovd) != NULL) { // already in the cmov pack 1561 return NULL; 1562 } 1563 if (cmovd->in(0) != NULL) { 1564 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CMoveD %d has control flow, escaping...", cmovd->_idx); cmovd->dump();}) 1565 return NULL; 1566 } 1567 1568 Node* bol = cmovd->as_CMove()->in(CMoveNode::Condition); 1569 if (!bol->is_Bool() 1570 || bol->outcnt() != 1 1571 || !_sw->same_generation(bol, cmovd) 1572 || bol->in(0) != NULL // BoolNode has control flow!! 1573 || _sw->my_pack(bol) == NULL) { 1574 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: Bool %d does not fit CMoveD %d for building vector, escaping...", bol->_idx, cmovd->_idx); bol->dump();}) 1575 return NULL; 1576 } 1577 Node_List* bool_pk = _sw->my_pack(bol); 1578 if (bool_pk->size() != cmovd_pk->size() ) { 1579 return NULL; 1580 } 1581 1582 Node* cmpd = bol->in(1); 1583 if (!cmpd->is_Cmp() 1584 || cmpd->outcnt() != 1 1585 || !_sw->same_generation(cmpd, cmovd) 1586 || cmpd->in(0) != NULL // CmpDNode has control flow!! 1587 || _sw->my_pack(cmpd) == NULL) { 1588 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CmpD %d does not fit CMoveD %d for building vector, escaping...", cmpd->_idx, cmovd->_idx); cmpd->dump();}) 1589 return NULL; 1590 } 1591 Node_List* cmpd_pk = _sw->my_pack(cmpd); 1592 if (cmpd_pk->size() != cmovd_pk->size() ) { 1593 return NULL; 1594 } 1595 1596 if (!test_cmpd_pack(cmpd_pk, cmovd_pk)) { 1597 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: cmpd pack for CmpD %d failed vectorization test", cmpd->_idx); cmpd->dump();}) 1598 return NULL; 1599 } 1600 1601 Node_List* new_cmpd_pk = new Node_List(); 1602 uint sz = cmovd_pk->size() - 1; 1603 for (uint i = 0; i <= sz; ++i) { 1604 Node* cmov = cmovd_pk->at(i); 1605 Node* bol = bool_pk->at(i); 1606 Node* cmp = cmpd_pk->at(i); 1607 1608 new_cmpd_pk->insert(i, cmov); 1609 1610 map(cmov, new_cmpd_pk); 1611 map(bol, new_cmpd_pk); 1612 map(cmp, new_cmpd_pk); 1613 1614 _sw->set_my_pack(cmov, new_cmpd_pk); // and keep old packs for cmp and bool 1615 } 1616 _sw->_packset.remove(cmovd_pk); 1617 _sw->_packset.remove(bool_pk); 1618 _sw->_packset.remove(cmpd_pk); 1619 _sw->_packset.append(new_cmpd_pk); 1620 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack"); _sw->print_pack(new_cmpd_pk);}) 1621 return new_cmpd_pk; 1622 } 1623 1624 bool CMoveKit::test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk) { 1625 Node* cmpd0 = cmpd_pk->at(0); 1626 assert(cmpd0->is_Cmp(), "CMoveKit::test_cmpd_pack: should be CmpDNode"); 1627 assert(cmovd_pk->at(0)->is_CMove(), "CMoveKit::test_cmpd_pack: should be CMoveD"); 1628 assert(cmpd_pk->size() == cmovd_pk->size(), "CMoveKit::test_cmpd_pack: should be same size"); 1629 Node* in1 = cmpd0->in(1); 1630 Node* in2 = cmpd0->in(2); 1631 Node_List* in1_pk = _sw->my_pack(in1); 1632 Node_List* in2_pk = _sw->my_pack(in2); 1633 1634 if (in1_pk != NULL && in1_pk->size() != cmpd_pk->size() 1635 || in2_pk != NULL && in2_pk->size() != cmpd_pk->size() ) { 1636 return false; 1637 } 1638 1639 // test if "all" in1 are in the same pack or the same node 1640 if (in1_pk == NULL) { 1641 for (uint j = 1; j < cmpd_pk->size(); j++) { 1642 if (cmpd_pk->at(j)->in(1) != in1) { 1643 return false; 1644 } 1645 }//for: in1_pk is not pack but all CmpD nodes in the pack have the same in(1) 1646 } 1647 // test if "all" in2 are in the same pack or the same node 1648 if (in2_pk == NULL) { 1649 for (uint j = 1; j < cmpd_pk->size(); j++) { 1650 if (cmpd_pk->at(j)->in(2) != in2) { 1651 return false; 1652 } 1653 }//for: in2_pk is not pack but all CmpD nodes in the pack have the same in(2) 1654 } 1655 //now check if cmpd_pk may be subsumed in vector built for cmovd_pk 1656 int cmovd_ind1, cmovd_ind2; 1657 if (cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse) 1658 && cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) { 1659 cmovd_ind1 = CMoveNode::IfFalse; 1660 cmovd_ind2 = CMoveNode::IfTrue; 1661 } else if (cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse) 1662 && cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) { 1663 cmovd_ind2 = CMoveNode::IfFalse; 1664 cmovd_ind1 = CMoveNode::IfTrue; 1665 } 1666 else { 1667 return false; 1668 } 1669 1670 for (uint j = 1; j < cmpd_pk->size(); j++) { 1671 if (cmpd_pk->at(j)->in(1) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind1) 1672 || cmpd_pk->at(j)->in(2) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind2)) { 1673 return false; 1674 }//if 1675 } 1676 NOT_PRODUCT(if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmpd_pack: cmpd pack for 1st CmpD %d is OK for vectorization: ", cmpd0->_idx); cmpd0->dump(); }) 1677 return true; 1678 } 1679 1680 //------------------------------implemented--------------------------- 1681 // Can code be generated for pack p? 1682 bool SuperWord::implemented(Node_List* p) { 1683 bool retValue = false; 1684 Node* p0 = p->at(0); 1685 if (p0 != NULL) { 1686 int opc = p0->Opcode(); 1687 uint size = p->size(); 1688 if (p0->is_reduction()) { 1689 const Type *arith_type = p0->bottom_type(); 1690 // Length 2 reductions of INT/LONG do not offer performance benefits 1691 if (((arith_type->basic_type() == T_INT) || (arith_type->basic_type() == T_LONG)) && (size == 2)) { 1692 retValue = false; 1693 } else { 1694 retValue = ReductionNode::implemented(opc, size, arith_type->basic_type()); 1695 } 1696 } else { 1697 retValue = VectorNode::implemented(opc, size, velt_basic_type(p0)); 1698 } 1699 if (!retValue) { 1700 if (is_cmov_pack(p)) { 1701 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);}) 1702 return true; 1703 } 1704 } 1705 } 1706 return retValue; 1707 } 1708 1709 bool SuperWord::is_cmov_pack(Node_List* p) { 1710 return _cmovev_kit.pack(p->at(0)) != NULL; 1711 } 1712 //------------------------------same_inputs-------------------------- 1713 // For pack p, are all idx operands the same? 1714 bool SuperWord::same_inputs(Node_List* p, int idx) { 1715 Node* p0 = p->at(0); 1716 uint vlen = p->size(); 1717 Node* p0_def = p0->in(idx); 1718 for (uint i = 1; i < vlen; i++) { 1719 Node* pi = p->at(i); 1720 Node* pi_def = pi->in(idx); 1721 if (p0_def != pi_def) { 1722 return false; 1723 } 1724 } 1725 return true; 1726 } 1727 1728 //------------------------------profitable--------------------------- 1729 // For pack p, are all operands and all uses (with in the block) vector? 1730 bool SuperWord::profitable(Node_List* p) { 1731 Node* p0 = p->at(0); 1732 uint start, end; 1733 VectorNode::vector_operands(p0, &start, &end); 1734 1735 // Return false if some inputs are not vectors or vectors with different 1736 // size or alignment. 1737 // Also, for now, return false if not scalar promotion case when inputs are 1738 // the same. Later, implement PackNode and allow differing, non-vector inputs 1739 // (maybe just the ones from outside the block.) 1740 for (uint i = start; i < end; i++) { 1741 if (!is_vector_use(p0, i)) { 1742 return false; 1743 } 1744 } 1745 // Check if reductions are connected 1746 if (p0->is_reduction()) { 1747 Node* second_in = p0->in(2); 1748 Node_List* second_pk = my_pack(second_in); 1749 if ((second_pk == NULL) || (_num_work_vecs == _num_reductions)) { 1750 // Remove reduction flag if no parent pack or if not enough work 1751 // to cover reduction expansion overhead 1752 p0->remove_flag(Node::Flag_is_reduction); 1753 return false; 1754 } else if (second_pk->size() != p->size()) { 1755 return false; 1756 } 1757 } 1758 if (VectorNode::is_shift(p0)) { 1759 // For now, return false if shift count is vector or not scalar promotion 1760 // case (different shift counts) because it is not supported yet. 1761 Node* cnt = p0->in(2); 1762 Node_List* cnt_pk = my_pack(cnt); 1763 if (cnt_pk != NULL) 1764 return false; 1765 if (!same_inputs(p, 2)) 1766 return false; 1767 } 1768 if (!p0->is_Store()) { 1769 // For now, return false if not all uses are vector. 1770 // Later, implement ExtractNode and allow non-vector uses (maybe 1771 // just the ones outside the block.) 1772 for (uint i = 0; i < p->size(); i++) { 1773 Node* def = p->at(i); 1774 if (is_cmov_pack_internal_node(p, def)) { 1775 continue; 1776 } 1777 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { 1778 Node* use = def->fast_out(j); 1779 for (uint k = 0; k < use->req(); k++) { 1780 Node* n = use->in(k); 1781 if (def == n) { 1782 // reductions can be loop carried dependences 1783 if (def->is_reduction() && use->is_Phi()) 1784 continue; 1785 if (!is_vector_use(use, k)) { 1786 return false; 1787 } 1788 } 1789 } 1790 } 1791 } 1792 } 1793 return true; 1794 } 1795 1796 //------------------------------schedule--------------------------- 2026 tty->print("SuperWord::output "); 2027 lpt()->dump_head(); 2028 } 2029 #endif 2030 2031 // MUST ENSURE main loop's initial value is properly aligned: 2032 // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0 2033 2034 align_initial_loop_index(align_to_ref()); 2035 2036 // Insert extract (unpack) operations for scalar uses 2037 for (int i = 0; i < _packset.length(); i++) { 2038 insert_extracts(_packset.at(i)); 2039 } 2040 2041 Compile* C = _phase->C; 2042 CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); 2043 uint max_vlen_in_bytes = 0; 2044 uint max_vlen = 0; 2045 2046 NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);}) 2047 2048 CountedLoopReserveKit make_reversable(_phase, _lpt, do_reserve_copy()); 2049 2050 NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);}) 2051 2052 if (do_reserve_copy() && !make_reversable.has_reserved()) { 2053 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");}) 2054 return; 2055 } 2056 2057 for (int i = 0; i < _block.length(); i++) { 2058 Node* n = _block.at(i); 2059 Node_List* p = my_pack(n); 2060 if (p && n == executed_last(p)) { 2061 uint vlen = p->size(); 2062 uint vlen_in_bytes = 0; 2063 Node* vn = NULL; 2064 Node* low_adr = p->at(0); 2065 Node* first = executed_first(p); 2066 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack", first->_idx, n->_idx); print_pack(p);}) 2067 int opc = n->Opcode(); 2068 if (n->is_Load()) { 2069 Node* ctl = n->in(MemNode::Control); 2070 Node* mem = first->in(MemNode::Memory); 2071 SWPointer p1(n->as_Mem(), this, NULL, false); 2072 // Identify the memory dependency for the new loadVector node by 2073 // walking up through memory chain. 2074 // This is done to give flexibility to the new loadVector node so that 2075 // it can move above independent storeVector nodes. 2076 while (mem->is_StoreVector()) { 2077 SWPointer p2(mem->as_Mem(), this, NULL, false); 2078 int cmp = p1.cmp(p2); 2079 if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) { 2080 mem = mem->in(MemNode::Memory); 2081 } else { 2082 break; // dependent memory 2083 } 2084 } 2085 Node* adr = low_adr->in(MemNode::Address); 2086 const TypePtr* atyp = n->adr_type(); 2087 vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p)); 2088 vlen_in_bytes = vn->as_LoadVector()->memory_size(); 2089 } else if (n->is_Store()) { 2090 // Promote value to be stored to vector 2091 Node* val = vector_opd(p, MemNode::ValueIn); 2092 if (val == NULL) { 2093 if (do_reserve_copy()) { 2094 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: val should not be NULL, exiting SuperWord");}) 2095 return; //and reverse to backup IG 2096 } 2097 ShouldNotReachHere(); 2098 } 2099 2100 Node* ctl = n->in(MemNode::Control); 2101 Node* mem = first->in(MemNode::Memory); 2102 Node* adr = low_adr->in(MemNode::Address); 2103 const TypePtr* atyp = n->adr_type(); 2104 vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen); 2105 vlen_in_bytes = vn->as_StoreVector()->memory_size(); 2106 } else if (n->req() == 3 && !is_cmov_pack(p)) { 2107 // Promote operands to vector 2108 Node* in1 = NULL; 2109 bool node_isa_reduction = n->is_reduction(); 2110 if (node_isa_reduction) { 2111 // the input to the first reduction operation is retained 2112 in1 = low_adr->in(1); 2113 } else { 2114 in1 = vector_opd(p, 1); 2115 if (in1 == NULL) { 2116 if (do_reserve_copy()) { 2117 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in1 should not be NULL, exiting SuperWord");}) 2118 return; //and reverse to backup IG 2119 } 2120 ShouldNotReachHere(); 2121 } 2122 } 2123 Node* in2 = vector_opd(p, 2); 2124 if (in2 == NULL) { 2125 if (do_reserve_copy()) { 2126 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in2 should not be NULL, exiting SuperWord");}) 2127 return; //and reverse to backup IG 2128 } 2129 ShouldNotReachHere(); 2130 } 2131 if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) { 2132 // Move invariant vector input into second position to avoid register spilling. 2133 Node* tmp = in1; 2134 in1 = in2; 2135 in2 = tmp; 2136 } 2137 if (node_isa_reduction) { 2138 const Type *arith_type = n->bottom_type(); 2139 vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type()); 2140 if (in2->is_Load()) { 2141 vlen_in_bytes = in2->as_LoadVector()->memory_size(); 2142 } else { 2143 vlen_in_bytes = in2->as_Vector()->length_in_bytes(); 2144 } 2145 } else { 2146 vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); 2147 vlen_in_bytes = vn->as_Vector()->length_in_bytes(); 2148 } 2149 } else if (opc == Op_SqrtD || opc == Op_AbsF || opc == Op_AbsD || opc == Op_NegF || opc == Op_NegD) { 2150 // Promote operand to vector (Sqrt/Abs/Neg are 2 address instructions) 2151 Node* in = vector_opd(p, 1); 2152 vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n)); 2153 vlen_in_bytes = vn->as_Vector()->length_in_bytes(); 2154 } else if (is_cmov_pack(p)) { 2155 if (!n->is_CMove()) { 2156 continue; 2157 } 2158 // place here CMoveVDNode 2159 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization"); print_loop(false);}) 2160 Node* bol = n->in(CMoveNode::Condition); 2161 if (!bol->is_Bool() && bol->Opcode() == Op_ExtractI && bol->req() > 1 ) { 2162 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d", bol->_idx, bol->in(1)->_idx); bol->dump(); bol->in(1)->dump();}) 2163 bol = bol->in(1); //may be ExtractNode 2164 } 2165 2166 assert(bol->is_Bool(), "should be BoolNode - too late to bail out!"); 2167 if (!bol->is_Bool()) { 2168 if (do_reserve_copy()) { 2169 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();}) 2170 return; //and reverse to backup IG 2171 } 2172 ShouldNotReachHere(); 2173 } 2174 2175 int cond = (int)bol->as_Bool()->_test._test; 2176 Node* in_cc = _igvn.intcon(cond); 2177 NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", in_cc->_idx); in_cc->dump();}) 2178 Node* cc = bol->clone(); 2179 cc->set_req(1, in_cc); 2180 NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d", cc->_idx); cc->dump();}) 2181 2182 Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse 2183 if (src1 == NULL) { 2184 if (do_reserve_copy()) { 2185 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be NULL, exiting SuperWord");}) 2186 return; //and reverse to backup IG 2187 } 2188 ShouldNotReachHere(); 2189 } 2190 Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue 2191 if (src2 == NULL) { 2192 if (do_reserve_copy()) { 2193 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be NULL, exiting SuperWord");}) 2194 return; //and reverse to backup IG 2195 } 2196 ShouldNotReachHere(); 2197 } 2198 BasicType bt = velt_basic_type(n); 2199 const TypeVect* vt = TypeVect::make(bt, vlen); 2200 vn = new CMoveVDNode(cc, src1, src2, vt); 2201 NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();}) 2202 } else { 2203 if (do_reserve_copy()) { 2204 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: ShouldNotReachHere, exiting SuperWord");}) 2205 return; //and reverse to backup IG 2206 } 2207 ShouldNotReachHere(); 2208 } 2209 2210 assert(vn != NULL, "sanity"); 2211 if (vn == NULL) { 2212 if (do_reserve_copy()){ 2213 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: got NULL node, cannot proceed, exiting SuperWord");}) 2214 return; //and reverse to backup IG 2215 } 2216 ShouldNotReachHere(); 2217 } 2218 2219 _igvn.register_new_node_with_optimizer(vn); 2220 _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); 2221 for (uint j = 0; j < p->size(); j++) { 2222 Node* pm = p->at(j); 2223 _igvn.replace_node(pm, vn); 2224 } 2225 _igvn._worklist.push(vn); 2226 2227 if (vlen_in_bytes > max_vlen_in_bytes) { 2228 max_vlen = vlen; 2229 max_vlen_in_bytes = vlen_in_bytes; 2230 } 2231 #ifdef ASSERT 2232 if (TraceNewVectors) { 2233 tty->print("new Vector node: "); 2234 vn->dump(); 2235 } 2236 #endif 2237 } 2238 }//for (int i = 0; i < _block.length(); i++) 2239 2240 C->set_max_vector_size(max_vlen_in_bytes); 2241 2242 if (SuperWordLoopUnrollAnalysis) { 2243 if (cl->has_passed_slp()) { 2244 uint slp_max_unroll_factor = cl->slp_max_unroll(); 2245 if (slp_max_unroll_factor == max_vlen) { 2246 NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte)); 2247 // For atomic unrolled loops which are vector mapped, instigate more unrolling. 2248 cl->set_notpassed_slp(); 2249 C->set_major_progress(); 2250 cl->mark_do_unroll_only(); 2251 } 2252 } 2253 } 2254 2255 if (do_reserve_copy()) { 2256 make_reversable.use_new(); 2257 } 2258 NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);}) 2259 return; 2260 } 2261 2262 //------------------------------vector_opd--------------------------- 2263 // Create a vector operand for the nodes in pack p for operand: in(opd_idx) 2264 Node* SuperWord::vector_opd(Node_List* p, int opd_idx) { 2265 Node* p0 = p->at(0); 2266 uint vlen = p->size(); 2267 Node* opd = p0->in(opd_idx); 2268 2269 if (same_inputs(p, opd_idx)) { 2270 if (opd->is_Vector() || opd->is_LoadVector()) { 2271 assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector"); 2272 if (opd_idx == 2 && VectorNode::is_shift(p0)) { 2273 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("shift's count can't be vector");}) 2274 return NULL; 2275 } 2276 return opd; // input is matching vector 2277 } 2278 if ((opd_idx == 2) && VectorNode::is_shift(p0)) { 2279 Compile* C = _phase->C; 2280 Node* cnt = opd; 2281 // Vector instructions do not mask shift count, do it here. 2282 juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1); 2283 const TypeInt* t = opd->find_int_type(); 2284 if (t != NULL && t->is_con()) { 2285 juint shift = t->get_con(); 2286 if (shift > mask) { // Unsigned cmp 2287 cnt = ConNode::make(TypeInt::make(shift & mask)); 2288 } 2289 } else { 2290 if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) { 2291 cnt = ConNode::make(TypeInt::make(mask)); 2292 _igvn.register_new_node_with_optimizer(cnt); 2293 cnt = new AndINode(opd, cnt); 2294 _igvn.register_new_node_with_optimizer(cnt); 2295 _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); 2296 } 2297 assert(opd->bottom_type()->isa_int(), "int type only"); 2298 if (!opd->bottom_type()->isa_int()) { 2299 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should be int type only");}) 2300 return NULL; 2301 } 2302 // Move non constant shift count into vector register. 2303 cnt = VectorNode::shift_count(p0, cnt, vlen, velt_basic_type(p0)); 2304 } 2305 if (cnt != opd) { 2306 _igvn.register_new_node_with_optimizer(cnt); 2307 _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); 2308 } 2309 return cnt; 2310 } 2311 assert(!opd->is_StoreVector(), "such vector is not expected here"); 2312 if (opd->is_StoreVector()) { 2313 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("StoreVector is not expected here");}) 2314 return NULL; 2315 } 2316 // Convert scalar input to vector with the same number of elements as 2317 // p0's vector. Use p0's type because size of operand's container in 2318 // vector should match p0's size regardless operand's size. 2319 const Type* p0_t = velt_type(p0); 2320 VectorNode* vn = VectorNode::scalar2vector(opd, vlen, p0_t); 2321 2322 _igvn.register_new_node_with_optimizer(vn); 2323 _phase->set_ctrl(vn, _phase->get_ctrl(opd)); 2324 #ifdef ASSERT 2325 if (TraceNewVectors) { 2326 tty->print("new Vector node: "); 2327 vn->dump(); 2328 } 2329 #endif 2330 return vn; 2331 } 2332 2333 // Insert pack operation 2334 BasicType bt = velt_basic_type(p0); 2335 PackNode* pk = PackNode::make(opd, vlen, bt); 2336 DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); ) 2337 2338 for (uint i = 1; i < vlen; i++) { 2339 Node* pi = p->at(i); 2340 Node* in = pi->in(opd_idx); 2341 assert(my_pack(in) == NULL, "Should already have been unpacked"); 2342 if (my_pack(in) != NULL) { 2343 NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");}) 2344 return NULL; 2345 } 2346 assert(opd_bt == in->bottom_type()->basic_type(), "all same type"); 2347 pk->add_opd(in); 2348 } 2349 _igvn.register_new_node_with_optimizer(pk); 2350 _phase->set_ctrl(pk, _phase->get_ctrl(opd)); 2351 #ifdef ASSERT 2352 if (TraceNewVectors) { 2353 tty->print("new Vector node: "); 2354 pk->dump(); 2355 } 2356 #endif 2357 return pk; 2358 } 2359 2360 //------------------------------insert_extracts--------------------------- 2361 // If a use of pack p is not a vector use, then replace the 2362 // use with an extract operation. 2363 void SuperWord::insert_extracts(Node_List* p) { 2364 if (p->at(0)->is_Store()) return; 2365 assert(_n_idx_list.is_empty(), "empty (node,index) list"); 2366 2367 // Inspect each use of each pack member. For each use that is 2368 // not a vector use, replace the use with an extract operation. 2369 2370 for (uint i = 0; i < p->size(); i++) { 2371 Node* def = p->at(i); 2372 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { 2373 Node* use = def->fast_out(j); 2374 for (uint k = 0; k < use->req(); k++) { 2375 Node* n = use->in(k); 2376 if (def == n) { 2377 Node_List* u_pk = my_pack(use); 2378 if ((u_pk == NULL || !is_cmov_pack(u_pk) || use->is_CMove()) && !is_vector_use(use, k)) { 2379 _n_idx_list.push(use, k); 2380 } 2381 } 2382 } 2383 } 2384 } 2385 2386 while (_n_idx_list.is_nonempty()) { 2387 Node* use = _n_idx_list.node(); 2388 int idx = _n_idx_list.index(); 2389 _n_idx_list.pop(); 2390 Node* def = use->in(idx); 2391 2392 if (def->is_reduction()) continue; 2393 2394 // Insert extract operation 2395 _igvn.hash_delete(def); 2396 int def_pos = alignment(def) / data_size(def); 2397 2398 Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def)); |