src/share/vm/opto/superword.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File hotspot Cdiff src/share/vm/opto/superword.cpp

src/share/vm/opto/superword.cpp

Print this page

        

*** 63,73 **** _stk(arena(), 8, 0, NULL), // scratch stack of nodes _nlist(arena(), 8, 0, NULL), // scratch list of nodes _lpt(NULL), // loop tree node _lp(NULL), // LoopNode _bb(NULL), // basic block ! _iv(NULL) // induction var {} //------------------------------transform_loop--------------------------- void SuperWord::transform_loop(IdealLoopTree* lpt) { assert(UseSuperWord, "should be"); --- 63,74 ---- _stk(arena(), 8, 0, NULL), // scratch stack of nodes _nlist(arena(), 8, 0, NULL), // scratch list of nodes _lpt(NULL), // loop tree node _lp(NULL), // LoopNode _bb(NULL), // basic block ! _iv(NULL), // induction var ! _race_possible(false) // cases where SDMU is true {} //------------------------------transform_loop--------------------------- void SuperWord::transform_loop(IdealLoopTree* lpt) { assert(UseSuperWord, "should be");
*** 143,153 **** // extraction of scalar values from vectors. // void SuperWord::SLP_extract() { // Ready the block - if (!construct_bb()) return; // Exit if no interesting nodes or complex graph. dependence_graph(); --- 144,153 ----
*** 638,648 **** if (Matcher::max_vector_size(bt1) < 2) { return false; // No vectors for this type } if (isomorphic(s1, s2)) { ! if (independent(s1, s2)) { if (!exists_at(s1, 0) && !exists_at(s2, 1)) { if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { int s1_align = alignment(s1); int s2_align = alignment(s2); if (s1_align == top_align || s1_align == align) { --- 638,648 ---- if (Matcher::max_vector_size(bt1) < 2) { return false; // No vectors for this type } if (isomorphic(s1, s2)) { ! if (independent(s1, s2) || reduction(s1, s2)) { if (!exists_at(s1, 0) && !exists_at(s2, 1)) { if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { int s1_align = alignment(s1); int s2_align = alignment(s2); if (s1_align == top_align || s1_align == align) {
*** 716,725 **** --- 716,747 ---- visited_clear(); return independent_path(shallow, deep); } + //------------------------------reduction--------------------------- + // Is there a data path between s1 and s2 and the nodes reductions? + bool SuperWord::reduction(Node* s1, Node* s2) { + bool retValue = false; + int d1 = depth(s1); + int d2 = depth(s2); + if (d1 + 1 == d2) { + if (s1->is_reduction() && s2->is_reduction()) { + // This is an ordered set, so s1 should define s2 + for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { + Node* t1 = s1->fast_out(i); + if (t1 == s2) { + // both nodes are reductions and connected + retValue = true; + } + } + } + } + + return retValue; + } + //------------------------------independent_path------------------------------ // Helper for independent bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) { if (dp >= 1000) return false; // stop deep recursion visited_set(deep);
*** 759,776 **** --- 781,806 ---- //------------------------------extend_packlist--------------------------- // Extend packset by following use->def and def->use links from pack members. void SuperWord::extend_packlist() { bool changed; do { + packset_sort(_packset.length()); changed = false; for (int i = 0; i < _packset.length(); i++) { Node_List* p = _packset.at(i); changed |= follow_use_defs(p); changed |= follow_def_uses(p); } } while (changed); + if (_race_possible) { + for (int i = 0; i < _packset.length(); i++) { + Node_List* p = _packset.at(i); + order_def_uses(p); + } + } + #ifndef PRODUCT if (TraceSuperWord) { tty->print_cr("\nAfter extend_packlist"); print_packset(); }
*** 823,836 **** --- 853,868 ---- if (s1->is_Store()) return false; int align = alignment(s1); int savings = -1; + int num_s1_uses = 0; Node* u1 = NULL; Node* u2 = NULL; for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { Node* t1 = s1->fast_out(i); + num_s1_uses++; if (!in_bb(t1)) continue; for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { Node* t2 = s2->fast_out(j); if (!in_bb(t2)) continue; if (!opnd_positions_match(s1, t1, s2, t2))
*** 843,852 **** --- 875,887 ---- u2 = t2; } } } } + if (num_s1_uses > 1) { + _race_possible = true; + } if (savings >= 0) { Node_List* pair = new Node_List(); pair->push(u1); pair->push(u2); _packset.append(pair);
*** 854,866 **** --- 889,956 ---- changed = true; } return changed; } + //------------------------------order_def_uses--------------------------- + // For extended packsets, ordinally arrange uses packset by major component + void SuperWord::order_def_uses(Node_List* p) { + Node* s1 = p->at(0); + + if (s1->is_Store()) return; + + // reductions are always managed beforehand + if (s1->is_reduction()) return; + + for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { + Node* t1 = s1->fast_out(i); + + // Only allow operand swap on commuting operations + if (!t1->is_Add() && !t1->is_Mul()) { + break; + } + + // Now find t1's packset + Node_List* p2 = NULL; + for (int j = 0; j < _packset.length(); j++) { + p2 = _packset.at(j); + Node* first = p2->at(0); + if (t1 == first) { + break; + } + p2 = NULL; + } + // Arrange all sub components by the major component + if (p2 != NULL) { + for (uint j = 1; j < p->size(); j++) { + Node* d1 = p->at(j); + Node* u1 = p2->at(j); + opnd_positions_match(s1, t1, d1, u1); + } + } + } + } + //---------------------------opnd_positions_match------------------------- // Is the use of d1 in u1 at the same operand position as d2 in u2? bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) { + // check reductions to see if they are marshalled to represent the reduction + // operator in a specified opnd + if (u1->is_reduction() && u2->is_reduction()) { + // ensure reductions have phis and reduction definitions feeding the 1st operand + Node* first = u1->in(2); + if (first->is_Phi() || first->is_reduction()) { + u1->swap_edges(1, 2); + } + // ensure reductions have phis and reduction definitions feeding the 1st operand + first = u2->in(2); + if (first->is_Phi() || first->is_reduction()) { + u2->swap_edges(1, 2); + } + return true; + } + uint ct = u1->req(); if (ct != u2->req()) return false; uint i1 = 0; uint i2 = 0; do {
*** 938,948 **** while (changed) { changed = false; for (int i = 0; i < _packset.length(); i++) { Node_List* p1 = _packset.at(i); if (p1 == NULL) continue; ! for (int j = 0; j < _packset.length(); j++) { Node_List* p2 = _packset.at(j); if (p2 == NULL) continue; if (i == j) continue; if (p1->at(p1->size()-1) == p2->at(0)) { for (uint k = 1; k < p2->size(); k++) { --- 1028,1039 ---- while (changed) { changed = false; for (int i = 0; i < _packset.length(); i++) { Node_List* p1 = _packset.at(i); if (p1 == NULL) continue; ! // Because of sorting we can start at i + 1 ! for (int j = i + 1; j < _packset.length(); j++) { Node_List* p2 = _packset.at(j); if (p2 == NULL) continue; if (i == j) continue; if (p1->at(p1->size()-1) == p2->at(0)) { for (uint k = 1; k < p2->size(); k++) {
*** 1065,1076 **** } //------------------------------implemented--------------------------- // Can code be generated for pack p? bool SuperWord::implemented(Node_List* p) { Node* p0 = p->at(0); ! return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0)); } //------------------------------same_inputs-------------------------- // For pack p, are all idx operands the same? static bool same_inputs(Node_List* p, int idx) { --- 1156,1178 ---- } //------------------------------implemented--------------------------- // Can code be generated for pack p? bool SuperWord::implemented(Node_List* p) { + bool retValue = false; Node* p0 = p->at(0); ! if (p0 != NULL) { ! int opc = p0->Opcode(); ! uint size = p->size(); ! if (p0->is_reduction()) { ! const Type *arith_type = p0->bottom_type(); ! retValue = ReductionNode::implemented(opc, size, arith_type->basic_type()); ! } else { ! retValue = VectorNode::implemented(opc, size, velt_basic_type(p0)); ! } ! } ! return retValue; } //------------------------------same_inputs-------------------------- // For pack p, are all idx operands the same? static bool same_inputs(Node_List* p, int idx) {
*** 1100,1109 **** --- 1202,1223 ---- // (maybe just the ones from outside the block.) for (uint i = start; i < end; i++) { if (!is_vector_use(p0, i)) return false; } + // Check if reductions are connected + if (p0->is_reduction()) { + Node* second_in = p0->in(2); + Node_List* second_pk = my_pack(second_in); + if (second_pk == NULL) { + // Remove reduction flag if no parent pack, it is not profitable + p0->remove_flag(Node::Flag_is_reduction); + return false; + } else if (second_pk->size() != p->size()) { + return false; + } + } if (VectorNode::is_shift(p0)) { // For now, return false if shift count is vector or not scalar promotion // case (different shift counts) because it is not supported yet. Node* cnt = p0->in(2); Node_List* cnt_pk = my_pack(cnt);
*** 1121,1130 **** --- 1235,1247 ---- for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { Node* use = def->fast_out(j); for (uint k = 0; k < use->req(); k++) { Node* n = use->in(k); if (def == n) { + // reductions can be loop carried dependences + if (def->is_reduction() && use->is_Phi()) + continue; if (!is_vector_use(use, k)) { return false; } } }
*** 1405,1424 **** const TypePtr* atyp = n->adr_type(); vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen); vlen_in_bytes = vn->as_StoreVector()->memory_size(); } else if (n->req() == 3) { // Promote operands to vector ! Node* in1 = vector_opd(p, 1); Node* in2 = vector_opd(p, 2); ! if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) { // Move invariant vector input into second position to avoid register spilling. Node* tmp = in1; in1 = in2; in2 = tmp; } vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); vlen_in_bytes = vn->as_Vector()->length_in_bytes(); } else { ShouldNotReachHere(); } assert(vn != NULL, "sanity"); _igvn.register_new_node_with_optimizer(vn); --- 1522,1558 ---- const TypePtr* atyp = n->adr_type(); vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen); vlen_in_bytes = vn->as_StoreVector()->memory_size(); } else if (n->req() == 3) { // Promote operands to vector ! Node* in1 = NULL; ! bool node_isa_reduction = n->is_reduction(); ! if (node_isa_reduction) { ! // the input to the first reduction operation is retained ! in1 = low_adr->in(1); ! } else { ! in1 = vector_opd(p, 1); ! } Node* in2 = vector_opd(p, 2); ! if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) { // Move invariant vector input into second position to avoid register spilling. Node* tmp = in1; in1 = in2; in2 = tmp; } + if (node_isa_reduction) { + const Type *arith_type = n->bottom_type(); + vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type()); + if (in2->is_Load()) { + vlen_in_bytes = in2->as_LoadVector()->memory_size(); + } else { + vlen_in_bytes = in2->as_Vector()->length_in_bytes(); + } + } else { vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n)); vlen_in_bytes = vn->as_Vector()->length_in_bytes(); + } } else { ShouldNotReachHere(); } assert(vn != NULL, "sanity"); _igvn.register_new_node_with_optimizer(vn);
*** 1554,1563 **** --- 1688,1699 ---- Node* use = _n_idx_list.node(); int idx = _n_idx_list.index(); _n_idx_list.pop(); Node* def = use->in(idx); + if (def->is_reduction()) continue; + // Insert extract operation _igvn.hash_delete(def); int def_pos = alignment(def) / data_size(def); Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));
*** 1574,1583 **** --- 1710,1720 ---- //------------------------------is_vector_use--------------------------- // Is use->in(u_idx) a vector use? bool SuperWord::is_vector_use(Node* use, int u_idx) { Node_List* u_pk = my_pack(use); if (u_pk == NULL) return false; + if (use->is_reduction()) return true; Node* def = use->in(u_idx); Node_List* d_pk = my_pack(def); if (d_pk == NULL) { // check for scalar promotion Node* n = u_pk->at(0)->in(u_idx);
*** 1611,1621 **** // Find non-control nodes with no inputs from within block, // create a temporary map from node _idx to bb_idx for use // by the visited and post_visited sets, // and count number of nodes in block. int bb_ct = 0; ! for (uint i = 0; i < lpt()->_body.size(); i++ ) { Node *n = lpt()->_body.at(i); set_bb_idx(n, i); // Create a temporary map if (in_bb(n)) { if (n->is_LoadStore() || n->is_MergeMem() || (n->is_Proj() && !n->as_Proj()->is_CFG())) { --- 1748,1758 ---- // Find non-control nodes with no inputs from within block, // create a temporary map from node _idx to bb_idx for use // by the visited and post_visited sets, // and count number of nodes in block. int bb_ct = 0; ! for (uint i = 0; i < lpt()->_body.size(); i++) { Node *n = lpt()->_body.at(i); set_bb_idx(n, i); // Create a temporary map if (in_bb(n)) { if (n->is_LoadStore() || n->is_MergeMem() || (n->is_Proj() && !n->as_Proj()->is_CFG())) {
*** 1672,1681 **** --- 1809,1819 ---- _stk.push(entry); // Do a depth first walk over out edges int rpo_idx = bb_ct - 1; int size; + int reduction_uses = 0; while ((size = _stk.length()) > 0) { Node* n = _stk.top(); // Leave node on stack if (!visited_test_set(n)) { // forward arc in graph } else if (!post_visited_test(n)) {
*** 1683,1693 **** --- 1821,1847 ---- for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node *use = n->fast_out(i); if (in_bb(use) && !visited_test(use) && // Don't go around backedge (!use->is_Phi() || n == entry)) { + if (use->is_reduction()) { + // First see if we can map the reduction on the given system we are on, then + // make a data entry operation for each reduction we see. + const Type *arith_type = use->bottom_type(); + int vopc = ReductionNode::opcode(use->Opcode(), arith_type->basic_type()); + if (vopc != use->Opcode()) { + if (Matcher::match_rule_supported(vopc)) { _stk.push(use); + reduction_uses++; + } else { + // failed a support issue + return false; + } + } + } else { + _stk.push(use); + } } } if (_stk.length() == size) { // There were no additional uses, post visit node now _stk.pop(); // Remove node from stack
*** 1706,1716 **** for (int j = 0; j < _block.length(); j++) { Node* n = _block.at(j); set_bb_idx(n, j); } ! initialize_bb(); // Ensure extra info is allocated. #ifndef PRODUCT if (TraceSuperWord) { print_bb(); tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); --- 1860,1871 ---- for (int j = 0; j < _block.length(); j++) { Node* n = _block.at(j); set_bb_idx(n, j); } ! // Ensure extra info is allocated. ! initialize_bb(); #ifndef PRODUCT if (TraceSuperWord) { print_bb(); tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
*** 1724,1734 **** tty->print(" "); _mem_slice_tail.at(m)->dump(); } } #endif assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found"); ! return (_mem_slice_head.length() > 0) || (_data_entry.length() > 0); } //------------------------------initialize_bb--------------------------- // Initialize per node info void SuperWord::initialize_bb() { --- 1879,1889 ---- tty->print(" "); _mem_slice_tail.at(m)->dump(); } } #endif assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found"); ! return (_mem_slice_head.length() > 0) || (reduction_uses > 0) || (_data_entry.length() > 0); } //------------------------------initialize_bb--------------------------- // Initialize per node info void SuperWord::initialize_bb() {
*** 1957,1966 **** --- 2112,2143 ---- set_my_pack(s, NULL); } _packset.remove_at(pos); } + void SuperWord::packset_sort(int n) + { + // simple bubble sort so that we capitalize with O(n) when its already sorted + while (n != 0) { + bool swapped = false; + for (int i = 1; i < n; i++) { + Node_List* q_low = _packset.at(i-1); + Node_List* q_i = _packset.at(i); + + // only swap when we find something to swap + if (alignment(q_low->at(0)) > alignment(q_i->at(0))) { + Node_List* t = q_i; + *(_packset.adr_at(i)) = q_low; + *(_packset.adr_at(i-1)) = q_i; + swapped = true; + } + } + if (swapped == false) break; + n--; + } + } + //------------------------------executed_first--------------------------- // Return the node executed first in pack p. Uses the RPO block list // to determine order. Node* SuperWord::executed_first(Node_List* p) { Node* n = p->at(0);
src/share/vm/opto/superword.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File