< prev index next >

src/share/vm/opto/superword.cpp

Print this page
rev 10955 : undo
rev 10956 : 8154939: 8153998 broke vectorization on aarch64
Summary: code assumes SuperWordLoopUnrollAnalysis is on, which is not true on aarch64
Reviewed-by:
rev 10962 : vectorization fix
rev 10963 : vectorization fix exp

*** 196,206 **** // First clear the entries for (uint i = 0; i < lpt()->_body.size(); i++) { ignored_loop_nodes[i] = -1; } ! int max_vector = Matcher::max_vector_size(T_INT); bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); // Process the loop, some/all of the stack entries will not be in order, ergo // need to preprocess the ignored initial state before we process the loop for (uint i = 0; i < lpt()->_body.size(); i++) { --- 196,206 ---- // First clear the entries for (uint i = 0; i < lpt()->_body.size(); i++) { ignored_loop_nodes[i] = -1; } ! int max_vector = Matcher::max_vector_size(T_BYTE); bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); // Process the loop, some/all of the stack entries will not be in order, ergo // need to preprocess the ignored initial state before we process the loop for (uint i = 0; i < lpt()->_body.size(); i++) {
*** 296,305 **** --- 296,309 ---- } if (is_slp) { // Now we try to find the maximum supported consistent vector which the machine // description can use + int implemented[T_LONG+1]; + for (uint i = 0; i <= T_LONG; i++) { + implemented[i] = -1; + } bool small_basic_type = false; for (uint i = 0; i < lpt()->_body.size(); i++) { if (ignored_loop_nodes[i] != -1) continue; BasicType bt;
*** 330,339 **** --- 334,369 ---- } if (is_java_primitive(bt) == false) continue; int cur_max_vector = Matcher::max_vector_size(bt); + bool impl = VectorNode::implemented(n->Opcode(), cur_max_vector, bt); + + if (impl) { + // We only process post loops on predicated targets where we want to + // mask map the loop to a single iteration + if (post_loop_allowed) { + _post_block.at_put_grow(rpo_idx++, n); + } + } + + assert(bt <= T_LONG, "bad basic type"); + if (implemented[bt] == 0) { + continue; + } + + implemented[bt] = (int)impl; + } + + is_slp = false; + for (uint i = T_BOOLEAN; i <= T_LONG; i++) { + BasicType bt = (BasicType)i; + if (implemented[bt] == -1 || implemented[bt] == 0) { + continue; + } + + int cur_max_vector = Matcher::max_vector_size(bt); // If a max vector exists which is not larger than _local_loop_unroll_factor // stop looking, we already have the max vector to map to. if (cur_max_vector < local_loop_unroll_factor) { is_slp = false;
*** 341,362 **** tty->print_cr("slp analysis fails: unroll limit greater than max vector\n"); } break; } ! // Map the maximal common vector ! if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) { if (cur_max_vector < max_vector) { max_vector = cur_max_vector; } - - // We only process post loops on predicated targets where we want to - // mask map the loop to a single iteration - if (post_loop_allowed) { - _post_block.at_put_grow(rpo_idx++, n); - } - } } if (is_slp) { local_loop_unroll_factor = max_vector; cl->mark_passed_slp(); } --- 371,385 ---- tty->print_cr("slp analysis fails: unroll limit greater than max vector\n"); } break; } ! is_slp = true; ! if (cur_max_vector < max_vector) { max_vector = cur_max_vector; } } if (is_slp) { local_loop_unroll_factor = max_vector; cl->mark_passed_slp(); }
< prev index next >