< prev index next >
src/share/vm/opto/superword.cpp
Print this page
rev 10955 : undo
rev 10956 : 8154939: 8153998 broke vectorization on aarch64
Summary: code assumes SuperWordLoopUnrollAnalysis is on, which is not true on aarch64
Reviewed-by:
rev 10962 : vectorization fix
rev 10963 : vectorization fix exp
@@ -196,11 +196,11 @@
// First clear the entries
for (uint i = 0; i < lpt()->_body.size(); i++) {
ignored_loop_nodes[i] = -1;
}
- int max_vector = Matcher::max_vector_size(T_INT);
+ int max_vector = Matcher::max_vector_size(T_BYTE);
bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
// Process the loop, some/all of the stack entries will not be in order, ergo
// need to preprocess the ignored initial state before we process the loop
for (uint i = 0; i < lpt()->_body.size(); i++) {
@@ -296,10 +296,14 @@
}
if (is_slp) {
// Now we try to find the maximum supported consistent vector which the machine
// description can use
+ int implemented[T_LONG+1];
+ for (uint i = 0; i <= T_LONG; i++) {
+ implemented[i] = -1;
+ }
bool small_basic_type = false;
for (uint i = 0; i < lpt()->_body.size(); i++) {
if (ignored_loop_nodes[i] != -1) continue;
BasicType bt;
@@ -330,10 +334,36 @@
}
if (is_java_primitive(bt) == false) continue;
int cur_max_vector = Matcher::max_vector_size(bt);
+ bool impl = VectorNode::implemented(n->Opcode(), cur_max_vector, bt);
+
+ if (impl) {
+ // We only process post loops on predicated targets where we want to
+ // mask map the loop to a single iteration
+ if (post_loop_allowed) {
+ _post_block.at_put_grow(rpo_idx++, n);
+ }
+ }
+
+ assert(bt <= T_LONG, "bad basic type");
+ if (implemented[bt] == 0) {
+ continue;
+ }
+
+ implemented[bt] = (int)impl;
+ }
+
+ is_slp = false;
+ for (uint i = T_BOOLEAN; i <= T_LONG; i++) {
+ BasicType bt = (BasicType)i;
+ if (implemented[bt] == -1 || implemented[bt] == 0) {
+ continue;
+ }
+
+ int cur_max_vector = Matcher::max_vector_size(bt);
// If a max vector exists which is not larger than _local_loop_unroll_factor
// stop looking, we already have the max vector to map to.
if (cur_max_vector < local_loop_unroll_factor) {
is_slp = false;
@@ -341,22 +371,15 @@
tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
}
break;
}
- // Map the maximal common vector
- if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
+ is_slp = true;
+
if (cur_max_vector < max_vector) {
max_vector = cur_max_vector;
}
-
- // We only process post loops on predicated targets where we want to
- // mask map the loop to a single iteration
- if (post_loop_allowed) {
- _post_block.at_put_grow(rpo_idx++, n);
- }
- }
}
if (is_slp) {
local_loop_unroll_factor = max_vector;
cl->mark_passed_slp();
}
< prev index next >