--- old/src/cpu/aarch64/vm/c2_globals_aarch64.hpp 2016-04-27 17:48:36.207829840 +0200 +++ new/src/cpu/aarch64/vm/c2_globals_aarch64.hpp 2016-04-27 17:48:36.138830134 +0200 @@ -71,7 +71,7 @@ define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoRegScheduling, false); -define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); --- old/src/share/vm/opto/superword.cpp 2016-04-27 17:48:36.562828326 +0200 +++ new/src/share/vm/opto/superword.cpp 2016-04-27 17:48:36.470828718 +0200 @@ -198,7 +198,7 @@ ignored_loop_nodes[i] = -1; } - int max_vector = Matcher::max_vector_size(T_INT); + int max_vector = Matcher::max_vector_size(T_BYTE); bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()); // Process the loop, some/all of the stack entries will not be in order, ergo @@ -298,6 +298,10 @@ if (is_slp) { // Now we try to find the maximum supported consistent vector which the machine // description can use + int implemented[T_LONG+1]; + for (uint i = 0; i <= T_LONG; i++) { + implemented[i] = -1; + } bool small_basic_type = false; for (uint i = 0; i < lpt()->_body.size(); i++) { if (ignored_loop_nodes[i] != -1) continue; @@ -332,6 +336,32 @@ if (is_java_primitive(bt) == false) continue; int cur_max_vector = Matcher::max_vector_size(bt); + bool impl = VectorNode::implemented(n->Opcode(), cur_max_vector, bt); + + if (impl) { + // We only process post loops on predicated targets where we want to + // mask map the loop to a single iteration + if (post_loop_allowed) { + _post_block.at_put_grow(rpo_idx++, n); + } + } + + assert(bt <= T_LONG, "bad basic type"); + if (implemented[bt] == 0) { + continue; + } + + implemented[bt] = (int)impl; + } + + is_slp = false; + for (uint i = T_BOOLEAN; i <= T_LONG; i++) { + BasicType bt = (BasicType)i; + if (implemented[bt] == -1 || implemented[bt] == 0) { + continue; + } + + int cur_max_vector = Matcher::max_vector_size(bt); // If a max vector exists which is not larger than _local_loop_unroll_factor // stop looking, we already have the max vector to map to. @@ -343,17 +373,10 @@ break; } - // Map the maximal common vector - if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) { - if (cur_max_vector < max_vector) { - max_vector = cur_max_vector; - } + is_slp = true; - // We only process post loops on predicated targets where we want to - // mask map the loop to a single iteration - if (post_loop_allowed) { - _post_block.at_put_grow(rpo_idx++, n); - } + if (cur_max_vector < max_vector) { + max_vector = cur_max_vector; } } if (is_slp) {