--- old/src/share/vm/opto/loopTransform.cpp 2015-06-15 13:37:15.900151500 -0700 +++ new/src/share/vm/opto/loopTransform.cpp 2015-06-15 13:37:15.690130500 -0700 @@ -38,6 +38,7 @@ #include "opto/rootnode.hpp" #include "opto/runtime.hpp" #include "opto/subnode.hpp" +#include "opto/superword.hpp" #include "opto/vectornode.hpp" //------------------------------is_loop_exit----------------------------------- @@ -640,7 +641,7 @@ //------------------------------policy_unroll---------------------------------- // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if // the loop is a CountedLoop and the body is small enough. -bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const { +bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) { CountedLoopNode *cl = _head->as_CountedLoop(); assert(cl->is_normal_loop() || cl->is_main_loop(), ""); @@ -652,6 +653,8 @@ // After split at least one iteration will be executed in pre-loop. if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false; + _local_loop_unroll_limit = LoopUnrollLimit; + _local_loop_unroll_factor = 4; int future_unroll_ct = cl->unrolled_count() * 2; if (future_unroll_ct > LoopMaxUnroll) return false; @@ -747,8 +750,23 @@ } // switch } + if (UseSuperWord) { + if (!cl->is_reduction_loop()) { + phase->mark_reductions(this); + } + + // Only attempt slp analysis when user controls do not prohibit it + if (LoopMaxUnroll > _local_loop_unroll_factor) { + // Once policy_slp_analysis succeeds, mark the loop with the + // maximal unroll factor so that we minimize analysis passes + if (future_unroll_ct > _local_loop_unroll_factor) { + policy_unroll_slp_analysis(cl, phase, future_unroll_ct); + } + } + } + // Check for being too big - if (body_size > (uint)LoopUnrollLimit) { + if (body_size > (uint)_local_loop_unroll_limit) { if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true; // Normal case: loop too big return false; @@ -758,6 +776,36 @@ return true; } +void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) { + // Enable this functionality target by target as needed + if (SuperWordLoopUnrollAnalysis) { + if (!cl->has_passed_slp()) { + SuperWord sw(phase); + sw.transform_loop(this, false); + + // If the loop is slp canonical analyze it + if (sw.early_return() == false) { + sw.unrolling_analysis(cl, _local_loop_unroll_factor); + } + } + + int slp_max_unroll_factor = cl->slp_max_unroll(); + if ((slp_max_unroll_factor > 4) && + (slp_max_unroll_factor >= future_unroll_ct)) { + int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor; + if (new_limit > LoopUnrollLimit) { +#ifndef PRODUCT + if (TraceSuperWordLoopUnrollAnalysis) { + tty->print_cr("slp analysis is applying unroll limit %d, the original limit was %d\n", + new_limit, _local_loop_unroll_limit); + } +#endif + _local_loop_unroll_limit = new_limit; + } + } + } +} + //------------------------------policy_align----------------------------------- // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the // expression that does the alignment. Note that only one array base can be @@ -1585,6 +1633,7 @@ for (unsigned j = 1; j < def_node->req(); j++) { Node* in = def_node->in(j); if (in == phi) { + loop_head->mark_has_reductions(); def_node->add_flag(Node::Flag_is_reduction); break; } @@ -2435,7 +2484,6 @@ // and we'd rather unroll the post-RCE'd loop SO... do not unroll if // peeling. if (should_unroll && !should_peel) { - phase->mark_reductions(this); phase->do_unroll(this, old_new, true); }