< prev index next >

src/share/vm/opto/loopTransform.cpp

Print this page

        

*** 36,45 **** --- 36,46 ---- #include "opto/movenode.hpp" #include "opto/opaquenode.hpp" #include "opto/rootnode.hpp" #include "opto/runtime.hpp" #include "opto/subnode.hpp" + #include "opto/superword.hpp" #include "opto/vectornode.hpp" //------------------------------is_loop_exit----------------------------------- // Given an IfNode, return the loop-exiting projection or NULL if both // arms remain in the loop.
*** 638,648 **** //------------------------------policy_unroll---------------------------------- // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if // the loop is a CountedLoop and the body is small enough. ! bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const { CountedLoopNode *cl = _head->as_CountedLoop(); assert(cl->is_normal_loop() || cl->is_main_loop(), ""); if (!cl->is_valid_counted_loop()) --- 639,649 ---- //------------------------------policy_unroll---------------------------------- // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if // the loop is a CountedLoop and the body is small enough. ! bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) { CountedLoopNode *cl = _head->as_CountedLoop(); assert(cl->is_normal_loop() || cl->is_main_loop(), ""); if (!cl->is_valid_counted_loop())
*** 650,659 **** --- 651,662 ---- // Protect against over-unrolling. // After split at least one iteration will be executed in pre-loop. if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false; + _local_loop_unroll_limit = LoopUnrollLimit; + _local_loop_unroll_factor = 4; int future_unroll_ct = cl->unrolled_count() * 2; if (future_unroll_ct > LoopMaxUnroll) return false; // Check for initial stride being a small enough constant if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
*** 745,765 **** } #endif } // switch } // Check for being too big ! if (body_size > (uint)LoopUnrollLimit) { if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true; // Normal case: loop too big return false; } // Unroll once! (Each trip will soon do double iterations) return true; } //------------------------------policy_align----------------------------------- // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the // expression that does the alignment. Note that only one array base can be // aligned in a loop (unless the VM guarantees mutual alignment). Note that // if we vectorize short memory ops into longer memory ops, we may want to --- 748,813 ---- } #endif } // switch } + if (UseSuperWord) { + if (!cl->is_reduction_loop()) { + phase->mark_reductions(this); + } + + // Only attempt slp analysis when user controls do not prohibit it + if (LoopMaxUnroll > _local_loop_unroll_factor) { + // Once policy_slp_analysis succeeds, mark the loop with the + // maximal unroll factor so that we minimize analysis passes + if (future_unroll_ct > _local_loop_unroll_factor) { + policy_unroll_slp_analysis(cl, phase, future_unroll_ct); + } + } + } + // Check for being too big ! if (body_size > (uint)_local_loop_unroll_limit) { if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true; // Normal case: loop too big return false; } // Unroll once! (Each trip will soon do double iterations) return true; } + void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) { + // Enable this functionality target by target as needed + if (SuperWordLoopUnrollAnalysis) { + if (!cl->has_passed_slp()) { + SuperWord sw(phase); + sw.transform_loop(this, false); + + // If the loop is slp canonical analyze it + if (sw.early_return() == false) { + sw.unrolling_analysis(cl, _local_loop_unroll_factor); + } + } + + int slp_max_unroll_factor = cl->slp_max_unroll(); + if ((slp_max_unroll_factor > 4) && + (slp_max_unroll_factor >= future_unroll_ct)) { + int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor; + if (new_limit > LoopUnrollLimit) { + #ifndef PRODUCT + if (TraceSuperWordLoopUnrollAnalysis) { + tty->print_cr("slp analysis is applying unroll limit %d, the original limit was %d\n", + new_limit, _local_loop_unroll_limit); + } + #endif + _local_loop_unroll_limit = new_limit; + } + } + } + } + //------------------------------policy_align----------------------------------- // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the // expression that does the alignment. Note that only one array base can be // aligned in a loop (unless the VM guarantees mutual alignment). Note that // if we vectorize short memory ops into longer memory ops, we may want to
*** 1583,1592 **** --- 1631,1641 ---- if (!def_node->is_reduction()) { // Not marked yet // To be a reduction, the arithmetic node must have the phi as input and provide a def to it for (unsigned j = 1; j < def_node->req(); j++) { Node* in = def_node->in(j); if (in == phi) { + loop_head->mark_has_reductions(); def_node->add_flag(Node::Flag_is_reduction); break; } } }
*** 2433,2443 **** // twice as many iterations as before) and the main body limit (only do // an even number of trips). If we are peeling, we might enable some RCE // and we'd rather unroll the post-RCE'd loop SO... do not unroll if // peeling. if (should_unroll && !should_peel) { - phase->mark_reductions(this); phase->do_unroll(this, old_new, true); } // Adjust the pre-loop limits to align the main body // iterations. --- 2482,2491 ----
< prev index next >