< prev index next >
src/share/vm/opto/loopTransform.cpp
Print this page
*** 36,45 ****
--- 36,46 ----
#include "opto/movenode.hpp"
#include "opto/opaquenode.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
+ #include "opto/superword.hpp"
#include "opto/vectornode.hpp"
//------------------------------is_loop_exit-----------------------------------
// Given an IfNode, return the loop-exiting projection or NULL if both
// arms remain in the loop.
*** 638,648 ****
//------------------------------policy_unroll----------------------------------
// Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
// the loop is a CountedLoop and the body is small enough.
! bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
CountedLoopNode *cl = _head->as_CountedLoop();
assert(cl->is_normal_loop() || cl->is_main_loop(), "");
if (!cl->is_valid_counted_loop())
--- 639,649 ----
//------------------------------policy_unroll----------------------------------
// Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
// the loop is a CountedLoop and the body is small enough.
! bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) {
CountedLoopNode *cl = _head->as_CountedLoop();
assert(cl->is_normal_loop() || cl->is_main_loop(), "");
if (!cl->is_valid_counted_loop())
*** 650,659 ****
--- 651,662 ----
// Protect against over-unrolling.
// After split at least one iteration will be executed in pre-loop.
if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
+ _local_loop_unroll_limit = LoopUnrollLimit;
+ _local_loop_unroll_factor = 4;
int future_unroll_ct = cl->unrolled_count() * 2;
if (future_unroll_ct > LoopMaxUnroll) return false;
// Check for initial stride being a small enough constant
if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
*** 745,765 ****
}
#endif
} // switch
}
// Check for being too big
! if (body_size > (uint)LoopUnrollLimit) {
if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
// Normal case: loop too big
return false;
}
// Unroll once! (Each trip will soon do double iterations)
return true;
}
//------------------------------policy_align-----------------------------------
// Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
// expression that does the alignment. Note that only one array base can be
// aligned in a loop (unless the VM guarantees mutual alignment). Note that
// if we vectorize short memory ops into longer memory ops, we may want to
--- 748,813 ----
}
#endif
} // switch
}
+ if (UseSuperWord) {
+ if (!cl->is_reduction_loop()) {
+ phase->mark_reductions(this);
+ }
+
+ // Only attempt slp analysis when user controls do not prohibit it
+ if (LoopMaxUnroll > _local_loop_unroll_factor) {
+ // Once policy_slp_analysis succeeds, mark the loop with the
+ // maximal unroll factor so that we minimize analysis passes
+ if (future_unroll_ct > _local_loop_unroll_factor) {
+ policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
+ }
+ }
+ }
+
// Check for being too big
! if (body_size > (uint)_local_loop_unroll_limit) {
if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
// Normal case: loop too big
return false;
}
// Unroll once! (Each trip will soon do double iterations)
return true;
}
+ void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
+ // Consult SuperWord (slp) analysis for this counted loop and, if it reports a large enough unroll factor, raise _local_loop_unroll_limit. Enable this functionality target by target as needed
+ if (SuperWordLoopUnrollAnalysis) {  // whole routine is a no-op unless this flag is set
+ if (!cl->has_passed_slp()) {  // run the SuperWord pass only while the loop does not yet report having passed slp
+ SuperWord sw(phase);
+ sw.transform_loop(this, false);  // NOTE(review): second argument is false, presumably "analyze only, do not transform" - confirm against SuperWord::transform_loop
+ // early_return() is queried right after transform_loop(); the unrolling analysis is skipped when it is set
+ // If the loop is slp canonical analyze it
+ if (sw.early_return() == false) {
+ sw.unrolling_analysis(cl, _local_loop_unroll_factor);  // NOTE(review): presumably stores its result on cl, read back through slp_max_unroll() below - confirm
+ }
+ }
+ // Read the slp unroll factor from the loop node and decide whether the unroll limit should be raised
+ int slp_max_unroll_factor = cl->slp_max_unroll();
+ if ((slp_max_unroll_factor > 4) &&  // 4 is also the value policy_unroll assigns to _local_loop_unroll_factor
+ (slp_max_unroll_factor >= future_unroll_ct)) {  // factor must cover the unroll count the caller is about to request
+ int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;  // pre-unroll node count scaled by the slp factor
+ if (new_limit > LoopUnrollLimit) {  // never lower the limit below the global LoopUnrollLimit
+ #ifndef PRODUCT
+ if (TraceSuperWordLoopUnrollAnalysis) {  // tracing is compiled only into non-PRODUCT builds
+ tty->print_cr("slp analysis is applying unroll limit %d, the original limit was %d\n",
+ new_limit, _local_loop_unroll_limit);
+ }
+ #endif
+ _local_loop_unroll_limit = new_limit;  // policy_unroll compares body_size against this limit
+ }
+ }
+ }
+ }
+
//------------------------------policy_align-----------------------------------
// Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
// expression that does the alignment. Note that only one array base can be
// aligned in a loop (unless the VM guarantees mutual alignment). Note that
// if we vectorize short memory ops into longer memory ops, we may want to
*** 1583,1592 ****
--- 1631,1641 ----
if (!def_node->is_reduction()) { // Not marked yet
// To be a reduction, the arithmetic node must have the phi as input and provide a def to it
for (unsigned j = 1; j < def_node->req(); j++) {
Node* in = def_node->in(j);
if (in == phi) {
+ loop_head->mark_has_reductions();
def_node->add_flag(Node::Flag_is_reduction);
break;
}
}
}
*** 2433,2443 ****
// twice as many iterations as before) and the main body limit (only do
// an even number of trips). If we are peeling, we might enable some RCE
// and we'd rather unroll the post-RCE'd loop SO... do not unroll if
// peeling.
if (should_unroll && !should_peel) {
- phase->mark_reductions(this);
phase->do_unroll(this, old_new, true);
}
// Adjust the pre-loop limits to align the main body
// iterations.
--- 2482,2491 ----
< prev index next >