< prev index next >
src/share/vm/opto/loopTransform.cpp
Print this page
*** 278,287 ****
--- 278,291 ----
// Peeling does loop cloning which can result in O(N^2) node construction
if( body_size > 255 /* Prevent overflow for large body_size */
|| (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) {
return false; // too large to safely clone
}
+
+ // check for vectorized loops, any peeling done was already applied
+ if (_head->is_CountedLoop() && _head->as_CountedLoop()->ignore_slp()) return false;
+
while( test != _head ) { // Scan till run off top of loop
if( test->is_If() ) { // Test?
Node *ctrl = phase->get_ctrl(test->in(1));
if (ctrl->is_top())
return false; // Found dead test on live IF? No peeling!
*** 654,664 ****
--- 658,673 ----
if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
_local_loop_unroll_limit = LoopUnrollLimit;
_local_loop_unroll_factor = 4;
int future_unroll_ct = cl->unrolled_count() * 2;
+ if (!cl->ignore_slp()) {
if (future_unroll_ct > LoopMaxUnroll) return false;
+ } else {
+ // obey user constraints on vector mapped loops with additional unrolling applied
+ if ((future_unroll_ct / cl->slp_max_unroll()) > LoopMaxUnroll) return false;
+ }
// Check for initial stride being a small enough constant
if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
// Don't unroll if the next round of unrolling would push us
*** 757,812 ****
// Only attempt slp analysis when user controls do not prohibit it
if (LoopMaxUnroll > _local_loop_unroll_factor) {
// Once policy_slp_analysis succeeds, mark the loop with the
// maximal unroll factor so that we minimize analysis passes
! if ((future_unroll_ct > _local_loop_unroll_factor) ||
! (body_size > (uint)_local_loop_unroll_limit)) {
policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
}
}
}
// Check for being too big
if (body_size > (uint)_local_loop_unroll_limit) {
if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
// Normal case: loop too big
return false;
}
// Unroll once! (Each trip will soon do double iterations)
return true;
}
//------------------------------policy_unroll_slp_analysis---------------------
// Optionally run SuperWord (slp) unrolling analysis on counted loop 'cl' and,
// based on the loop's recorded slp maximum unroll factor, possibly raise
// _local_loop_unroll_limit. 'future_unroll_ct' is the unroll count the loop
// would have after the next unroll step. Returns nothing; the visible effects
// are whatever the SuperWord calls do plus the update of
// _local_loop_unroll_limit.
void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
// Enable this functionality target by target as needed
if (SuperWordLoopUnrollAnalysis) {
// Skip the analysis pass when the loop is already marked as having passed slp.
! if (!cl->has_passed_slp()) {
SuperWord sw(phase);
// NOTE(review): the 'false' argument presumably requests analysis only,
// without transforming the loop -- confirm against SuperWord::transform_loop.
sw.transform_loop(this, false);
// If the loop is slp canonical analyze it
if (sw.early_return() == false) {
// NOTE(review): _local_loop_unroll_factor is presumably updated by this
// call -- confirm against SuperWord::unrolling_analysis.
! sw.unrolling_analysis(cl, _local_loop_unroll_factor);
}
}
int slp_max_unroll_factor = cl->slp_max_unroll();
// Only adjust the limit when the slp factor exceeds 4 and still covers the
// next unroll step.
! if ((slp_max_unroll_factor > 4) &&
! (slp_max_unroll_factor >= future_unroll_ct)) {
// Scale the pre-unroll node count by the slp factor to get a candidate limit.
int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
// The candidate is applied only if it is larger than the global LoopUnrollLimit.
if (new_limit > LoopUnrollLimit) {
! #ifndef PRODUCT
// Trace output (non-product builds only): the second value printed is the
// local limit before it is overwritten below.
! if (TraceSuperWordLoopUnrollAnalysis) {
! tty->print_cr("slp analysis is applying unroll limit %d, the original limit was %d\n",
! new_limit, _local_loop_unroll_limit);
! }
! #endif
_local_loop_unroll_limit = new_limit;
}
}
}
}
//------------------------------policy_align-----------------------------------
// Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
// expression that does the alignment. Note that only one array base can be
--- 766,827 ----
// Only attempt slp analysis when user controls do not prohibit it
if (LoopMaxUnroll > _local_loop_unroll_factor) {
// Once policy_slp_analysis succeeds, mark the loop with the
// maximal unroll factor so that we minimize analysis passes
! if (future_unroll_ct >= _local_loop_unroll_factor) {
policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
}
}
}
+ int slp_max_unroll_factor = cl->slp_max_unroll();
+ if (cl->has_passed_slp()) {
+ if (slp_max_unroll_factor >= future_unroll_ct) return true;
+ // Doubling the unroll count would exceed the slp maximum unroll factor
+ return false;
+ }
+
// Check for being too big
if (body_size > (uint)_local_loop_unroll_limit) {
if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
// Normal case: loop too big
return false;
}
+ if(cl->ignore_slp()) {
+ NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct));
+ }
+
// Unroll once! (Each trip will soon do double iterations)
return true;
}
//------------------------------policy_unroll_slp_analysis---------------------
// Optionally run SuperWord (slp) unrolling analysis on counted loop 'cl' and,
// if the loop is marked as having passed slp, possibly raise
// _local_loop_unroll_limit based on the loop's slp maximum unroll factor.
// 'future_unroll_ct' is the unroll count the loop would have after the next
// unroll step. Returns nothing; the visible effects are whatever the
// SuperWord calls do plus the update of _local_loop_unroll_limit.
void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
// Enable this functionality target by target as needed
if (SuperWordLoopUnrollAnalysis) {
// Run the analysis at most once per loop: skip it when the loop is already
// marked as slp analyzed.
! if (!cl->was_slp_analyzed()) {
SuperWord sw(phase);
// NOTE(review): the 'false' argument presumably requests analysis only,
// without transforming the loop -- confirm against SuperWord::transform_loop.
sw.transform_loop(this, false);
// If the loop is slp canonical analyze it
if (sw.early_return() == false) {
// NOTE(review): _local_loop_unroll_factor is presumably updated by this
// call -- confirm against SuperWord::unrolling_analysis.
! sw.unrolling_analysis(_local_loop_unroll_factor);
}
}
// The unroll limit is only reconsidered for loops marked as having passed slp.
+ if (cl->has_passed_slp()) {
int slp_max_unroll_factor = cl->slp_max_unroll();
// The slp factor must still cover the next unroll step.
! if (slp_max_unroll_factor >= future_unroll_ct) {
// Scale the pre-unroll node count by the slp factor to get a candidate limit.
int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
// The candidate is applied only if it is larger than the global LoopUnrollLimit.
if (new_limit > LoopUnrollLimit) {
// Trace output: "unroll=" prints the new limit and "default limit=" prints
// the local limit before it is overwritten below.
// NOTE(review): print_cr presumably appends its own line terminator, so the
// trailing "\n" in the format may emit an extra blank line -- confirm.
! NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit));
_local_loop_unroll_limit = new_limit;
}
}
}
+ }
}
//------------------------------policy_align-----------------------------------
// Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
// expression that does the alignment. Note that only one array base can be
*** 828,837 ****
--- 843,855 ----
// changed our minds, we got no pre-loop. Either we need to
// make a new pre-loop, or we gotta disallow RCE.
if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
Node *trip_counter = cl->phi();
+ // check for vectorized loops, some opts are no longer needed
+ if (cl->ignore_slp()) return false;
+
// Check loop body for tests of trip-counter plus loop-invariant vs
// loop-invariant.
for (uint i = 0; i < _body.size(); i++) {
Node *iff = _body[i];
if (iff->Opcode() == Op_If) { // Test?
*** 878,887 ****
--- 896,907 ----
//------------------------------policy_peel_only-------------------------------
// Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful
// for unrolling loops with NO array accesses.
bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const {
+ // check for vectorized loops, any peeling done was already applied
+ if (_head->is_CountedLoop() && _head->as_CountedLoop()->ignore_slp()) return false;
for( uint i = 0; i < _body.size(); i++ )
if( _body[i]->is_Mem() )
return false;
< prev index next >