src/share/vm/opto/loopTransform.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 5091921 Cdiff src/share/vm/opto/loopTransform.cpp

src/share/vm/opto/loopTransform.cpp

Print this page

        

*** 81,91 **** return; // Infinite loop #ifdef ASSERT BoolTest::mask bt = cl->loopexit()->test_trip(); assert(bt == BoolTest::lt || bt == BoolTest::gt || ! bt == BoolTest::ne, "canonical test is expected"); #endif Node* init_n = cl->init_trip(); Node* limit_n = cl->limit(); if (init_n != NULL && init_n->is_Con() && --- 81,91 ---- return; // Infinite loop #ifdef ASSERT BoolTest::mask bt = cl->loopexit()->test_trip(); assert(bt == BoolTest::lt || bt == BoolTest::gt || ! (bt == BoolTest::ne && !LoopLimitCheck), "canonical test is expected"); #endif Node* init_n = cl->init_trip(); Node* limit_n = cl->limit(); if (init_n != NULL && init_n->is_Con() &&
*** 508,518 **** // around the loopback from the prior iteration (follow the old-loop // backedges) and then map to the new peeled iteration. This leaves // the pre-loop with only 1 user (the new peeled iteration), but the // peeled-loop backedge has 2 users. Node* new_exit_value = old_new[head->in(LoopNode::LoopBackControl)->_idx]; ! new_exit_value = move_loop_predicates(entry, new_exit_value); _igvn.hash_delete(head); head->set_req(LoopNode::EntryControl, new_exit_value); for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { Node* old = head->fast_out(j); if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) { --- 508,518 ---- // around the loopback from the prior iteration (follow the old-loop // backedges) and then map to the new peeled iteration. This leaves // the pre-loop with only 1 user (the new peeled iteration), but the // peeled-loop backedge has 2 users. Node* new_exit_value = old_new[head->in(LoopNode::LoopBackControl)->_idx]; ! new_exit_value = move_loop_predicates(entry, new_exit_value, !counted_loop); _igvn.hash_delete(head); head->set_req(LoopNode::EntryControl, new_exit_value); for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { Node* old = head->fast_out(j); if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) {
*** 591,600 **** --- 591,606 ---- assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits"); if (trip_count > unroll_limit || body_size > unroll_limit) { return false; } + // Fully unroll a loop with few iterations regardless next + // conditions since following loop optimizations will split + // such loop anyway (pre-main-post). + if (trip_count <= 3) + return true; + // Take into account that after unroll conjoined heads and tails will fold, // otherwise policy_unroll() may allow more unrolling than max unrolling. uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count; uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE; if (body_size != tst_body_size) // Check for int overflow
*** 603,621 **** // Unrolling can result in a large amount of node construction new_body_size >= MaxNodeLimit - phase->C->unique()) { return false; } - // Currently we don't have policy to optimize one iteration loops. - // Maximally unrolling transformation is used for that: - // it is peeled and the original loop become non reachable (dead). - // Also fully unroll a loop with few iterations regardless next - // conditions since following loop optimizations will split - // such loop anyway (pre-main-post). - if (trip_count <= 3) - return true; - // Do not unroll a loop with String intrinsics code. // String intrinsics are large and have loops. for (uint k = 0; k < _body.size(); k++) { Node* n = _body.at(k); switch (n->Opcode()) { --- 609,618 ----
*** 641,652 **** assert(cl->is_normal_loop() || cl->is_main_loop(), ""); if (!cl->is_valid_counted_loop()) return false; // Malformed counted loop ! // protect against over-unrolling ! if (cl->trip_count() <= 1) return false; // Check for stride being a small enough constant if (abs(cl->stride_con()) > (1<<3)) return false; int future_unroll_ct = cl->unrolled_count() * 2; --- 638,650 ---- assert(cl->is_normal_loop() || cl->is_main_loop(), ""); if (!cl->is_valid_counted_loop()) return false; // Malformed counted loop ! // Protect against over-unrolling. ! // After split at least one iteration will be executed in pre-loop. ! if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false; // Check for stride being a small enough constant if (abs(cl->stride_con()) > (1<<3)) return false; int future_unroll_ct = cl->unrolled_count() * 2;
*** 673,692 **** return false; } Node *init_n = cl->init_trip(); Node *limit_n = cl->limit(); // Non-constant bounds. // Protect against over-unrolling when init or/and limit are not constant // (so that trip_count's init value is maxint) but iv range is known. if (init_n == NULL || !init_n->is_Con() || limit_n == NULL || !limit_n->is_Con()) { Node* phi = cl->phi(); if (phi != NULL) { assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi."); const TypeInt* iv_type = phase->_igvn.type(phi)->is_int(); ! int next_stride = cl->stride_con() * 2; // stride after this unroll if (next_stride > 0) { if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow iv_type->_lo + next_stride > iv_type->_hi) { return false; // over-unrolling } --- 671,691 ---- return false; } Node *init_n = cl->init_trip(); Node *limit_n = cl->limit(); + int stride_con = cl->stride_con(); // Non-constant bounds. // Protect against over-unrolling when init or/and limit are not constant // (so that trip_count's init value is maxint) but iv range is known. if (init_n == NULL || !init_n->is_Con() || limit_n == NULL || !limit_n->is_Con()) { Node* phi = cl->phi(); if (phi != NULL) { assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi."); const TypeInt* iv_type = phase->_igvn.type(phi)->is_int(); ! int next_stride = stride_con * 2; // stride after this unroll if (next_stride > 0) { if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow iv_type->_lo + next_stride > iv_type->_hi) { return false; // over-unrolling }
*** 697,715 **** } } } } // Adjust body_size to determine if we unroll or not uint body_size = _body.size(); - // Key test to unroll CaffeineMark's Logic test - int xors_in_loop = 0; // Also count ModL, DivL and MulL which expand mightly for (uint k = 0; k < _body.size(); k++) { Node* n = _body.at(k); switch (n->Opcode()) { - case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test case Op_ModL: body_size += 30; break; case Op_DivL: body_size += 30; break; case Op_MulL: body_size += 10; break; case Op_StrComp: case Op_StrEquals: --- 696,718 ---- } } } } + // After unroll limit will be adjusted: new_limit = limit-stride. + // Bailout if adjustment overflow. + const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int(); + if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) || + stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo)) + return false; // overflow + // Adjust body_size to determine if we unroll or not uint body_size = _body.size(); // Also count ModL, DivL and MulL which expand mightly for (uint k = 0; k < _body.size(); k++) { Node* n = _body.at(k); switch (n->Opcode()) { case Op_ModL: body_size += 30; break; case Op_DivL: body_size += 30; break; case Op_MulL: body_size += 10; break; case Op_StrComp: case Op_StrEquals:
*** 722,732 **** } // switch } // Check for being too big if (body_size > (uint)LoopUnrollLimit) { - if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true; // Normal case: loop too big return false; } // Unroll once! (Each trip will soon do double iterations) --- 725,734 ----
*** 745,776 **** //------------------------------policy_range_check----------------------------- // Return TRUE or FALSE if the loop should be range-check-eliminated. // Actually we do iteration-splitting, a more powerful form of RCE. bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const { ! if( !RangeCheckElimination ) return false; CountedLoopNode *cl = _head->as_CountedLoop(); // If we unrolled with no intention of doing RCE and we later // changed our minds, we got no pre-loop. Either we need to // make a new pre-loop, or we gotta disallow RCE. ! if( cl->is_main_no_pre_loop() ) return false; // Disallowed for now. Node *trip_counter = cl->phi(); // Check loop body for tests of trip-counter plus loop-invariant vs // loop-invariant. ! for( uint i = 0; i < _body.size(); i++ ) { Node *iff = _body[i]; ! if( iff->Opcode() == Op_If ) { // Test? // Comparing trip+off vs limit Node *bol = iff->in(1); ! if( bol->req() != 2 ) continue; // dead constant test if (!bol->is_Bool()) { assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); continue; } Node *cmp = bol->in(1); Node *rc_exp = cmp->in(1); Node *limit = cmp->in(2); --- 747,781 ---- //------------------------------policy_range_check----------------------------- // Return TRUE or FALSE if the loop should be range-check-eliminated. // Actually we do iteration-splitting, a more powerful form of RCE. bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const { ! if (!RangeCheckElimination) return false; CountedLoopNode *cl = _head->as_CountedLoop(); // If we unrolled with no intention of doing RCE and we later // changed our minds, we got no pre-loop. Either we need to // make a new pre-loop, or we gotta disallow RCE. ! if (cl->is_main_no_pre_loop()) return false; // Disallowed for now. Node *trip_counter = cl->phi(); // Check loop body for tests of trip-counter plus loop-invariant vs // loop-invariant. ! for (uint i = 0; i < _body.size(); i++) { Node *iff = _body[i]; ! if (iff->Opcode() == Op_If) { // Test? // Comparing trip+off vs limit Node *bol = iff->in(1); ! if (bol->req() != 2) continue; // dead constant test if (!bol->is_Bool()) { assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); continue; } + if (bol->as_Bool()->_test._test == BoolTest::ne) + continue; // not RC + Node *cmp = bol->in(1); Node *rc_exp = cmp->in(1); Node *limit = cmp->in(2);
*** 1062,1071 **** --- 1067,1077 ---- // direction: // positive stride use < // negative stride use > if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) { + assert(!LoopLimitCheck, "only canonical tests (lt or gt) are expected"); BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt; // Modify pre loop end condition Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool(); BoolNode* new_bol0 = new (C, 2) BoolNode(pre_bol->in(1), new_test);
*** 1088,1097 **** --- 1094,1106 ---- // Flag main loop main_head->set_main_loop(); if( peel_only ) main_head->set_main_no_pre_loop(); + // Subtract a trip count for the pre-loop. + main_head->set_trip_count(main_head->trip_count() - 1); + // It's difficult to be precise about the trip-counts // for the pre/post loops. They are usually very short, // so guess that 4 trips is a reasonable value. post_head->set_profile_trip_cnt(4.0); pre_head->set_profile_trip_cnt(4.0);
*** 1139,1172 **** Node *limit = loop_head->limit(); Node *init = loop_head->init_trip(); Node *stride = loop_head->stride(); Node *opaq = NULL; ! if( adjust_min_trip ) { // If not maximally unrolling, need adjustment assert( loop_head->is_main_loop(), "" ); assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" ); Node *iff = ctrl->in(0); assert( iff->Opcode() == Op_If, "" ); Node *bol = iff->in(1); assert( bol->Opcode() == Op_Bool, "" ); Node *cmp = bol->in(1); assert( cmp->Opcode() == Op_CmpI, "" ); opaq = cmp->in(2); ! // Occasionally it's possible for a pre-loop Opaque1 node to be // optimized away and then another round of loop opts attempted. // We can not optimize this particular loop in that case. ! if( opaq->Opcode() != Op_Opaque1 ) ! return; // Cannot find pre-loop! Bail out! } C->set_major_progress(); // Adjust max trip count. The trip count is intentionally rounded // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, // the main, unrolled, part of the loop will never execute as it is protected // by the min-trip test. See bug 4834191 for a case where we over-unrolled // and later determined that part of the unrolled loop was dead. loop_head->set_trip_count(loop_head->trip_count() / 2); // Double the count of original iterations in the unrolled loop body. loop_head->double_unrolled_count(); --- 1148,1318 ---- Node *limit = loop_head->limit(); Node *init = loop_head->init_trip(); Node *stride = loop_head->stride(); Node *opaq = NULL; ! if (adjust_min_trip) { // If not maximally unrolling, need adjustment ! // Search for zero-trip guard. assert( loop_head->is_main_loop(), "" ); assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" ); Node *iff = ctrl->in(0); assert( iff->Opcode() == Op_If, "" ); Node *bol = iff->in(1); assert( bol->Opcode() == Op_Bool, "" ); Node *cmp = bol->in(1); assert( cmp->Opcode() == Op_CmpI, "" ); opaq = cmp->in(2); ! // Occasionally it's possible for a zero-trip guard Opaque1 node to be // optimized away and then another round of loop opts attempted. // We can not optimize this particular loop in that case. ! if (opaq->Opcode() != Op_Opaque1) ! return; // Cannot find zero-trip guard! Bail out! ! // Zero-trip test uses an 'opaque' node which is not shared. ! assert(opaq->outcnt() == 1 && opaq->in(1) == limit, ""); } C->set_major_progress(); + Node* new_limit = NULL; + if (UnrollLimitCheck) { + int stride_con = stride->get_int(); + int stride_p = (stride_con > 0) ? stride_con : -stride_con; + uint old_trip_count = loop_head->trip_count(); + // Verify that unroll policy result is still valid. + assert(old_trip_count > 1 && (!adjust_min_trip || stride_p <= (1<<3)), "sanity"); + + // Adjust loop limit to keep valid iterations number after unroll. + // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride + // which may overflow. + if (!adjust_min_trip) { + assert(old_trip_count > 1 && (old_trip_count & 1) == 0, + "odd trip count for maximally unroll"); + // Don't need to adjust limit for maximally unroll since trip count is even. + } else if (loop_head->has_exact_trip_count() && init->is_Con()) { + // Loop's limit is constant. Loop's init could be constant when pre-loop + // become peeled iteration. + long init_con = init->get_int(); + // We can keep old loop limit if iterations count stays the same: + // old_trip_count == new_trip_count * 2 + // Note: since old_trip_count >= 2 then new_trip_count >= 1 + // so we also don't need to adjust zero trip test. + long limit_con = limit->get_int(); + // (stride_con*2) not overflow since stride_con <= 8. + int new_stride_con = stride_con * 2; + int stride_m = new_stride_con - (stride_con > 0 ? 1 : -1); + long trip_count = (limit_con - init_con + stride_m)/new_stride_con; + // New trip count should satisfy next conditions. + assert(trip_count > 0 && (julong)trip_count < (julong)max_juint/2, "sanity"); + uint new_trip_count = (uint)trip_count; + adjust_min_trip = (old_trip_count != new_trip_count*2); + } + + if (adjust_min_trip) { + // Step 2: Adjust the trip limit if it is called for. + // The adjustment amount is -stride. Need to make sure if the + // adjustment underflows or overflows, then the main loop is skipped. + Node* cmp = loop_end->cmp_node(); + assert(cmp->in(2) == limit, "sanity"); + assert(opaq != NULL && opaq->in(1) == limit, "sanity"); + + // Verify that policy_unroll result is still valid. + const TypeInt* limit_type = _igvn.type(limit)->is_int(); + assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) || + stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity"); + + if (limit->is_Con()) { + // The check in policy_unroll and the assert above guaranty + // no underflow if limit is constant. + new_limit = _igvn.intcon(limit->get_int() - stride_con); + set_ctrl(new_limit, C->root()); + } else { + if (stride_con > 0 && ((limit_type->_lo - stride_con) < limit_type->_lo) || + stride_con < 0 && ((limit_type->_hi - stride_con) > limit_type->_hi)) { + // No underflow. + new_limit = new (C, 3) SubINode(limit, stride); + } else { + // (limit - stride) may underflow. + // Clamp the adjustment value with MININT or MAXINT: + // + // new_limit = limit-stride + // if (stride > 0) + // new_limit = (limit < new_limit) ? MININT : new_limit; + // else + // new_limit = (limit > new_limit) ? MAXINT : new_limit; + // + BoolTest::mask bt = loop_end->test_trip(); + assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected"); + Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint); + set_ctrl(adj_max, C->root()); + Node* old_limit = NULL; + Node* adj_limit = NULL; + Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL; + if (loop_head->unrolled_count() > 1 && + limit->is_CMove() && limit->Opcode() == Op_CMoveI && + limit->in(CMoveNode::IfTrue) == adj_max && + bol->as_Bool()->_test._test == bt && + bol->in(1)->Opcode() == Op_CmpI && + bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) { + // Loop was unrolled before. + // Optimize the limit to avoid nested CMove: + // Use not adjusted previous limit as old limit. + old_limit = bol->in(1)->in(1); + // Adjust previous adjusted limit. + adj_limit = limit->in(CMoveNode::IfFalse); + adj_limit = new (C, 3) SubINode(adj_limit, stride); + } else { + old_limit = limit; + adj_limit = new (C, 3) SubINode(limit, stride); + } + assert(old_limit != NULL && adj_limit != NULL, ""); + register_new_node( adj_limit, ctrl ); // adjust amount + Node* adj_cmp = new (C, 3) CmpINode(old_limit, adj_limit); + register_new_node( adj_cmp, ctrl ); + Node* adj_bool = new (C, 2) BoolNode(adj_cmp, bt); + register_new_node( adj_bool, ctrl ); + new_limit = new (C, 4) CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT); + } + register_new_node(new_limit, ctrl); + } + assert(new_limit != NULL, ""); + if (limit->outcnt() == 2) { + // Replace old limit if it is used only in loop tests. + _igvn.replace_node(limit, new_limit); + } else { + // Replace in loop test. + _igvn.hash_delete(cmp); + cmp->set_req(2, new_limit); + + // Step 3: Find the min-trip test guaranteed before a 'main' loop. + // Make it a 1-trip test (means at least 2 trips). + + // Guard test uses an 'opaque' node which is not shared. Hence I + // can edit it's inputs directly. Hammer in the new limit for the + // minimum-trip guard. + assert(opaq->outcnt() == 1, ""); + _igvn.hash_delete(opaq); + opaq->set_req(1, new_limit); + } + } + // Adjust max trip count. The trip count is intentionally rounded // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, // the main, unrolled, part of the loop will never execute as it is protected // by the min-trip test. See bug 4834191 for a case where we over-unrolled // and later determined that part of the unrolled loop was dead. + loop_head->set_trip_count(old_trip_count / 2); + + // Double the count of original iterations in the unrolled loop body. + loop_head->double_unrolled_count(); + + } else { // LoopLimitCheck + + // Adjust max trip count. The trip count is intentionally rounded + // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, + // the main, unrolled, part of the loop will never execute as it is protected + // by the min-trip test. See bug 4834191 for a case where we over-unrolled + // and later determined that part of the unrolled loop was dead. loop_head->set_trip_count(loop_head->trip_count() / 2); // Double the count of original iterations in the unrolled loop body. loop_head->double_unrolled_count();
*** 1184,1215 **** set_ctrl(mtwo, C->root()); Node *rond = new (C, 3) AndINode( trip, mtwo ); register_new_node( rond, ctrl ); Node *spn2 = new (C, 3) MulINode( rond, stride ); register_new_node( spn2, ctrl ); ! Node *lim2 = new (C, 3) AddINode( spn2, init ); ! register_new_node( lim2, ctrl ); // Hammer in the new limit Node *ctrl2 = loop_end->in(0); ! Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), lim2 ); register_new_node( cmp2, ctrl2 ); Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() ); register_new_node( bol2, ctrl2 ); _igvn.hash_delete(loop_end); loop_end->set_req(CountedLoopEndNode::TestValue, bol2); // Step 3: Find the min-trip test guaranteed before a 'main' loop. // Make it a 1-trip test (means at least 2 trips). if( adjust_min_trip ) { // Guard test uses an 'opaque' node which is not shared. Hence I // can edit it's inputs directly. Hammer in the new limit for the // minimum-trip guard. assert( opaq->outcnt() == 1, "" ); _igvn.hash_delete(opaq); ! opaq->set_req(1, lim2); } // --------- // Step 4: Clone the loop body. Move it inside the loop. This loop body // represents the odd iterations; since the loop trips an even number of // times its backedge is never taken. Kill the backedge. --- 1330,1363 ---- set_ctrl(mtwo, C->root()); Node *rond = new (C, 3) AndINode( trip, mtwo ); register_new_node( rond, ctrl ); Node *spn2 = new (C, 3) MulINode( rond, stride ); register_new_node( spn2, ctrl ); ! new_limit = new (C, 3) AddINode( spn2, init ); ! register_new_node( new_limit, ctrl ); // Hammer in the new limit Node *ctrl2 = loop_end->in(0); ! Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), new_limit ); register_new_node( cmp2, ctrl2 ); Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() ); register_new_node( bol2, ctrl2 ); _igvn.hash_delete(loop_end); loop_end->set_req(CountedLoopEndNode::TestValue, bol2); // Step 3: Find the min-trip test guaranteed before a 'main' loop. // Make it a 1-trip test (means at least 2 trips). if( adjust_min_trip ) { + assert( new_limit != NULL, "" ); // Guard test uses an 'opaque' node which is not shared. Hence I // can edit it's inputs directly. Hammer in the new limit for the // minimum-trip guard. assert( opaq->outcnt() == 1, "" ); _igvn.hash_delete(opaq); ! opaq->set_req(1, new_limit); } + } // LoopLimitCheck // --------- // Step 4: Clone the loop body. Move it inside the loop. This loop body // represents the odd iterations; since the loop trips an even number of // times its backedge is never taken. Kill the backedge.
*** 1261,1270 **** --- 1409,1419 ---- //------------------------------do_maximally_unroll---------------------------- void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) { CountedLoopNode *cl = loop->_head->as_CountedLoop(); + assert(cl->has_exact_trip_count(), "trip count is not exact"); assert(cl->trip_count() > 0, ""); #ifndef PRODUCT if (TraceLoopOpts) { tty->print("MaxUnroll %d ", cl->trip_count()); loop->dump_head();
*** 1277,1286 **** --- 1426,1436 ---- } // Now its tripping an even number of times remaining. Double loop body. // Do not adjust pre-guards; they are not needed and do not exist. if (cl->trip_count() > 0) { + assert((cl->trip_count() & 1) == 0, "missed peeling"); do_unroll(loop, old_new, false); } } //------------------------------dominates_backedge---------------------------------
*** 1290,1366 **** Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl); return _phase->dom_lca_internal(ctrl, backedge) == ctrl; } //------------------------------add_constraint--------------------------------- ! // Constrain the main loop iterations so the condition: ! // scale_con * I + offset < limit // always holds true. That is, either increase the number of iterations in // the pre-loop or the post-loop until the condition holds true in the main // loop. Stride, scale, offset and limit are all loop invariant. Further, // stride and scale are constants (offset and limit often are). ! void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) { ! ! // Compute "I :: (limit-offset)/scale_con" ! Node *con = new (C, 3) SubINode( limit, offset ); ! register_new_node( con, pre_ctrl ); ! Node *scale = _igvn.intcon(scale_con); ! set_ctrl(scale, C->root()); ! Node *X = new (C, 3) DivINode( 0, con, scale ); ! register_new_node( X, pre_ctrl ); ! // For positive stride, the pre-loop limit always uses a MAX function // and the main loop a MIN function. For negative stride these are // reversed. // Also for positive stride*scale the affine function is increasing, so the // pre-loop must check for underflow and the post-loop for overflow. // Negative stride*scale reverses this; pre-loop checks for overflow and // post-loop for underflow. ! if( stride_con*scale_con > 0 ) { ! // Compute I < (limit-offset)/scale_con ! // Adjust main-loop last iteration to be MIN/MAX(main_loop,X) ! *main_limit = (stride_con > 0) ! ? (Node*)(new (C, 3) MinINode( *main_limit, X )) ! : (Node*)(new (C, 3) MaxINode( *main_limit, X )); ! register_new_node( *main_limit, pre_ctrl ); } else { ! // Compute (limit-offset)/scale_con + SGN(-scale_con) <= I ! // Add the negation of the main-loop constraint to the pre-loop. ! // See footnote [++] below for a derivation of the limit expression. ! Node *incr = _igvn.intcon(scale_con > 0 ? -1 : 1); ! set_ctrl(incr, C->root()); ! Node *adj = new (C, 3) AddINode( X, incr ); ! register_new_node( adj, pre_ctrl ); ! *pre_limit = (scale_con > 0) ! ? (Node*)new (C, 3) MinINode( *pre_limit, adj ) ! : (Node*)new (C, 3) MaxINode( *pre_limit, adj ); ! register_new_node( *pre_limit, pre_ctrl ); ! // [++] Here's the algebra that justifies the pre-loop limit expression: ! // ! // NOT( scale_con * I + offset < limit ) ! // == ! // scale_con * I + offset >= limit ! // == ! // SGN(scale_con) * I >= (limit-offset)/|scale_con| ! // == ! // (limit-offset)/|scale_con| <= I * SGN(scale_con) ! // == ! // (limit-offset)/|scale_con|-1 < I * SGN(scale_con) ! // == ! // ( if (scale_con > 0) /*common case*/ ! // (limit-offset)/scale_con - 1 < I ! // else ! // (limit-offset)/scale_con + 1 > I ! // ) ! // ( if (scale_con > 0) /*common case*/ ! // (limit-offset)/scale_con + SGN(-scale_con) < I ! // else ! // (limit-offset)/scale_con + SGN(-scale_con) > I } } //------------------------------is_scaled_iv--------------------------------- // Return true if exp is a constant times an induction var --- 1440,1602 ---- Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl); return _phase->dom_lca_internal(ctrl, backedge) == ctrl; } //------------------------------add_constraint--------------------------------- ! // Constrain the main loop iterations so the conditions: ! // low_limit <= scale_con * I + offset < upper_limit // always holds true. That is, either increase the number of iterations in // the pre-loop or the post-loop until the condition holds true in the main // loop. Stride, scale, offset and limit are all loop invariant. Further, // stride and scale are constants (offset and limit often are). ! void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) { // For positive stride, the pre-loop limit always uses a MAX function // and the main loop a MIN function. For negative stride these are // reversed. // Also for positive stride*scale the affine function is increasing, so the // pre-loop must check for underflow and the post-loop for overflow. // Negative stride*scale reverses this; pre-loop checks for overflow and // post-loop for underflow. ! if (stride_con*scale_con > 0) { ! // The overflow limit: scale*I+offset < upper_limit ! // For main-loop compute ! // ( if (scale > 0) /* and stride > 0 */ ! // I < (upper_limit-offset)/scale ! // else /* scale < 0 and stride < 0 */ ! // I > (upper_limit-offset)/scale ! // ) ! // ! // (upper_limit-offset) may overflow when offset < 0. ! // But it is fine since main loop will either have ! // less iterations or will be skipped in such case. ! Node *con = new (C, 3) SubINode(upper_limit, offset); ! register_new_node(con, pre_ctrl); ! Node *scale = _igvn.intcon(scale_con); ! set_ctrl(scale, C->root()); ! Node *X = new (C, 3) DivINode(0, con, scale); ! register_new_node(X, pre_ctrl); + // Adjust main-loop last iteration + Node *loop_limit = *main_limit; + loop_limit = (stride_con > 0) // scale > 0 + ? (Node*)(new (C, 3) MinINode(loop_limit, X)) + : (Node*)(new (C, 3) MaxINode(loop_limit, X)); + register_new_node(loop_limit, pre_ctrl); + *main_limit = loop_limit; + + // The underflow limit: low_limit <= scale*I+offset. + // For pre-loop compute + // NOT(scale*I+offset >= low_limit) + // scale*I+offset < low_limit + // ( if (scale > 0) /* and stride > 0 */ + // I < (low_limit-offset)/scale + // else /* scale < 0 and stride < 0 */ + // I > (low_limit-offset)/scale + // ) + + if (low_limit->get_int() == -max_jint) { + if (!RangeLimitCheck) return; + // We need this guard when scale*pre_limit+offset >= limit + // due to underflow so we need execute pre-loop until + // scale*I+offset >= min_int. But (low_limit-offset) will + // underflow when offset > 0 and X will be > original_limit. + // To avoid it we replace offset = offset > 0 ? 0 : offset + // and add min(pre_limit, original_limit). + Node* shift = _igvn.intcon(31); + set_ctrl(shift, C->root()); + Node *neg_off = new (C, 3) RShiftINode(offset, shift); + register_new_node(neg_off, pre_ctrl); + offset = new (C, 3) AndINode(offset, neg_off); + register_new_node(offset, pre_ctrl); } else { ! assert(low_limit->get_int() == 0, "wrong low limit for range check"); ! // The only problem we have here when offset == min_int ! // since (0-min_int) == min_int. It may be fine for scale > 0 ! // but for scale < 0 X will be < original_limit. ! } ! con = new (C, 3) SubINode(low_limit, offset); ! register_new_node(con, pre_ctrl); ! scale = _igvn.intcon(scale_con); ! set_ctrl(scale, C->root()); ! X = new (C, 3) DivINode(0, con, scale); ! register_new_node(X, pre_ctrl); ! // Adjust pre-loop last iteration ! loop_limit = *pre_limit; ! loop_limit = (stride_con > 0) // scale > 0 ! ? (Node*)(new (C, 3) MaxINode(loop_limit, X)) ! : (Node*)(new (C, 3) MinINode(loop_limit, X)); ! register_new_node( loop_limit, pre_ctrl ); ! *pre_limit = loop_limit; ! ! } else { // stride_con*scale_con < 0 ! // For negative stride*scale pre-loop checks for overflow and ! // post-loop for underflow. ! // ! // The underflow limit: low_limit <= scale*I+offset. ! // For main-loop compute ! // scale*I+offset+1 > low_limit ! // ( if (scale < 0) /* and stride > 0 */ ! // I < (low_limit-(offset+1))/scale ! // else /* scale < 0 and stride < 0 */ ! // I > (low_limit-(offset+1))/scale ! // ) ! ! if (low_limit->get_int() == -max_jint) { ! if (!RangeLimitCheck) return; ! } else { ! assert(low_limit->get_int() == 0, "wrong low limit for range check"); } + + Node *one = _igvn.intcon(1); + set_ctrl(one, C->root()); + Node *plus_one = new (C, 3) AddINode(offset, one); + register_new_node( plus_one, pre_ctrl ); + Node *con = new (C, 3) SubINode(low_limit, plus_one); + register_new_node(con, pre_ctrl); + Node *scale = _igvn.intcon(scale_con); + set_ctrl(scale, C->root()); + Node *X = new (C, 3) DivINode(0, con, scale); + register_new_node(X, pre_ctrl); + + // Adjust main-loop last iteration + Node *loop_limit = *main_limit; + loop_limit = (stride_con > 0) // scale < 0 + ? (Node*)(new (C, 3) MinINode(loop_limit, X)) + : (Node*)(new (C, 3) MaxINode(loop_limit, X)); + register_new_node(loop_limit, pre_ctrl); + *main_limit = loop_limit; + + // The overflow limit: scale*I+offset < upper_limit + // For pre-loop compute + // NOT(scale*I+offset < upper_limit) + // scale*I+offset >= upper_limit + // scale*I+offset+1 > upper_limit + // ( if (scale < 0) /* and stride > 0 */ + // I < (upper_limit-(offset+1))/scale + // else /* scale < 0 and stride < 0 */ + // I > (upper_limit-(offset+1))/scale + // ) + plus_one = new (C, 3) AddINode(offset, one); + register_new_node( plus_one, pre_ctrl ); + con = new (C, 3) SubINode(upper_limit, plus_one); + register_new_node(con, pre_ctrl); + scale = _igvn.intcon(scale_con); + set_ctrl(scale, C->root()); + X = new (C, 3) DivINode(0, con, scale); + register_new_node(X, pre_ctrl); + + // Adjust pre-loop last iteration + loop_limit = *pre_limit; + loop_limit = (stride_con > 0) // scale < 0 + ? (Node*)(new (C, 3) MaxINode(loop_limit, X)) + : (Node*)(new (C, 3) MinINode(loop_limit, X)); + register_new_node( loop_limit, pre_ctrl ); + *pre_limit = loop_limit; + + } } //------------------------------is_scaled_iv--------------------------------- // Return true if exp is a constant times an induction var
*** 1486,1496 **** Node *bolzm = iffm->in(1); assert(bolzm->Opcode() == Op_Bool, ""); Node *cmpzm = bolzm->in(1); assert(cmpzm->is_Cmp(), ""); Node *opqzm = cmpzm->in(2); ! // Can not optimize a loop if pre-loop Opaque1 node is optimized // away and then another round of loop opts attempted. if (opqzm->Opcode() != Op_Opaque1) return; assert(opqzm->in(1) == main_limit, "do not understand situation"); --- 1722,1732 ---- Node *bolzm = iffm->in(1); assert(bolzm->Opcode() == Op_Bool, ""); Node *cmpzm = bolzm->in(1); assert(cmpzm->is_Cmp(), ""); Node *opqzm = cmpzm->in(2); ! // Can not optimize a loop if zero-trip Opaque1 node is optimized // away and then another round of loop opts attempted. if (opqzm->Opcode() != Op_Opaque1) return; assert(opqzm->in(1) == main_limit, "do not understand situation");
*** 1521,1532 **** --- 1757,1771 ---- // Must know if its a count-up or count-down loop int stride_con = cl->stride_con(); Node *zero = _igvn.intcon(0); Node *one = _igvn.intcon(1); + // Use symetrical int range [-max_jint,max_jint] + Node *mini = _igvn.intcon(-max_jint); set_ctrl(zero, C->root()); set_ctrl(one, C->root()); + set_ctrl(mini, C->root()); // Range checks that do not dominate the loop backedge (ie. // conditionally executed) can lengthen the pre loop limit beyond // the original loop limit. To prevent this, the pre limit is // (for stride > 0) MINed with the original loop limit (MAXed
*** 1597,1607 **** // As above for the 'limit', the 'offset' maybe pinned below the // zero trip test. if( offset_c == ctrl ) { continue; // Don't rce this check but continue looking for other candidates. } ! // At this point we have the expression as: // scale_con * trip_counter + offset :: limit // where scale_con, offset and limit are loop invariant. Trip_counter // monotonically increases by stride_con, a constant. Both (or either) // stride_con and scale_con can be negative which will flip about the --- 1836,1851 ---- // As above for the 'limit', the 'offset' maybe pinned below the // zero trip test. if( offset_c == ctrl ) { continue; // Don't rce this check but continue looking for other candidates. } ! #ifdef ASSERT ! if (TraceRangeLimitCheck) { ! tty->print_cr("RC bool node%s", flip ? " fliped:" : ":"); ! bol->dump(2); ! } ! #endif // At this point we have the expression as: // scale_con * trip_counter + offset :: limit // where scale_con, offset and limit are loop invariant. Trip_counter // monotonically increases by stride_con, a constant. Both (or either) // stride_con and scale_con can be negative which will flip about the
*** 1608,1628 **** // sense of the test. // Adjust pre and main loop limits to guard the correct iteration set if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests if( b_test._test == BoolTest::lt ) { // Range checks always use lt ! // The overflow limit: scale*I+offset < limit ! add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit ); ! // The underflow limit: 0 <= scale*I+offset. ! // Some math yields: -scale*I-(offset+1) < 0 ! Node *plus_one = new (C, 3) AddINode( offset, one ); ! register_new_node( plus_one, pre_ctrl ); ! Node *neg_offset = new (C, 3) SubINode( zero, plus_one ); ! register_new_node( neg_offset, pre_ctrl ); ! add_constraint( stride_con, -scale_con, neg_offset, zero, pre_ctrl, &pre_limit, &main_limit ); if (!conditional_rc) { conditional_rc = !loop->dominates_backedge(iff); } } else { #ifndef PRODUCT if( PrintOpto ) tty->print_cr("missed RCE opportunity"); --- 1852,1871 ---- // sense of the test. // Adjust pre and main loop limits to guard the correct iteration set if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests if( b_test._test == BoolTest::lt ) { // Range checks always use lt ! // The underflow and overflow limits: 0 <= scale*I+offset < limit ! add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit ); if (!conditional_rc) { conditional_rc = !loop->dominates_backedge(iff); + // It is also needed if offset->_lo == min_int since + // (0-min_int) == min_int. It may be fine for stride > 0 + // but for stride < 0 pre_limit will be < original_limit. + const TypeInt* offset_t = _igvn.type(offset)->is_int(); + conditional_rc |= RangeLimitCheck && (offset_t->_lo == min_jint) && + (scale_con<0) && (stride_con<0); } } else { #ifndef PRODUCT if( PrintOpto ) tty->print_cr("missed RCE opportunity");
*** 1629,1653 **** #endif continue; // In release mode, ignore it } } else { // Otherwise work on normal compares switch( b_test._test ) { ! case BoolTest::ge: // Convert X >= Y to -X <= -Y scale_con = -scale_con; offset = new (C, 3) SubINode( zero, offset ); register_new_node( offset, pre_ctrl ); limit = new (C, 3) SubINode( zero, limit ); register_new_node( limit, pre_ctrl ); // Fall into LE case ! case BoolTest::le: // Convert X <= Y to X < Y+1 limit = new (C, 3) AddINode( limit, one ); register_new_node( limit, pre_ctrl ); // Fall into LT case case BoolTest::lt: ! add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit ); if (!conditional_rc) { conditional_rc = !loop->dominates_backedge(iff); } break; default: #ifndef PRODUCT if( PrintOpto ) --- 1872,1910 ---- #endif continue; // In release mode, ignore it } } else { // Otherwise work on normal compares switch( b_test._test ) { ! case BoolTest::gt: ! // Fall into GE case ! case BoolTest::ge: ! // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit scale_con = -scale_con; offset = new (C, 3) SubINode( zero, offset ); register_new_node( offset, pre_ctrl ); limit = new (C, 3) SubINode( zero, limit ); register_new_node( limit, pre_ctrl ); // Fall into LE case ! case BoolTest::le: ! if (b_test._test != BoolTest::gt) { ! // Convert X <= Y to X < Y+1 limit = new (C, 3) AddINode( limit, one ); register_new_node( limit, pre_ctrl ); + } // Fall into LT case case BoolTest::lt: ! // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit ! add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit ); if (!conditional_rc) { conditional_rc = !loop->dominates_backedge(iff); + // It is also needed if scale*pre_limit+offset >= limit + // due to underflow so we need execute pre-loop until + // scale*I+offset >= min_int. But (low_limit-offset) will + // underflow when offset > 0 and X will be > original_limit. + const TypeInt* offset_t = _igvn.type(offset)->is_int(); + conditional_rc |= RangeLimitCheck && (offset_t->_hi > 0) && + (scale_con>0) && (stride_con>0); } break; default: #ifndef PRODUCT if( PrintOpto )
*** 1694,1704 **** _igvn.hash_delete(pre_opaq); pre_opaq->set_req(1, pre_limit); // Note:: we are making the main loop limit no longer precise; // need to round up based on stride. ! if( stride_con != 1 && stride_con != -1 ) { // Cutout for common case // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init // Hopefully, compiler will optimize for powers of 2. Node *ctrl = get_ctrl(main_limit); Node *stride = cl->stride(); Node *init = cl->init_trip(); --- 1951,1962 ---- _igvn.hash_delete(pre_opaq); pre_opaq->set_req(1, pre_limit); // Note:: we are making the main loop limit no longer precise; // need to round up based on stride. ! cl->set_nonexact_trip_count(); ! if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init // Hopefully, compiler will optimize for powers of 2. Node *ctrl = get_ctrl(main_limit); Node *stride = cl->stride(); Node *init = cl->init_trip();
*** 1874,1884 **** // Replace the phi at loop head with the final value of the last // iteration. Then the CountedLoopEnd will collapse (backedge never // taken) and all loop-invariant uses of the exit values will be correct. Node *phi = cl->phi(); ! Node *final = new (phase->C, 3) SubINode( cl->limit(), cl->stride() ); phase->register_new_node(final,cl->in(LoopNode::EntryControl)); phase->_igvn.replace_node(phi,final); phase->C->set_major_progress(); return true; } --- 2132,2154 ---- // Replace the phi at loop head with the final value of the last // iteration. Then the CountedLoopEnd will collapse (backedge never // taken) and all loop-invariant uses of the exit values will be correct. Node *phi = cl->phi(); ! Node *exact_limit = phase->exact_limit(this); ! if (exact_limit != cl->limit()) { ! // We also need to replace the original limit to collapse loop exit. ! Node* cmp = cl->loopexit()->cmp_node(); ! assert(cl->limit() == cmp->in(2), "sanity"); ! phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist ! phase->_igvn.hash_delete(cmp); ! cmp->set_req(2, exact_limit); ! phase->_igvn._worklist.push(cmp); // put cmp on worklist ! } ! // Note: the final value after increment should not overflow since ! // counted loop has limit check predicate. ! Node *final = new (phase->C, 3) SubINode( exact_limit, cl->stride() ); phase->register_new_node(final,cl->in(LoopNode::EntryControl)); phase->_igvn.replace_node(phi,final); phase->C->set_major_progress(); return true; }
src/share/vm/opto/loopTransform.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File