< prev index next >
src/hotspot/share/opto/loopTransform.cpp
Print this page
*** 284,293 ****
--- 284,296 ----
if (n1->is_Add() && n1->in(2)->is_Con()) return NULL;
Node* inv1 = n1->in(inv1_idx);
Node* n2 = n1->in(3 - inv1_idx);
int inv2_idx = is_invariant_addition(n2, phase);
if (!inv2_idx) return NULL;
+
+ if (!phase->may_require_nodes(10, 10)) return NULL;
+
Node* x = n2->in(3 - inv2_idx);
Node* inv2 = n2->in(inv2_idx);
bool neg_x = n2->is_Sub() && inv2_idx == 1;
bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
*** 335,399 ****
Node *n = _body.at(i);
for (int j = 0; j < 5; j++) {
Node* nn = reassociate_add_sub(n, phase);
if (nn == NULL) break;
n = nn; // again
! };
}
}
//------------------------------policy_peeling---------------------------------
! // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
! // make some loop-invariant test (usually a null-check) happen before the loop.
! bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const {
! IdealLoopTree *loop = (IdealLoopTree*)this;
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
! uint body_size = loop->_body.size();
! // Peeling does loop cloning which can result in O(N^2) node construction
! if (body_size > 255) {
! return false; // Prevent overflow for large body size
! }
! uint estimate = body_size * body_size;
if (phase->exceeding_node_budget(estimate)) {
! return false; // Too large to safely clone
}
! // check for vectorized loops, any peeling done was already applied
if (_head->is_CountedLoop()) {
CountedLoopNode* cl = _head->as_CountedLoop();
if (cl->is_unroll_only() || cl->trip_count() == 1) {
! return false;
}
}
! Node* test = loop->tail();
while (test != _head) { // Scan till run off top of loop
if (test->is_If()) { // Test?
Node *ctrl = phase->get_ctrl(test->in(1));
if (ctrl->is_top()) {
! return false; // Found dead test on live IF? No peeling!
}
! // Standard IF only has one input value to check for loop invariance
assert(test->Opcode() == Op_If ||
test->Opcode() == Op_CountedLoopEnd ||
test->Opcode() == Op_RangeCheck,
"Check this code when new subtype is added");
// Condition is not a member of this loop?
if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
! // Found reason to peel!
! return phase->may_require_nodes(estimate);
}
}
! // Walk up dominators to loop _head looking for test which is
! // executed on every path thru loop.
test = phase->idom(test);
}
! return false;
}
//------------------------------peeled_dom_test_elim---------------------------
// If we got the effect of peeling, either by actually peeling or by making
// a pre-loop which must execute at least once, we can remove all
--- 338,413 ----
Node *n = _body.at(i);
for (int j = 0; j < 5; j++) {
Node* nn = reassociate_add_sub(n, phase);
if (nn == NULL) break;
n = nn; // again
! }
}
}
//------------------------------policy_peeling---------------------------------
! // Return TRUE if the loop should be peeled, otherwise return FALSE. Peeling
! // is applicable if we can make a loop-invariant test (usually a null-check)
! // execute before we enter the loop. When TRUE, the estimated node budget is
! // also requested.
! bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) {
! uint estimate = estimate_peeling(phase);
!
! return estimate == 0 ? false : phase->may_require_nodes(estimate);
! }
!
! // Perform actual policy and size estimate for the loop peeling transform, and
! // return the estimated loop size if peeling is applicable, otherwise return
! // zero. No node budget is allocated.
! uint IdealLoopTree::estimate_peeling(PhaseIdealLoop *phase) {
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
! // Peeling does loop cloning which can result in O(N^2) node construction.
! if (_body.size() > 255) {
! return 0; // Suppress too large body size.
! }
! // Optimistic estimate that approximates loop body complexity via data and
! // control flow fan-out (instead of using the more pessimistic: BodySize^2).
! uint estimate = est_loop_clone_sz(2);
!
if (phase->exceeding_node_budget(estimate)) {
! return 0; // Too large to safely clone.
}
! // Check for vectorized loops, any peeling done was already applied.
if (_head->is_CountedLoop()) {
CountedLoopNode* cl = _head->as_CountedLoop();
if (cl->is_unroll_only() || cl->trip_count() == 1) {
! return 0;
}
}
! Node* test = tail();
while (test != _head) { // Scan till run off top of loop
if (test->is_If()) { // Test?
Node *ctrl = phase->get_ctrl(test->in(1));
if (ctrl->is_top()) {
! return 0; // Found dead test on live IF? No peeling!
}
! // Standard IF only has one input value to check for loop invariance.
assert(test->Opcode() == Op_If ||
test->Opcode() == Op_CountedLoopEnd ||
test->Opcode() == Op_RangeCheck,
"Check this code when new subtype is added");
// Condition is not a member of this loop?
if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
! return estimate; // Found reason to peel!
}
}
! // Walk up dominators to loop _head looking for test which is executed on
! // every path through the loop.
test = phase->idom(test);
}
! return 0;
}
//------------------------------peeled_dom_test_elim---------------------------
// If we got the effect of peeling, either by actually peeling or by making
// a pre-loop which must execute at least once, we can remove all
*** 636,647 ****
_igvn.hash_delete(use);
use->set_req(LoopNode::LoopBackControl, C->top());
}
}
-
// Step 4: Correct dom-depth info. Set to loop-head depth.
int dd = dom_depth(head);
set_idom(head, head->in(1), dd);
for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
Node *old = loop->_body.at(j3);
Node *nnn = old_new[old->_idx];
--- 650,661 ----
_igvn.hash_delete(use);
use->set_req(LoopNode::LoopBackControl, C->top());
}
}
// Step 4: Correct dom-depth info. Set to loop-head depth.
+
int dd = dom_depth(head);
set_idom(head, head->in(1), dd);
for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
Node *old = loop->_body.at(j3);
Node *nnn = old_new[old->_idx];
*** 655,669 ****
peeled_dom_test_elim(loop,old_new);
loop->record_for_igvn();
}
! #define EMPTY_LOOP_SIZE 7 // number of nodes in an empty loop
//------------------------------policy_maximally_unroll------------------------
! // Calculate exact loop trip count and return true if loop can be maximally
! // unrolled.
bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
CountedLoopNode *cl = _head->as_CountedLoop();
assert(cl->is_normal_loop(), "");
if (!cl->is_valid_counted_loop()) {
return false; // Malformed counted loop
--- 669,702 ----
peeled_dom_test_elim(loop,old_new);
loop->record_for_igvn();
}
! // The Estimated Loop Unroll Size: UnrollFactor * (106% * BodySize + BC) + CC,
! // where BC and CC are (totally) ad-hoc/magic "body" and "clone" constants,
! // respectively, used to ensure that node usage estimates made are on the safe
! // side, for the most part. This is a simplified version of the loop clone
! // size calculation in est_loop_clone_sz(), defined for unroll factors larger
! // than one (>1), performing an overflow check and returning 'UINT_MAX' in
! // case of an overflow.
! static uint est_loop_unroll_sz(uint factor, uint size) {
! precond(0 < factor);
!
! uint const bc = 5;
! uint const cc = 7;
! uint const sz = size + (size + 15) / 16;
! uint estimate = factor * (sz + bc) + cc;
!
! return (estimate - cc) / factor == sz + bc ? estimate : UINT_MAX;
! }
!
! #define EMPTY_LOOP_SIZE 7 // Number of nodes in an empty loop.
//------------------------------policy_maximally_unroll------------------------
! // Calculate the exact loop trip-count and return TRUE if loop can be fully,
! // i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
! // node budget is also requested.
bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
CountedLoopNode *cl = _head->as_CountedLoop();
assert(cl->is_normal_loop(), "");
if (!cl->is_valid_counted_loop()) {
return false; // Malformed counted loop
*** 691,701 ****
return false;
}
// Take into account that after unroll conjoined heads and tails will fold,
// otherwise policy_unroll() may allow more unrolling than max unrolling.
! uint new_body_size = est_loop_clone_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
return false;
}
--- 724,734 ----
return false;
}
// Take into account that after unroll conjoined heads and tails will fold,
// otherwise policy_unroll() may allow more unrolling than max unrolling.
! uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
return false;
}
*** 740,751 ****
return phase->may_require_nodes(new_body_size);
}
//------------------------------policy_unroll----------------------------------
! // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if the
! // loop is a CountedLoop and the body is small enough.
bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
CountedLoopNode *cl = _head->as_CountedLoop();
assert(cl->is_normal_loop() || cl->is_main_loop(), "");
--- 773,785 ----
return phase->may_require_nodes(new_body_size);
}
//------------------------------policy_unroll----------------------------------
! // Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll if
! // the loop is a counted loop and the loop body is small enough. When TRUE,
! // the estimated node budget is also requested.
bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
CountedLoopNode *cl = _head->as_CountedLoop();
assert(cl->is_normal_loop() || cl->is_main_loop(), "");
*** 885,895 ****
int slp_max_unroll_factor = cl->slp_max_unroll();
if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
LoopMaxUnroll = slp_max_unroll_factor;
}
! uint estimate = est_loop_clone_sz(2, body_size);
if (cl->has_passed_slp()) {
if (slp_max_unroll_factor >= future_unroll_cnt) {
return phase->may_require_nodes(estimate);
}
--- 919,929 ----
int slp_max_unroll_factor = cl->slp_max_unroll();
if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
LoopMaxUnroll = slp_max_unroll_factor;
}
! uint estimate = est_loop_clone_sz(2);
if (cl->has_passed_slp()) {
if (slp_max_unroll_factor >= future_unroll_cnt) {
return phase->may_require_nodes(estimate);
}
*** 956,977 ****
bool IdealLoopTree::policy_align(PhaseIdealLoop *phase) const {
return false;
}
//------------------------------policy_range_check-----------------------------
! // Return TRUE or FALSE if the loop should be range-check-eliminated.
! // Actually we do iteration-splitting, a more powerful form of RCE.
bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
if (!RangeCheckElimination) return false;
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
CountedLoopNode *cl = _head->as_CountedLoop();
! // If we unrolled with no intention of doing RCE and we later
! // changed our minds, we got no pre-loop. Either we need to
! // make a new pre-loop, or we gotta disallow RCE.
if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
Node *trip_counter = cl->phi();
// check for vectorized loops, some opts are no longer needed
if (cl->is_unroll_only()) return false;
--- 990,1013 ----
bool IdealLoopTree::policy_align(PhaseIdealLoop *phase) const {
return false;
}
//------------------------------policy_range_check-----------------------------
! // Return TRUE or FALSE if the loop should be range-check-eliminated or not.
! // When TRUE, the estimated node budget is also requested.
! //
! // We will actually perform iteration-splitting, a more powerful form of RCE.
bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
if (!RangeCheckElimination) return false;
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
CountedLoopNode *cl = _head->as_CountedLoop();
! // If we unrolled with no intention of doing RCE and we later changed our
! // minds, we got no pre-loop. Either we need to make a new pre-loop, or we
! // have to disallow RCE.
if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
Node *trip_counter = cl->phi();
// check for vectorized loops, some opts are no longer needed
if (cl->is_unroll_only()) return false;
*** 1014,1030 ****
}
if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
continue;
}
! // Found a test like 'trip+off vs limit'. Test is an IfNode, has two
! // (2) projections. If BOTH are in the loop we need loop unswitching
! // instead of iteration splitting.
if (is_loop_exit(iff)) {
// Found valid reason to split iterations (if there is room).
// NOTE: Usually a gross overestimate.
! return phase->may_require_nodes(est_loop_clone_sz(2, _body.size()));
}
} // End of is IF
}
return false;
--- 1050,1066 ----
}
if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
continue;
}
! // Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2)
! // projections. If BOTH are in the loop we need loop unswitching instead
! // of iteration splitting.
if (is_loop_exit(iff)) {
// Found valid reason to split iterations (if there is room).
// NOTE: Usually a gross overestimate.
! return phase->may_require_nodes(est_loop_clone_sz(2));
}
} // End of is IF
}
return false;
*** 1519,1531 ****
CountedLoopNode *cl = loop->_head->as_CountedLoop();
// only process vectorized main loops
if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
- if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
- return;
- }
int slp_max_unroll_factor = cl->slp_max_unroll();
int cur_unroll = cl->unrolled_count();
if (slp_max_unroll_factor == 0) return;
--- 1555,1564 ----
*** 1533,1542 ****
--- 1566,1579 ----
if (cur_unroll != slp_max_unroll_factor) return;
// we only ever process this one time
if (cl->has_atomic_post_loop()) return;
+ if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
+ return;
+ }
+
#ifndef PRODUCT
if (TraceLoopOpts) {
tty->print("PostVector ");
loop->dump_head();
}
*** 3176,3199 ****
return true; // Here we removed an empty loop
}
AutoNodeBudget node_budget(phase);
- bool should_peel = policy_peeling(phase);
- bool should_unswitch = policy_unswitching(phase);
-
// Non-counted loops may be peeled; exactly 1 iteration is peeled.
// This removes loop-invariant tests (usually null checks).
if (!_head->is_CountedLoop()) { // Non-counted loop
if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
// Partial peel succeeded so terminate this round of loop opts
return false;
}
! if (should_peel) { // Should we peel?
if (PrintOpto) { tty->print_cr("should_peel"); }
! phase->do_peeling(this,old_new);
! } else if (should_unswitch) {
phase->do_unswitching(this, old_new);
}
return true;
}
CountedLoopNode *cl = _head->as_CountedLoop();
--- 3213,3233 ----
return true; // Here we removed an empty loop
}
AutoNodeBudget node_budget(phase);
// Non-counted loops may be peeled; exactly 1 iteration is peeled.
// This removes loop-invariant tests (usually null checks).
if (!_head->is_CountedLoop()) { // Non-counted loop
if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
// Partial peel succeeded so terminate this round of loop opts
return false;
}
! if (policy_peeling(phase)) { // Should we peel?
if (PrintOpto) { tty->print_cr("should_peel"); }
! phase->do_peeling(this, old_new);
! } else if (policy_unswitching(phase)) {
phase->do_unswitching(this, old_new);
}
return true;
}
CountedLoopNode *cl = _head->as_CountedLoop();
*** 3207,3229 ****
compute_profile_trip_cnt(phase);
// Before attempting fancy unrolling, RCE or alignment, see if we want
// to completely unroll this loop or do loop unswitching.
if (cl->is_normal_loop()) {
! if (should_unswitch) {
phase->do_unswitching(this, old_new);
return true;
}
! bool should_maximally_unroll = policy_maximally_unroll(phase);
! if (should_maximally_unroll) {
// Here we did some unrolling and peeling. Eventually we will
// completely unroll this loop and it will no longer be a loop.
phase->do_maximally_unroll(this, old_new);
return true;
}
}
// Counted loops may be peeled, may need some iterations run up
// front for RCE, and may want to align loop refs to a cache
// line. Thus we clone a full loop up front whose trip count is
// at least 1 (if peeling), but may be several more.
--- 3241,3265 ----
compute_profile_trip_cnt(phase);
// Before attempting fancy unrolling, RCE or alignment, see if we want
// to completely unroll this loop or do loop unswitching.
if (cl->is_normal_loop()) {
! if (policy_unswitching(phase)) {
phase->do_unswitching(this, old_new);
return true;
}
! if (policy_maximally_unroll(phase)) {
// Here we did some unrolling and peeling. Eventually we will
// completely unroll this loop and it will no longer be a loop.
phase->do_maximally_unroll(this, old_new);
return true;
}
}
+ uint est_peeling = estimate_peeling(phase);
+ bool should_peel = 0 < est_peeling;
+
// Counted loops may be peeled, may need some iterations run up
// front for RCE, and may want to align loop refs to a cache
// line. Thus we clone a full loop up front whose trip count is
// at least 1 (if peeling), but may be several more.
*** 3250,3267 ****
// If we have any of these conditions (RCE, alignment, unrolling) met, then
// we switch to the pre-/main-/post-loop model. This model also covers
// peeling.
if (should_rce || should_align || should_unroll) {
if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
! if (!phase->may_require_nodes(est_loop_clone_sz(3, _body.size()))) {
return false;
}
! phase->insert_pre_post_loops(this,old_new, !may_rce_align);
}
// Adjust the pre- and main-loop limits to let the pre and post loops run
! // with full checks, but the main-loop with no checks. Remove said
! // checks from the main body.
if (should_rce) {
if (phase->do_range_check(this, old_new) != 0) {
cl->mark_has_range_checks();
}
} else if (PostLoopMultiversioning) {
--- 3286,3304 ----
// If we have any of these conditions (RCE, alignment, unrolling) met, then
// we switch to the pre-/main-/post-loop model. This model also covers
// peeling.
if (should_rce || should_align || should_unroll) {
if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
! uint estimate = est_loop_clone_sz(3);
! if (!phase->may_require_nodes(estimate)) {
return false;
}
! phase->insert_pre_post_loops(this, old_new, !may_rce_align);
}
// Adjust the pre- and main-loop limits to let the pre and post loops run
! // with full checks, but the main-loop with no checks. Remove said checks
! // from the main body.
if (should_rce) {
if (phase->do_range_check(this, old_new) != 0) {
cl->mark_has_range_checks();
}
} else if (PostLoopMultiversioning) {
*** 3291,3301 ****
if (should_align) {
Unimplemented();
}
} else { // Else we have an unchanged counted loop
if (should_peel) { // Might want to peel but do nothing else
! phase->do_peeling(this,old_new);
}
}
return true;
}
--- 3328,3340 ----
if (should_align) {
Unimplemented();
}
} else { // Else we have an unchanged counted loop
if (should_peel) { // Might want to peel but do nothing else
! if (phase->may_require_nodes(est_peeling)) {
! phase->do_peeling(this, old_new);
! }
}
}
return true;
}
< prev index next >