< prev index next >
src/share/vm/opto/loopTransform.cpp
Print this page
@@ -981,11 +981,11 @@
//------------------------------insert_pre_post_loops--------------------------
// Insert pre and post loops. If peel_only is set, the pre-loop can not have
// more iterations added. It acts as a 'peel' only, no lower-bound RCE, no
// alignment. Useful to unroll loops that do no array accesses.
-void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
+void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_new, bool peel_only) {
#ifndef PRODUCT
if (TraceLoopOpts) {
if (peel_only)
tty->print("PeelMainPost ");
@@ -996,14 +996,14 @@
#endif
C->set_major_progress();
// Find common pieces of the loop being guarded with pre & post loops
CountedLoopNode *main_head = loop->_head->as_CountedLoop();
- assert( main_head->is_normal_loop(), "" );
+ assert(main_head->is_normal_loop(), "");
CountedLoopEndNode *main_end = main_head->loopexit();
guarantee(main_end != NULL, "no loop exit node");
- assert( main_end->outcnt() == 2, "1 true, 1 false path only" );
+ assert(main_end->outcnt() == 2, "1 true, 1 false path only");
uint dd_main_head = dom_depth(main_head);
uint max = main_head->outcnt();
Node *pre_header= main_head->in(LoopNode::EntryControl);
Node *init = main_head->init_trip();
@@ -1013,167 +1013,99 @@
Node *cmp = main_end ->cmp_node();
BoolTest::mask b_test = main_end->test_trip();
// Need only 1 user of 'bol' because I will be hacking the loop bounds.
Node *bol = main_end->in(CountedLoopEndNode::TestValue);
- if( bol->outcnt() != 1 ) {
+ if (bol->outcnt() != 1) {
bol = bol->clone();
register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
_igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
}
// Need only 1 user of 'cmp' because I will be hacking the loop bounds.
- if( cmp->outcnt() != 1 ) {
+ if (cmp->outcnt() != 1) {
cmp = cmp->clone();
register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
_igvn.replace_input_of(bol, 1, cmp);
}
- //------------------------------
- // Step A: Create Post-Loop.
- Node* main_exit = main_end->proj_out(false);
- assert( main_exit->Opcode() == Op_IfFalse, "" );
- int dd_main_exit = dom_depth(main_exit);
-
- // Step A1: Clone the loop body. The clone becomes the post-loop. The main
- // loop pre-header illegally has 2 control users (old & new loops).
- clone_loop( loop, old_new, dd_main_exit );
- assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" );
- CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
- post_head->set_post_loop(main_head);
-
- // Reduce the post-loop trip count.
- CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
- post_end->_prob = PROB_FAIR;
-
- // Build the main-loop normal exit.
- IfFalseNode *new_main_exit = new IfFalseNode(main_end);
- _igvn.register_new_node_with_optimizer( new_main_exit );
- set_idom(new_main_exit, main_end, dd_main_exit );
- set_loop(new_main_exit, loop->_parent);
-
- // Step A2: Build a zero-trip guard for the post-loop. After leaving the
- // main-loop, the post-loop may not execute at all. We 'opaque' the incr
- // (the main-loop trip-counter exit value) because we will be changing
- // the exit value (via unrolling) so we cannot constant-fold away the zero
- // trip guard until all unrolling is done.
- Node *zer_opaq = new Opaque1Node(C, incr);
- Node *zer_cmp = new CmpINode( zer_opaq, limit );
- Node *zer_bol = new BoolNode( zer_cmp, b_test );
- register_new_node( zer_opaq, new_main_exit );
- register_new_node( zer_cmp , new_main_exit );
- register_new_node( zer_bol , new_main_exit );
-
- // Build the IfNode
- IfNode *zer_iff = new IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN );
- _igvn.register_new_node_with_optimizer( zer_iff );
- set_idom(zer_iff, new_main_exit, dd_main_exit);
- set_loop(zer_iff, loop->_parent);
-
- // Plug in the false-path, taken if we need to skip post-loop
- _igvn.replace_input_of(main_exit, 0, zer_iff);
- set_idom(main_exit, zer_iff, dd_main_exit);
- set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
- // Make the true-path, must enter the post loop
- Node *zer_taken = new IfTrueNode( zer_iff );
- _igvn.register_new_node_with_optimizer( zer_taken );
- set_idom(zer_taken, zer_iff, dd_main_exit);
- set_loop(zer_taken, loop->_parent);
- // Plug in the true path
- _igvn.hash_delete( post_head );
- post_head->set_req(LoopNode::EntryControl, zer_taken);
- set_idom(post_head, zer_taken, dd_main_exit);
-
- Arena *a = Thread::current()->resource_area();
- VectorSet visited(a);
- Node_Stack clones(a, main_head->back_control()->outcnt());
- // Step A3: Make the fall-in values to the post-loop come from the
- // fall-out values of the main-loop.
- for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
- Node* main_phi = main_head->fast_out(i);
- if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) {
- Node *post_phi = old_new[main_phi->_idx];
- Node *fallmain = clone_up_backedge_goo(main_head->back_control(),
- post_head->init_control(),
- main_phi->in(LoopNode::LoopBackControl),
- visited, clones);
- _igvn.hash_delete(post_phi);
- post_phi->set_req( LoopNode::EntryControl, fallmain );
- }
- }
+ // Add the post loop
+ PostLoopInfo post_loop_info;
+ insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);
// Update local caches for next stanza
- main_exit = new_main_exit;
+ Node *main_exit = post_loop_info.new_main_exit;
//------------------------------
// Step B: Create Pre-Loop.
// Step B1: Clone the loop body. The clone becomes the pre-loop. The main
// loop pre-header illegally has 2 control users (old & new loops).
- clone_loop( loop, old_new, dd_main_head );
+ clone_loop(loop, old_new, dd_main_head);
CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
pre_head->set_pre_loop(main_head);
Node *pre_incr = old_new[incr->_idx];
// Reduce the pre-loop trip count.
pre_end->_prob = PROB_FAIR;
// Find the pre-loop normal exit.
Node* pre_exit = pre_end->proj_out(false);
- assert( pre_exit->Opcode() == Op_IfFalse, "" );
+ assert(pre_exit->Opcode() == Op_IfFalse, "");
IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
- _igvn.register_new_node_with_optimizer( new_pre_exit );
+ _igvn.register_new_node_with_optimizer(new_pre_exit);
set_idom(new_pre_exit, pre_end, dd_main_head);
set_loop(new_pre_exit, loop->_parent);
// Step B2: Build a zero-trip guard for the main-loop. After leaving the
// pre-loop, the main-loop may not execute at all. Later in life this
// zero-trip guard will become the minimum-trip guard when we unroll
// the main-loop.
Node *min_opaq = new Opaque1Node(C, limit);
- Node *min_cmp = new CmpINode( pre_incr, min_opaq );
- Node *min_bol = new BoolNode( min_cmp, b_test );
- register_new_node( min_opaq, new_pre_exit );
- register_new_node( min_cmp , new_pre_exit );
- register_new_node( min_bol , new_pre_exit );
+ Node *min_cmp = new CmpINode(pre_incr, min_opaq);
+ Node *min_bol = new BoolNode(min_cmp, b_test);
+ register_new_node(min_opaq, new_pre_exit);
+ register_new_node(min_cmp , new_pre_exit);
+ register_new_node(min_bol , new_pre_exit);
// Build the IfNode (assume the main-loop is executed always).
- IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
- _igvn.register_new_node_with_optimizer( min_iff );
+ IfNode *min_iff = new IfNode(new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN);
+ _igvn.register_new_node_with_optimizer(min_iff);
set_idom(min_iff, new_pre_exit, dd_main_head);
set_loop(min_iff, loop->_parent);
// Plug in the false-path, taken if we need to skip main-loop
- _igvn.hash_delete( pre_exit );
+ _igvn.hash_delete(pre_exit);
pre_exit->set_req(0, min_iff);
set_idom(pre_exit, min_iff, dd_main_head);
set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
// Make the true-path, must enter the main loop
- Node *min_taken = new IfTrueNode( min_iff );
- _igvn.register_new_node_with_optimizer( min_taken );
+ Node *min_taken = new IfTrueNode(min_iff);
+ _igvn.register_new_node_with_optimizer(min_taken);
set_idom(min_taken, min_iff, dd_main_head);
set_loop(min_taken, loop->_parent);
// Plug in the true path
- _igvn.hash_delete( main_head );
+ _igvn.hash_delete(main_head);
main_head->set_req(LoopNode::EntryControl, min_taken);
set_idom(main_head, min_taken, dd_main_head);
- visited.Clear();
- clones.clear();
+ Arena *a = Thread::current()->resource_area();
+ VectorSet visited(a);
+ Node_Stack clones(a, main_head->back_control()->outcnt());
// Step B3: Make the fall-in values to the main-loop come from the
// fall-out values of the pre-loop.
for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
Node* main_phi = main_head->fast_out(i2);
- if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
+ if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0) {
Node *pre_phi = old_new[main_phi->_idx];
Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
main_head->init_control(),
pre_phi->in(LoopNode::LoopBackControl),
visited, clones);
_igvn.hash_delete(main_phi);
- main_phi->set_req( LoopNode::EntryControl, fallpre );
+ main_phi->set_req(LoopNode::EntryControl, fallpre);
}
}
// Nodes inside the loop may be control dependent on a predicate
// that was moved before the preloop. If the back branch of the main
@@ -1183,33 +1115,29 @@
// test that was guarding the loop nest. We add a special CastII on
// the if branch that enters the loop, between the input induction
// variable value and the induction variable Phi to preserve correct
// dependencies.
- // CastII for the post loop:
- bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
- assert(inserted, "no castII inserted");
-
// CastII for the main loop:
- inserted = cast_incr_before_loop(pre_incr, min_taken, main_head);
+ bool inserted = cast_incr_before_loop(pre_incr, min_taken, main_head);
assert(inserted, "no castII inserted");
// Step B4: Shorten the pre-loop to run only 1 iteration (for now).
// RCE and alignment may change this later.
Node *cmp_end = pre_end->cmp_node();
- assert( cmp_end->in(2) == limit, "" );
- Node *pre_limit = new AddINode( init, stride );
+ assert(cmp_end->in(2) == limit, "");
+ Node *pre_limit = new AddINode(init, stride);
// Save the original loop limit in this Opaque1 node for
// use by range check elimination.
Node *pre_opaq = new Opaque1Node(C, pre_limit, limit);
- register_new_node( pre_limit, pre_head->in(0) );
- register_new_node( pre_opaq , pre_head->in(0) );
+ register_new_node(pre_limit, pre_head->in(0));
+ register_new_node(pre_opaq , pre_head->in(0));
// Since no other users of pre-loop compare, I can hack limit directly
- assert( cmp_end->outcnt() == 1, "no other users" );
+ assert(cmp_end->outcnt() == 1, "no other users");
_igvn.hash_delete(cmp_end);
cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
// Special case for not-equal loop bounds:
// Change pre loop test, main loop test, and the
@@ -1225,36 +1153,36 @@
BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
// Modify pre loop end condition
Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
- register_new_node( new_bol0, pre_head->in(0) );
+ register_new_node(new_bol0, pre_head->in(0));
_igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
// Modify main loop guard condition
assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
- register_new_node( new_bol1, new_pre_exit );
+ register_new_node(new_bol1, new_pre_exit);
_igvn.hash_delete(min_iff);
min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
// Modify main loop end condition
BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
- register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
+ register_new_node(new_bol2, main_end->in(CountedLoopEndNode::TestControl));
_igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
}
// Flag main loop
main_head->set_main_loop();
- if( peel_only ) main_head->set_main_no_pre_loop();
+ if (peel_only) main_head->set_main_no_pre_loop();
// Subtract a trip count for the pre-loop.
main_head->set_trip_count(main_head->trip_count() - 1);
// It's difficult to be precise about the trip-counts
// for the pre/post loops. They are usually very short,
// so guess that 4 trips is a reasonable value.
- post_head->set_profile_trip_cnt(4.0);
+ post_loop_info.post_head->set_profile_trip_cnt(4.0);
pre_head->set_profile_trip_cnt(4.0);
// Now force out all loop-invariant dominating tests. The optimizer
// finds some, but we _know_ they are all useless.
peeled_dom_test_elim(loop,old_new);
@@ -1296,23 +1224,85 @@
CountedLoopNode *main_head = loop->_head->as_CountedLoop();
CountedLoopEndNode *main_end = main_head->loopexit();
guarantee(main_end != NULL, "no loop exit node");
// diagnostic to show loop end is not properly formed
assert(main_end->outcnt() == 2, "1 true, 1 false path only");
- uint dd_main_head = dom_depth(main_head);
- uint max = main_head->outcnt();
// mark this loop as processed
main_head->mark_has_atomic_post_loop();
- Node *pre_header = main_head->in(LoopNode::EntryControl);
- Node *init = main_head->init_trip();
Node *incr = main_end->incr();
Node *limit = main_end->limit();
- Node *stride = main_end->stride();
- Node *cmp = main_end->cmp_node();
- BoolTest::mask b_test = main_end->test_trip();
+
+ // In this case we throw away the result as we are not using it to connect anything else.
+ PostLoopInfo post_loop_info;
+ insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);
+
+ // It's difficult to be precise about the trip-counts
+ // for post loops. They are usually very short,
+ // so guess that unit vector trips is a reasonable value.
+ post_loop_info.post_head->set_profile_trip_cnt(cur_unroll);
+
+ // Now force out all loop-invariant dominating tests. The optimizer
+ // finds some, but we _know_ they are all useless.
+ peeled_dom_test_elim(loop, old_new);
+ loop->record_for_igvn();
+}
+
+
+//-------------------------insert_scalar_rced_post_loop------------------------
+// Insert a copy of the rce'd main loop as a post loop,
+// We have not unrolled the main loop, so this is the right time to inject this.
+// Later we will examine the partner of this post loop pair which still has range checks
+// to see inject code which tests at runtime if the range checks are applicable.
+void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List &old_new) {
+ if (!loop->_head->is_CountedLoop()) return;
+
+ CountedLoopNode *cl = loop->_head->as_CountedLoop();
+
+ // only process RCE'd main loops
+ if (cl->loop_has_range_checks() || !cl->is_main_loop()) return;
+
+#ifndef PRODUCT
+ if (TraceLoopOpts) {
+ tty->print("PostScalarRce ");
+ loop->dump_head();
+ }
+#endif
+ C->set_major_progress();
+
+ // Find common pieces of the loop being guarded with pre & post loops
+ CountedLoopNode *main_head = loop->_head->as_CountedLoop();
+ CountedLoopEndNode *main_end = main_head->loopexit();
+ guarantee(main_end != NULL, "no loop exit node");
+ // diagnostic to show loop end is not properly formed
+ assert(main_end->outcnt() == 2, "1 true, 1 false path only");
+
+ Node *incr = main_end->incr();
+ Node *limit = main_end->limit();
+
+ // In this case we throw away the result as we are not using it to connect anything else.
+ PostLoopInfo post_loop_info;
+ insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);
+
+ // It's difficult to be precise about the trip-counts
+ // for post loops. They are usually very short,
+ // so guess that unit vector trips is a reasonable value.
+ post_loop_info.post_head->set_profile_trip_cnt(4.0);
+
+ // Now force out all loop-invariant dominating tests. The optimizer
+ // finds some, but we _know_ they are all useless.
+ peeled_dom_test_elim(loop, old_new);
+ loop->record_for_igvn();
+}
+
+
+//------------------------------insert_post_loop-------------------------------
+// Insert post loops. Add a post loop to the given loop passed.
+void PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
+ CountedLoopNode *main_head, CountedLoopEndNode *main_end,
+ Node *incr, Node *limit, PostLoopInfo &post_loop_info) {
//------------------------------
// Step A: Create a new post-Loop.
Node* main_exit = main_end->proj_out(false);
assert(main_exit->Opcode() == Op_IfFalse, "");
@@ -1323,10 +1313,11 @@
clone_loop(loop, old_new, dd_main_exit);
assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
post_head->set_normal_loop();
post_head->set_post_loop(main_head);
+ post_loop_info.post_head = post_head;
// Reduce the post-loop trip count.
CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
post_end->_prob = PROB_FAIR;
@@ -1334,33 +1325,33 @@
IfFalseNode *new_main_exit = new IfFalseNode(main_end);
_igvn.register_new_node_with_optimizer(new_main_exit);
set_idom(new_main_exit, main_end, dd_main_exit);
set_loop(new_main_exit, loop->_parent);
- // Step A2: Build a zero-trip guard for the vector post-loop. After leaving the
- // main-loop, the vector post-loop may not execute at all. We 'opaque' the incr
- // (the vectorized main-loop trip-counter exit value) because we will be changing
+ // Step A2: Build a zero-trip guard for the post-loop. After leaving the
+ // main-loop, the post-loop may not execute at all. We 'opaque' the incr
+ // (the previous loop trip-counter exit value) because we will be changing
// the exit value (via additional unrolling) so we cannot constant-fold away the zero
// trip guard until all unrolling is done.
Node *zer_opaq = new Opaque1Node(C, incr);
Node *zer_cmp = new CmpINode(zer_opaq, limit);
- Node *zer_bol = new BoolNode(zer_cmp, b_test);
+ Node *zer_bol = new BoolNode(zer_cmp, main_end->test_trip());
register_new_node(zer_opaq, new_main_exit);
register_new_node(zer_cmp, new_main_exit);
register_new_node(zer_bol, new_main_exit);
// Build the IfNode
IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
_igvn.register_new_node_with_optimizer(zer_iff);
set_idom(zer_iff, new_main_exit, dd_main_exit);
set_loop(zer_iff, loop->_parent);
- // Plug in the false-path, taken if we need to skip vector post-loop
+ // Plug in the false-path, taken if we need to skip this post-loop
_igvn.replace_input_of(main_exit, 0, zer_iff);
set_idom(main_exit, zer_iff, dd_main_exit);
set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
- // Make the true-path, must enter the vector post loop
+ // Make the true-path, must enter this post loop
Node *zer_taken = new IfTrueNode(zer_iff);
_igvn.register_new_node_with_optimizer(zer_taken);
set_idom(zer_taken, zer_iff, dd_main_exit);
set_loop(zer_taken, loop->_parent);
// Plug in the true path
@@ -1369,11 +1360,11 @@
set_idom(post_head, zer_taken, dd_main_exit);
Arena *a = Thread::current()->resource_area();
VectorSet visited(a);
Node_Stack clones(a, main_head->back_control()->outcnt());
- // Step A3: Make the fall-in values to the vector post-loop come from the
+ // Step A3: Make the fall-in values to the post-loop come from the
// fall-out values of the main-loop.
for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
Node* main_phi = main_head->fast_out(i);
if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) {
Node *cur_phi = old_new[main_phi->_idx];
@@ -1388,21 +1379,14 @@
// CastII for the new post loop:
bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
assert(inserted, "no castII inserted");
- // It's difficult to be precise about the trip-counts
- // for post loops. They are usually very short,
- // so guess that unit vector trips is a reasonable value.
- post_head->set_profile_trip_cnt((float)slp_max_unroll_factor);
-
- // Now force out all loop-invariant dominating tests. The optimizer
- // finds some, but we _know_ they are all useless.
- peeled_dom_test_elim(loop, old_new);
- loop->record_for_igvn();
+ post_loop_info.new_main_exit = new_main_exit;
}
+
//------------------------------is_invariant-----------------------------
// Return true if n is invariant
bool IdealLoopTree::is_invariant(Node* n) const {
Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
if (n_c->is_top()) return false;
@@ -2095,11 +2079,11 @@
return false;
}
//------------------------------do_range_check---------------------------------
// Eliminate range-checks and other trip-counter vs loop-invariant tests.
-void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
+void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &range_check_list) {
#ifndef PRODUCT
if (PrintOpto && VerifyLoopOptimizations) {
tty->print("Range Check Elimination ");
loop->dump_head();
} else if (TraceLoopOpts) {
@@ -2139,13 +2123,13 @@
// Find the pre-loop limit; we will expand its iterations to
// not ever trip low tests.
Node *p_f = iffm->in(0);
// pre loop may have been optimized out
- if (p_f->Opcode() != Op_IfFalse) {
+ if (p_f->Opcode() != Op_IfFalse)
return;
- }
+
CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
assert(pre_end->loopnode()->is_pre_loop(), "");
Node *pre_opaq1 = pre_end->limit();
// Occasionally it's possible for a pre-loop Opaque1 node to be
// optimized away and then another round of loop opts attempted.
@@ -2183,27 +2167,28 @@
// executed.
bool conditional_rc = false;
// Check loop body for tests of trip-counter plus loop-invariant vs
// loop-invariant.
- for( uint i = 0; i < loop->_body.size(); i++ ) {
+ for (uint i = 0; i < loop->_body.size(); i++) {
Node *iff = loop->_body[i];
if (iff->Opcode() == Op_If ||
iff->Opcode() == Op_RangeCheck) { // Test?
// Test is an IfNode, has 2 projections. If BOTH are in the loop
// we need loop unswitching instead of iteration splitting.
+ range_check_list.push(iff);
Node *exit = loop->is_loop_exit(iff);
- if( !exit ) continue;
+ if (!exit) continue;
int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
// Get boolean condition to test
Node *i1 = iff->in(1);
- if( !i1->is_Bool() ) continue;
+ if (!i1->is_Bool()) continue;
BoolNode *bol = i1->as_Bool();
BoolTest b_test = bol->_test;
// Flip sense of test if exit condition is flipped
- if( flip )
+ if (flip)
b_test = b_test.negate();
// Get compare
Node *cmp = bol->in(1);
@@ -2211,45 +2196,43 @@
Node *rc_exp = cmp->in(1);
Node *limit = cmp->in(2);
jint scale_con= 1; // Assume trip counter not scaled
Node *limit_c = get_ctrl(limit);
- if( loop->is_member(get_loop(limit_c) ) ) {
+ if (loop->is_member(get_loop(limit_c))) {
// Compare might have operands swapped; commute them
b_test = b_test.commute();
rc_exp = cmp->in(2);
limit = cmp->in(1);
limit_c = get_ctrl(limit);
- if( loop->is_member(get_loop(limit_c) ) )
+ if (loop->is_member(get_loop(limit_c)))
continue; // Both inputs are loop varying; cannot RCE
}
// Here we know 'limit' is loop invariant
// 'limit' maybe pinned below the zero trip test (probably from a
// previous round of rce), in which case, it can't be used in the
// zero trip test expression which must occur before the zero test's if.
- if( limit_c == ctrl ) {
+ if (limit_c == ctrl)
continue; // Don't rce this check but continue looking for other candidates.
- }
// Check for scaled induction variable plus an offset
Node *offset = NULL;
- if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) {
+ if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset))
continue;
- }
Node *offset_c = get_ctrl(offset);
- if( loop->is_member( get_loop(offset_c) ) )
+ if (loop->is_member( get_loop(offset_c)))
continue; // Offset is not really loop invariant
// Here we know 'offset' is loop invariant.
// As above for the 'limit', the 'offset' maybe pinned below the
// zero trip test.
- if( offset_c == ctrl ) {
+ if (offset_c == ctrl)
continue; // Don't rce this check but continue looking for other candidates.
- }
+
#ifdef ASSERT
if (TraceRangeLimitCheck) {
tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
bol->dump(2);
}
@@ -2260,12 +2243,12 @@
// monotonically increases by stride_con, a constant. Both (or either)
// stride_con and scale_con can be negative which will flip about the
// sense of the test.
// Adjust pre and main loop limits to guard the correct iteration set
- if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
- if( b_test._test == BoolTest::lt ) { // Range checks always use lt
+ if (cmp->Opcode() == Op_CmpU) {// Unsigned compare is really 2 tests
+ if (b_test._test == BoolTest::lt) { // Range checks always use lt
// The underflow and overflow limits: 0 <= scale*I+offset < limit
add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
if (!conditional_rc) {
// (0-offset)/scale could be outside of loop iterations range.
conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
@@ -2281,27 +2264,27 @@
case BoolTest::gt:
// Fall into GE case
case BoolTest::ge:
// Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
scale_con = -scale_con;
- offset = new SubINode( zero, offset );
- register_new_node( offset, pre_ctrl );
- limit = new SubINode( zero, limit );
- register_new_node( limit, pre_ctrl );
+ offset = new SubINode(zero, offset);
+ register_new_node(offset, pre_ctrl);
+ limit = new SubINode(zero, limit);
+ register_new_node(limit, pre_ctrl);
// Fall into LE case
case BoolTest::le:
if (b_test._test != BoolTest::gt) {
// Convert X <= Y to X < Y+1
- limit = new AddINode( limit, one );
- register_new_node( limit, pre_ctrl );
+ limit = new AddINode(limit, one);
+ register_new_node(limit, pre_ctrl);
}
// Fall into LT case
case BoolTest::lt:
// The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
// Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
// to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
- add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
+ add_constraint(stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
if (!conditional_rc) {
// ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
// Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
// still be outside of loop range.
conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
@@ -2315,11 +2298,11 @@
}
}
// Kill the eliminated test
C->set_major_progress();
- Node *kill_con = _igvn.intcon( 1-flip );
+ Node *kill_con = _igvn.intcon(1-flip);
set_ctrl(kill_con, C->root());
_igvn.replace_input_of(iff, 1, kill_con);
// Find surviving projection
assert(iff->is_If(), "");
ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
@@ -2332,10 +2315,11 @@
_igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
--i;
--imax;
}
}
+ range_check_list.pop();
} // End of is IF
}
@@ -2371,28 +2355,188 @@
}
Node *main_cle = cl->loopexit();
Node *main_bol = main_cle->in(1);
// Hacking loop bounds; need private copies of exit test
- if( main_bol->outcnt() > 1 ) {// BoolNode shared?
+ if (main_bol->outcnt() > 1) {// BoolNode shared?
main_bol = main_bol->clone();// Clone a private BoolNode
register_new_node( main_bol, main_cle->in(0) );
_igvn.replace_input_of(main_cle, 1, main_bol);
}
Node *main_cmp = main_bol->in(1);
- if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
+ if (main_cmp->outcnt() > 1) { // CmpNode shared?
main_cmp = main_cmp->clone();// Clone a private CmpNode
- register_new_node( main_cmp, main_cle->in(0) );
+ register_new_node(main_cmp, main_cle->in(0));
_igvn.replace_input_of(main_bol, 1, main_cmp);
}
// Hack the now-private loop bounds
_igvn.replace_input_of(main_cmp, 2, main_limit);
// The OpaqueNode is unshared by design
assert( opqzm->outcnt() == 1, "cannot hack shared node" );
_igvn.replace_input_of(opqzm, 1, main_limit);
}
+//------------------------------has_range_checks-------------------------------
+// Check to see if RCE cleaned the current loop of range-checks.
+void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
+ assert(RangeCheckElimination, "");
+ CountedLoopNode *cl = loop->_head->as_CountedLoop();
+
+ // skip this loop if it is already marked
+ if (cl->loop_has_range_checks()) return;
+
+ if (cl->is_main_loop() || cl->is_post_loop()) {
+ // Now check for existance of range checks
+ for (uint i = 0; i < loop->_body.size(); i++) {
+ Node *iff = loop->_body[i];
+ if (iff->Opcode() == Op_If ||
+ iff->Opcode() == Op_RangeCheck) {
+ cl->mark_has_range_checks();
+ break;
+ }
+ }
+ }
+}
+
+//-------------------------multi_version_post_loops----------------------------
+// Check the range checks that remain, if simple, use the bounds to guard
+// which version to a post loop we execute, one with range checks or one without
+bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
+ bool multi_version_succeeded = false;
+ assert(RangeCheckElimination, "");
+ CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
+ assert(legacy_cl->is_post_loop(), "");
+
+ // Check for existance of range checks using the unique instance to make a guard with
+ Unique_Node_List worklist;
+ for (uint i = 0; i < legacy_loop->_body.size(); i++) {
+ Node *iff = legacy_loop->_body[i];
+ if (iff->Opcode() == Op_If || iff->Opcode() == Op_RangeCheck) {
+ worklist.push(iff);
+ }
+ }
+
+ // Find RCE'd post loop so that we can stage its guard.
+ Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
+ if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;
+ Node* iffm = ctrl->in(0);
+ if (!iffm->is_If()) return multi_version_succeeded;
+ Node* cur_bool = iffm->in(1);
+ if (!cur_bool->is_Bool()) return multi_version_succeeded;
+ Node* cur_cmp = cur_bool->in(1);
+ if (!cur_cmp->is_Cmp()) return multi_version_succeeded;
+ Node* cur_opq = cur_cmp->in(1);
+ // Can not optimize a loop if zero-trip Opaque1 node is optimized away.
+ if (cur_opq->Opcode() != Op_Opaque1) return multi_version_succeeded;
+
+ // Now we test that both the post loops are connected
+ Node* post_loop_region = iffm->in(0);
+ if (post_loop_region == NULL) return multi_version_succeeded;
+ if (!post_loop_region->is_Region()) return multi_version_succeeded;
+ Node* covering_region = post_loop_region->in(RegionNode::Control+1);
+ if (covering_region == NULL) return multi_version_succeeded;
+ if (!covering_region->is_Region()) return multi_version_succeeded;
+ Node* p_f = covering_region->in(RegionNode::Control);
+ if (p_f == NULL) return multi_version_succeeded;
+ if (!p_f->is_IfFalse()) return multi_version_succeeded;
+ if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded;
+ CountedLoopEndNode* rce_loop_end = p_f->in(0)->as_CountedLoopEnd();
+ if (rce_loop_end == NULL) return multi_version_succeeded;
+ CountedLoopNode* rce_cl = rce_loop_end->loopnode();
+ if (rce_cl == NULL || !rce_cl->is_post_loop()) return multi_version_succeeded;
+ CountedLoopNode *known_rce_cl = rce_loop->_head->as_CountedLoop();
+ if (rce_cl != known_rce_cl) return multi_version_succeeded;
+
+ // Then we fetch the cover entry test
+ ctrl = rce_cl->in(LoopNode::EntryControl);
+ if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;
+
+#ifndef PRODUCT
+ if (TraceLoopOpts) {
+ tty->print("PostMultiVersion\n");
+ rce_loop->dump_head();
+ legacy_loop->dump_head();
+ }
+#endif
+
+ // Now fetch the limit we want to compare against
+ Node *limit = rce_cl->limit();
+ bool first_time = true;
+
+ // If we got this far, we identified the post loop which has been RCE'd and
+ // we have a work list. Now we will try to transform the if guard to cause
+ // the loop pair to be multi version executed with the determination left to runtime
+ // or the optimizer if full information is known about the given arrays at compile time.
+ Node *last_min = NULL;
+ while (worklist.size()) {
+ Node* rc_iffm = worklist.pop();
+ if (rc_iffm->is_If()) {
+ Node *rc_bolzm = rc_iffm->in(1);
+ if (rc_bolzm->is_Bool()) {
+ Node *rc_cmpzm = rc_bolzm->in(1);
+ if (rc_cmpzm->is_Cmp()) {
+ Node *rc_left = rc_cmpzm->in(2);
+ if (first_time) {
+ last_min = rc_left;
+ first_time = false;
+ } else {
+ Node *cur_min = new MinINode(last_min, rc_left);
+ last_min = cur_min;
+ _igvn.register_new_node_with_optimizer(last_min);
+ }
+ }
+ }
+ }
+ }
+
+ // All we have to do is update the limit of the rce loop
+ // with the min of our expression and the current limit.
+ // We will use this expression to replace the current limit.
+ if (last_min) {
+ Node *cur_min = new MinINode(last_min, limit);
+ _igvn.register_new_node_with_optimizer(cur_min);
+ Node *cmp_node = rce_loop_end->cmp_node();
+ _igvn.replace_input_of(cmp_node, 2, cur_min);
+ set_idom(cmp_node, cur_min, dom_depth(ctrl));
+ set_ctrl(cur_min, ctrl);
+ set_loop(cur_min, rce_loop->_parent);
+
+ legacy_cl->mark_is_multiversioned();
+ rce_cl->mark_is_multiversioned();
+ multi_version_succeeded = true;
+
+ C->set_major_progress();
+ }
+
+ return multi_version_succeeded;
+}
+
+//-------------------------poison_rce_post_loop--------------------------------
+// Causes the rce'd post loop to be optimized away if multiverioning fails
+void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
+ CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
+ Node* ctrl = rce_cl->in(LoopNode::EntryControl);
+ if (ctrl->is_IfTrue() || ctrl->is_IfFalse()) {
+ Node* iffm = ctrl->in(0);
+ if (iffm->is_If()) {
+ Node* cur_bool = iffm->in(1);
+ if (cur_bool->is_Bool()) {
+ Node* cur_cmp = cur_bool->in(1);
+ if (cur_cmp->is_Cmp()) {
+ BoolTest::mask new_test = BoolTest::gt;
+ BoolNode *new_bool = new BoolNode(cur_cmp, new_test);
+ _igvn.replace_node(cur_bool, new_bool);
+ _igvn._worklist.push(new_bool);
+ Node* left_op = cur_cmp->in(1);
+ _igvn.replace_input_of(cur_cmp, 2, left_op);
+ C->set_major_progress();
+ }
+ }
+ }
+ }
+}
+
//------------------------------DCE_loop_body----------------------------------
// Remove simplistic dead code from loop body
void IdealLoopTree::DCE_loop_body() {
for( uint i = 0; i < _body.size(); i++ )
if( _body.at(i)->outcnt() == 0 )
@@ -2746,12 +2890,23 @@
phase->insert_pre_post_loops(this,old_new, !may_rce_align);
// Adjust the pre- and main-loop limits to let the pre and post loops run
// with full checks, but the main-loop with no checks. Remove said
// checks from the main body.
- if (should_rce)
- phase->do_range_check(this,old_new);
+ if (should_rce) {
+ Node_List range_check_list;
+ phase->do_range_check(this, range_check_list);
+ if (range_check_list.size() > 0) {
+ cl->mark_has_range_checks();
+ }
+ }
+
+ if (should_rce && should_unroll && !should_peel && PostLoopMultiversioning) {
+ // Try to setup multiversioning on main loops before they are unrolled
+ if (cl->is_main_loop() && (cl->unrolled_count() == 1))
+ phase->insert_scalar_rced_post_loop(this, old_new);
+ }
// Double loop body for unrolling. Adjust the minimum-trip test (will do
// twice as many iterations as before) and the main body limit (only do
// an even number of trips). If we are peeling, we might enable some RCE
// and we'd rather unroll the post-RCE'd loop SO... do not unroll if
< prev index next >