< prev index next >

src/share/vm/opto/loopTransform.cpp

Print this page

        

*** 1025,1109 **** cmp = cmp->clone(); register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl)); _igvn.replace_input_of(bol, 1, cmp); } ! //------------------------------ ! // Step A: Create Post-Loop. ! Node* main_exit = main_end->proj_out(false); ! assert( main_exit->Opcode() == Op_IfFalse, "" ); ! int dd_main_exit = dom_depth(main_exit); ! ! // Step A1: Clone the loop body. The clone becomes the post-loop. The main ! // loop pre-header illegally has 2 control users (old & new loops). ! clone_loop( loop, old_new, dd_main_exit ); ! assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" ); ! CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop(); ! post_head->set_post_loop(main_head); ! ! // Reduce the post-loop trip count. ! CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd(); ! post_end->_prob = PROB_FAIR; ! ! // Build the main-loop normal exit. ! IfFalseNode *new_main_exit = new IfFalseNode(main_end); ! _igvn.register_new_node_with_optimizer( new_main_exit ); ! set_idom(new_main_exit, main_end, dd_main_exit ); ! set_loop(new_main_exit, loop->_parent); ! ! // Step A2: Build a zero-trip guard for the post-loop. After leaving the ! // main-loop, the post-loop may not execute at all. We 'opaque' the incr ! // (the main-loop trip-counter exit value) because we will be changing ! // the exit value (via unrolling) so we cannot constant-fold away the zero ! // trip guard until all unrolling is done. ! Node *zer_opaq = new Opaque1Node(C, incr); ! Node *zer_cmp = new CmpINode( zer_opaq, limit ); ! Node *zer_bol = new BoolNode( zer_cmp, b_test ); ! register_new_node( zer_opaq, new_main_exit ); ! register_new_node( zer_cmp , new_main_exit ); ! register_new_node( zer_bol , new_main_exit ); ! ! // Build the IfNode ! IfNode *zer_iff = new IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN ); ! _igvn.register_new_node_with_optimizer( zer_iff ); ! set_idom(zer_iff, new_main_exit, dd_main_exit); ! set_loop(zer_iff, loop->_parent); ! ! // Plug in the false-path, taken if we need to skip post-loop ! _igvn.replace_input_of(main_exit, 0, zer_iff); ! set_idom(main_exit, zer_iff, dd_main_exit); ! set_idom(main_exit->unique_out(), zer_iff, dd_main_exit); ! // Make the true-path, must enter the post loop ! Node *zer_taken = new IfTrueNode( zer_iff ); ! _igvn.register_new_node_with_optimizer( zer_taken ); ! set_idom(zer_taken, zer_iff, dd_main_exit); ! set_loop(zer_taken, loop->_parent); ! // Plug in the true path ! _igvn.hash_delete( post_head ); ! post_head->set_req(LoopNode::EntryControl, zer_taken); ! set_idom(post_head, zer_taken, dd_main_exit); ! ! Arena *a = Thread::current()->resource_area(); ! VectorSet visited(a); ! Node_Stack clones(a, main_head->back_control()->outcnt()); ! // Step A3: Make the fall-in values to the post-loop come from the ! // fall-out values of the main-loop. ! for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) { ! Node* main_phi = main_head->fast_out(i); ! if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) { ! Node *post_phi = old_new[main_phi->_idx]; ! Node *fallmain = clone_up_backedge_goo(main_head->back_control(), ! post_head->init_control(), ! main_phi->in(LoopNode::LoopBackControl), ! visited, clones); ! _igvn.hash_delete(post_phi); ! post_phi->set_req( LoopNode::EntryControl, fallmain ); ! } ! } // Update local caches for next stanza ! main_exit = new_main_exit; //------------------------------ // Step B: Create Pre-Loop. --- 1025,1040 ---- cmp = cmp->clone(); register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl)); _igvn.replace_input_of(bol, 1, cmp); } ! // Add the post loop ! PostLoopInfo post_loop_info; ! insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info); // Update local caches for next stanza ! Node *main_exit = post_loop_info.new_main_exit; //------------------------------ // Step B: Create Pre-Loop.
*** 1156,1167 **** // Plug in the true path _igvn.hash_delete( main_head ); main_head->set_req(LoopNode::EntryControl, min_taken); set_idom(main_head, min_taken, dd_main_head); ! visited.Clear(); ! clones.clear(); // Step B3: Make the fall-in values to the main-loop come from the // fall-out values of the pre-loop. for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) { Node* main_phi = main_head->fast_out(i2); if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) { --- 1087,1099 ---- // Plug in the true path _igvn.hash_delete( main_head ); main_head->set_req(LoopNode::EntryControl, min_taken); set_idom(main_head, min_taken, dd_main_head); ! Arena *a = Thread::current()->resource_area(); ! VectorSet visited(a); ! Node_Stack clones(a, main_head->back_control()->outcnt()); // Step B3: Make the fall-in values to the main-loop come from the // fall-out values of the pre-loop. for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) { Node* main_phi = main_head->fast_out(i2); if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
*** 1183,1198 **** // test that was guarding the loop nest. We add a special CastII on // the if branch that enters the loop, between the input induction // variable value and the induction variable Phi to preserve correct // dependencies. - // CastII for the post loop: - bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head); - assert(inserted, "no castII inserted"); - // CastII for the main loop: ! inserted = cast_incr_before_loop(pre_incr, min_taken, main_head); assert(inserted, "no castII inserted"); // Step B4: Shorten the pre-loop to run only 1 iteration (for now). // RCE and alignment may change this later. Node *cmp_end = pre_end->cmp_node(); --- 1115,1126 ---- // test that was guarding the loop nest. We add a special CastII on // the if branch that enters the loop, between the input induction // variable value and the induction variable Phi to preserve correct // dependencies. // CastII for the main loop: ! bool inserted = cast_incr_before_loop( pre_incr, min_taken, main_head ); assert(inserted, "no castII inserted"); // Step B4: Shorten the pre-loop to run only 1 iteration (for now). // RCE and alignment may change this later. Node *cmp_end = pre_end->cmp_node();
*** 1250,1260 **** main_head->set_trip_count(main_head->trip_count() - 1); // It's difficult to be precise about the trip-counts // for the pre/post loops. They are usually very short, // so guess that 4 trips is a reasonable value. ! post_head->set_profile_trip_cnt(4.0); pre_head->set_profile_trip_cnt(4.0); // Now force out all loop-invariant dominating tests. The optimizer // finds some, but we _know_ they are all useless. peeled_dom_test_elim(loop,old_new); --- 1178,1188 ---- main_head->set_trip_count(main_head->trip_count() - 1); // It's difficult to be precise about the trip-counts // for the pre/post loops. They are usually very short, // so guess that 4 trips is a reasonable value. ! post_loop_info.post_head->set_profile_trip_cnt(4.0); pre_head->set_profile_trip_cnt(4.0); // Now force out all loop-invariant dominating tests. The optimizer // finds some, but we _know_ they are all useless. peeled_dom_test_elim(loop,old_new);
*** 1296,1318 **** CountedLoopNode *main_head = loop->_head->as_CountedLoop(); CountedLoopEndNode *main_end = main_head->loopexit(); guarantee(main_end != NULL, "no loop exit node"); // diagnostic to show loop end is not properly formed assert(main_end->outcnt() == 2, "1 true, 1 false path only"); - uint dd_main_head = dom_depth(main_head); - uint max = main_head->outcnt(); // mark this loop as processed main_head->mark_has_atomic_post_loop(); - Node *pre_header = main_head->in(LoopNode::EntryControl); - Node *init = main_head->init_trip(); Node *incr = main_end->incr(); Node *limit = main_end->limit(); ! Node *stride = main_end->stride(); ! Node *cmp = main_end->cmp_node(); ! BoolTest::mask b_test = main_end->test_trip(); //------------------------------ // Step A: Create a new post-Loop. Node* main_exit = main_end->proj_out(false); assert(main_exit->Opcode() == Op_IfFalse, ""); --- 1224,1308 ---- CountedLoopNode *main_head = loop->_head->as_CountedLoop(); CountedLoopEndNode *main_end = main_head->loopexit(); guarantee(main_end != NULL, "no loop exit node"); // diagnostic to show loop end is not properly formed assert(main_end->outcnt() == 2, "1 true, 1 false path only"); // mark this loop as processed main_head->mark_has_atomic_post_loop(); Node *incr = main_end->incr(); Node *limit = main_end->limit(); ! ! // In this case we throw away the result as we are not using it to connect anything else. ! PostLoopInfo post_loop_info; ! insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info); ! ! // It's difficult to be precise about the trip-counts ! // for post loops. They are usually very short, ! // so guess that unit vector trips is a reasonable value. ! post_loop_info.post_head->set_profile_trip_cnt(cur_unroll); ! ! // Now force out all loop-invariant dominating tests. The optimizer ! // finds some, but we _know_ they are all useless. ! peeled_dom_test_elim(loop, old_new); ! loop->record_for_igvn(); ! } ! ! ! //-------------------------insert_scalar_rced_post_loop------------------------ ! // Insert a copy of the rce'd main loop as a post loop, ! // We have not unrolled the main loop, so this is the right time to inject this. ! // Later we will examine the partner of this post loop pair which still has range checks ! // to see inject code which tests at runtime if the range checks are applicable. ! void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List &old_new) { ! if (!loop->_head->is_CountedLoop()) return; ! ! CountedLoopNode *cl = loop->_head->as_CountedLoop(); ! ! // only process RCE'd main loops ! if (cl->has_range_checks() || !cl->is_main_loop()) return; ! ! #ifndef PRODUCT ! if (TraceLoopOpts) { ! tty->print("PostScalarRce "); ! loop->dump_head(); ! } ! #endif ! C->set_major_progress(); ! ! // Find common pieces of the loop being guarded with pre & post loops ! CountedLoopNode *main_head = loop->_head->as_CountedLoop(); ! CountedLoopEndNode *main_end = main_head->loopexit(); ! guarantee(main_end != NULL, "no loop exit node"); ! // diagnostic to show loop end is not properly formed ! assert(main_end->outcnt() == 2, "1 true, 1 false path only"); ! ! Node *incr = main_end->incr(); ! Node *limit = main_end->limit(); ! ! // In this case we throw away the result as we are not using it to connect anything else. ! PostLoopInfo post_loop_info; ! insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info); ! ! // It's difficult to be precise about the trip-counts ! // for post loops. They are usually very short, ! // so guess that unit vector trips is a reasonable value. ! post_loop_info.post_head->set_profile_trip_cnt(4.0); ! ! // Now force out all loop-invariant dominating tests. The optimizer ! // finds some, but we _know_ they are all useless. ! peeled_dom_test_elim(loop, old_new); ! loop->record_for_igvn(); ! } ! ! ! //------------------------------insert_post_loop------------------------------- ! // Insert post loops. Add a post loop to the given loop passed. ! void PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new, ! CountedLoopNode *main_head, CountedLoopEndNode *main_end, ! Node *incr, Node *limit, PostLoopInfo &post_loop_info) { //------------------------------ // Step A: Create a new post-Loop. Node* main_exit = main_end->proj_out(false); assert(main_exit->Opcode() == Op_IfFalse, "");
*** 1323,1332 **** --- 1313,1323 ---- clone_loop(loop, old_new, dd_main_exit); assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, ""); CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop(); post_head->set_normal_loop(); post_head->set_post_loop(main_head); + post_loop_info.post_head = post_head; // Reduce the post-loop trip count. CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd(); post_end->_prob = PROB_FAIR;
*** 1334,1366 **** IfFalseNode *new_main_exit = new IfFalseNode(main_end); _igvn.register_new_node_with_optimizer(new_main_exit); set_idom(new_main_exit, main_end, dd_main_exit); set_loop(new_main_exit, loop->_parent); ! // Step A2: Build a zero-trip guard for the vector post-loop. After leaving the ! // main-loop, the vector post-loop may not execute at all. We 'opaque' the incr ! // (the vectorized main-loop trip-counter exit value) because we will be changing // the exit value (via additional unrolling) so we cannot constant-fold away the zero // trip guard until all unrolling is done. Node *zer_opaq = new Opaque1Node(C, incr); Node *zer_cmp = new CmpINode(zer_opaq, limit); ! Node *zer_bol = new BoolNode(zer_cmp, b_test); register_new_node(zer_opaq, new_main_exit); register_new_node(zer_cmp, new_main_exit); register_new_node(zer_bol, new_main_exit); // Build the IfNode IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN); _igvn.register_new_node_with_optimizer(zer_iff); set_idom(zer_iff, new_main_exit, dd_main_exit); set_loop(zer_iff, loop->_parent); ! // Plug in the false-path, taken if we need to skip vector post-loop _igvn.replace_input_of(main_exit, 0, zer_iff); set_idom(main_exit, zer_iff, dd_main_exit); set_idom(main_exit->unique_out(), zer_iff, dd_main_exit); ! // Make the true-path, must enter the vector post loop Node *zer_taken = new IfTrueNode(zer_iff); _igvn.register_new_node_with_optimizer(zer_taken); set_idom(zer_taken, zer_iff, dd_main_exit); set_loop(zer_taken, loop->_parent); // Plug in the true path --- 1325,1357 ---- IfFalseNode *new_main_exit = new IfFalseNode(main_end); _igvn.register_new_node_with_optimizer(new_main_exit); set_idom(new_main_exit, main_end, dd_main_exit); set_loop(new_main_exit, loop->_parent); ! // Step A2: Build a zero-trip guard for the post-loop. After leaving the ! // main-loop, the post-loop may not execute at all. We 'opaque' the incr ! // (the previous loop trip-counter exit value) because we will be changing // the exit value (via additional unrolling) so we cannot constant-fold away the zero // trip guard until all unrolling is done. Node *zer_opaq = new Opaque1Node(C, incr); Node *zer_cmp = new CmpINode(zer_opaq, limit); ! Node *zer_bol = new BoolNode(zer_cmp, main_end->test_trip()); register_new_node(zer_opaq, new_main_exit); register_new_node(zer_cmp, new_main_exit); register_new_node(zer_bol, new_main_exit); // Build the IfNode IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN); _igvn.register_new_node_with_optimizer(zer_iff); set_idom(zer_iff, new_main_exit, dd_main_exit); set_loop(zer_iff, loop->_parent); ! // Plug in the false-path, taken if we need to skip this post-loop _igvn.replace_input_of(main_exit, 0, zer_iff); set_idom(main_exit, zer_iff, dd_main_exit); set_idom(main_exit->unique_out(), zer_iff, dd_main_exit); ! // Make the true-path, must enter this post loop Node *zer_taken = new IfTrueNode(zer_iff); _igvn.register_new_node_with_optimizer(zer_taken); set_idom(zer_taken, zer_iff, dd_main_exit); set_loop(zer_taken, loop->_parent); // Plug in the true path
*** 1369,1379 **** set_idom(post_head, zer_taken, dd_main_exit); Arena *a = Thread::current()->resource_area(); VectorSet visited(a); Node_Stack clones(a, main_head->back_control()->outcnt()); ! // Step A3: Make the fall-in values to the vector post-loop come from the // fall-out values of the main-loop. for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) { Node* main_phi = main_head->fast_out(i); if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) { Node *cur_phi = old_new[main_phi->_idx]; --- 1360,1370 ---- set_idom(post_head, zer_taken, dd_main_exit); Arena *a = Thread::current()->resource_area(); VectorSet visited(a); Node_Stack clones(a, main_head->back_control()->outcnt()); ! // Step A3: Make the fall-in values to the post-loop come from the // fall-out values of the main-loop. for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) { Node* main_phi = main_head->fast_out(i); if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) { Node *cur_phi = old_new[main_phi->_idx];
*** 1388,1406 **** // CastII for the new post loop: bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head); assert(inserted, "no castII inserted"); ! // It's difficult to be precise about the trip-counts ! // for post loops. They are usually very short, ! // so guess that unit vector trips is a reasonable value. ! post_head->set_profile_trip_cnt((float)slp_max_unroll_factor); ! ! // Now force out all loop-invariant dominating tests. The optimizer ! // finds some, but we _know_ they are all useless. ! peeled_dom_test_elim(loop, old_new); ! loop->record_for_igvn(); } //------------------------------is_invariant----------------------------- // Return true if n is invariant bool IdealLoopTree::is_invariant(Node* n) const { --- 1379,1389 ---- // CastII for the new post loop: bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head); assert(inserted, "no castII inserted"); ! post_loop_info.new_main_exit = new_main_exit; } //------------------------------is_invariant----------------------------- // Return true if n is invariant bool IdealLoopTree::is_invariant(Node* n) const {
*** 2189,2198 **** --- 2172,2182 ---- Node *iff = loop->_body[i]; if (iff->Opcode() == Op_If || iff->Opcode() == Op_RangeCheck) { // Test? // Test is an IfNode, has 2 projections. If BOTH are in the loop // we need loop unswitching instead of iteration splitting. + old_new.push(iff); Node *exit = loop->is_loop_exit(iff); if( !exit ) continue; int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0; // Get boolean condition to test
*** 2332,2341 **** --- 2316,2328 ---- _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL --i; --imax; } } + if (limit->Opcode() == Op_LoadRange) { + old_new.pop(); + } } // End of is IF }
*** 2389,2398 **** --- 2376,2552 ---- // The OpaqueNode is unshared by design assert( opqzm->outcnt() == 1, "cannot hack shared node" ); _igvn.replace_input_of(opqzm, 1, main_limit); } + //------------------------------has_range_checks------------------------------- + // Check to see if RCE cleaned the current loop of range-checks. + void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) { + assert(RangeCheckElimination, ""); + + // skip if not a counted loop + if (!loop->is_counted()) return; + + CountedLoopNode *cl = loop->_head->as_CountedLoop(); + + // skip this loop if it is already marked + if (cl->has_range_checks()) return; + + // Now check for existance of range checks + for (uint i = 0; i < loop->_body.size(); i++) { + Node *iff = loop->_body[i]; + if (iff->Opcode() == Op_If || + iff->Opcode() == Op_RangeCheck) { + cl->mark_has_range_checks(); + break; + } + } + } + + //-------------------------multi_version_post_loops---------------------------- + // Check the range checks that remain, if simple, use the bounds to guard + // which version to a post loop we execute, one with range checks or one without + bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) { + bool multi_version_succeeded = false; + assert(RangeCheckElimination, ""); + CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop(); + assert(legacy_cl->is_post_loop(), ""); + + // Check for existance of range checks using the unique instance to make a guard with + Unique_Node_List worklist; + for (uint i = 0; i < legacy_loop->_body.size(); i++) { + Node *iff = legacy_loop->_body[i]; + if (iff->Opcode() == Op_If || iff->Opcode() == Op_RangeCheck) { + worklist.push(iff); + } + } + + // Find RCE'd post loop so that we can stage its guard. + Node* ctrl = legacy_cl->in(LoopNode::EntryControl); + if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded; + Node* iffm = ctrl->in(0); + if (!iffm->is_If()) return multi_version_succeeded; + Node* cur_bool = iffm->in(1); + if (!cur_bool->is_Bool()) return multi_version_succeeded; + Node* cur_cmp = cur_bool->in(1); + if (!cur_cmp->is_Cmp()) return multi_version_succeeded; + Node* cur_opq = cur_cmp->in(1); + // Can not optimize a loop if zero-trip Opaque1 node is optimized away. + if (cur_opq->Opcode() != Op_Opaque1) return multi_version_succeeded; + + // Now we test that both the post loops are connected + Node* post_loop_region = iffm->in(0); + if (post_loop_region == NULL) return multi_version_succeeded; + if (!post_loop_region->is_Region()) return multi_version_succeeded; + Node* covering_region = post_loop_region->in(RegionNode::Control+1); + if (covering_region == NULL) return multi_version_succeeded; + if (!covering_region->is_Region()) return multi_version_succeeded; + Node* p_f = covering_region->in(RegionNode::Control); + if (p_f == NULL) return multi_version_succeeded; + if (!p_f->is_IfFalse()) return multi_version_succeeded; + if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded; + CountedLoopEndNode* rce_loop_end = p_f->in(0)->as_CountedLoopEnd(); + if (rce_loop_end == NULL) return multi_version_succeeded; + CountedLoopNode* rce_cl = rce_loop_end->loopnode(); + if (rce_cl == NULL || !rce_cl->is_post_loop()) return multi_version_succeeded; + CountedLoopNode *known_rce_cl = rce_loop->_head->as_CountedLoop(); + if (rce_cl != known_rce_cl) return multi_version_succeeded; + + // Then we fetch the cover entry test + ctrl = rce_cl->in(LoopNode::EntryControl); + if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded; + + #ifndef PRODUCT + if (TraceLoopOpts) { + tty->print("PostMultiVersion\n"); + rce_loop->dump_head(); + legacy_loop->dump_head(); + } + #endif + + // Now fetch the limit we want to compare against + Node *limit = rce_cl->limit(); + bool first_time = true; + + // If we got this far, we identified the post loop which has been RCE'd and + // we have a work list. Now we will try to transform the if guard to cause + // the loop pair to be multi version executed with the determination left to runtime + // or the optimizer if full information is known about the given arrays at compile time. + Node *last_min = NULL; + multi_version_succeeded = true; + while (worklist.size()) { + Node* rc_iffm = worklist.pop(); + if (rc_iffm->is_If()) { + Node *rc_bolzm = rc_iffm->in(1); + if (rc_bolzm->is_Bool()) { + Node *rc_cmpzm = rc_bolzm->in(1); + if (rc_cmpzm->is_Cmp()) { + Node *rc_left = rc_cmpzm->in(2); + if (rc_left->Opcode() != Op_LoadRange) { + multi_version_succeeded = false; + break; + } + if (first_time) { + last_min = rc_left; + first_time = false; + } else { + Node *cur_min = new MinINode(last_min, rc_left); + last_min = cur_min; + _igvn.register_new_node_with_optimizer(last_min); + } + } + } + } + } + + // All we have to do is update the limit of the rce loop + // with the min of our expression and the current limit. + // We will use this expression to replace the current limit. + if (last_min && multi_version_succeeded) { + Node *cur_min = new MinINode(last_min, limit); + _igvn.register_new_node_with_optimizer(cur_min); + Node *cmp_node = rce_loop_end->cmp_node(); + _igvn.replace_input_of(cmp_node, 2, cur_min); + set_idom(cmp_node, cur_min, dom_depth(ctrl)); + set_ctrl(cur_min, ctrl); + set_loop(cur_min, rce_loop->_parent); + + legacy_cl->mark_is_multiversioned(); + rce_cl->mark_is_multiversioned(); + multi_version_succeeded = true; + + C->set_major_progress(); + } + + return multi_version_succeeded; + } + + //-------------------------poison_rce_post_loop-------------------------------- + // Causes the rce'd post loop to be optimized away if multiverioning fails + void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) { + CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop(); + Node* ctrl = rce_cl->in(LoopNode::EntryControl); + if (ctrl->is_IfTrue() || ctrl->is_IfFalse()) { + Node* iffm = ctrl->in(0); + if (iffm->is_If()) { + Node* cur_bool = iffm->in(1); + if (cur_bool->is_Bool()) { + Node* cur_cmp = cur_bool->in(1); + if (cur_cmp->is_Cmp()) { + BoolTest::mask new_test = BoolTest::gt; + BoolNode *new_bool = new BoolNode(cur_cmp, new_test); + _igvn.replace_node(cur_bool, new_bool); + _igvn._worklist.push(new_bool); + Node* left_op = cur_cmp->in(1); + _igvn.replace_input_of(cur_cmp, 2, left_op); + C->set_major_progress(); + } + } + } + } + } + //------------------------------DCE_loop_body---------------------------------- // Remove simplistic dead code from loop body void IdealLoopTree::DCE_loop_body() { for( uint i = 0; i < _body.size(); i++ ) if( _body.at(i)->outcnt() == 0 )
*** 2746,2757 **** phase->insert_pre_post_loops(this,old_new, !may_rce_align); // Adjust the pre- and main-loop limits to let the pre and post loops run // with full checks, but the main-loop with no checks. Remove said // checks from the main body. ! if (should_rce) ! phase->do_range_check(this,old_new); // Double loop body for unrolling. Adjust the minimum-trip test (will do // twice as many iterations as before) and the main body limit (only do // an even number of trips). If we are peeling, we might enable some RCE // and we'd rather unroll the post-RCE'd loop SO... do not unroll if --- 2900,2923 ---- phase->insert_pre_post_loops(this,old_new, !may_rce_align); // Adjust the pre- and main-loop limits to let the pre and post loops run // with full checks, but the main-loop with no checks. Remove said // checks from the main body. ! if (should_rce) { ! old_new.clear(); ! phase->do_range_check(this, old_new); ! if (old_new.size() > 0) { ! cl->mark_has_range_checks(); ! old_new.clear(); ! } ! } ! ! if (should_rce && should_unroll && !should_peel && PostLoopMultiversioning) { ! // Try to setup multiversioning on main loops before they are unrolled ! if (cl->is_main_loop() && (cl->unrolled_count() == 1)) ! phase->insert_scalar_rced_post_loop(this, old_new); ! } // Double loop body for unrolling. Adjust the minimum-trip test (will do // twice as many iterations as before) and the main body limit (only do // an even number of trips). If we are peeling, we might enable some RCE // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
< prev index next >