--- old/src/hotspot/share/opto/c2_globals.hpp 2017-10-03 11:49:54.996519063 +0200 +++ new/src/hotspot/share/opto/c2_globals.hpp 2017-10-03 11:49:53.935519906 +0200 @@ -740,6 +740,14 @@ \ develop(bool, RenumberLiveNodes, true, \ "Renumber live nodes") \ + \ + product(uintx, LoopStripMiningIter, 0, \ + "Number of iterations in strip mined loop") \ + range(0, max_juint) \ + \ + product(uintx, LoopStripMiningIterShortLoop, 0, \ + "Loop with fewer iterations are not strip mined") \ + range(0, max_juint) \ C2_FLAGS(DECLARE_DEVELOPER_FLAG, \ DECLARE_PD_DEVELOPER_FLAG, \ --- old/src/hotspot/share/opto/cfgnode.cpp 2017-10-03 11:49:56.359517980 +0200 +++ new/src/hotspot/share/opto/cfgnode.cpp 2017-10-03 11:49:55.092518987 +0200 @@ -571,6 +571,18 @@ return NULL; } else if (can_reshape) { // Optimization phase - remove the node PhaseIterGVN *igvn = phase->is_IterGVN(); + // Strip mined (inner) loop is going away, remove outer loop. + if (is_CountedLoop() && + as_Loop()->is_strip_mined()) { + Node* outer_sfpt = as_CountedLoop()->outer_safepoint(); + Node* outer_out = as_CountedLoop()->outer_loop_exit(); + if (outer_sfpt != NULL && outer_out != NULL) { + Node* in = outer_sfpt->in(0); + igvn->replace_node(outer_out, in); + LoopNode* outer = as_CountedLoop()->outer_loop(); + igvn->replace_input_of(outer, LoopNode::LoopBackControl, igvn->C->top()); + } + } Node *parent_ctrl; if( cnt == 0 ) { assert( req() == 1, "no inputs expected" ); --- old/src/hotspot/share/opto/classes.hpp 2017-10-03 11:49:57.708516907 +0200 +++ new/src/hotspot/share/opto/classes.hpp 2017-10-03 11:49:56.473517889 +0200 @@ -223,6 +223,7 @@ macro(Opaque2) macro(Opaque3) macro(Opaque4) +macro(Opaque5) macro(ProfileBoolean) macro(OrI) macro(OrL) --- old/src/hotspot/share/opto/compile.cpp 2017-10-03 11:49:59.001515879 +0200 +++ new/src/hotspot/share/opto/compile.cpp 2017-10-03 11:49:57.786516845 +0200 @@ -3247,6 +3247,7 @@ if (n->as_Loop()->is_inner_loop()) { frc.inc_inner_loop_count(); } + 
n->as_Loop()->verify_strip_mined(0); break; case Op_LShiftI: case Op_RShiftI: --- old/src/hotspot/share/opto/ifnode.cpp 2017-10-03 11:50:00.164514955 +0200 +++ new/src/hotspot/share/opto/ifnode.cpp 2017-10-03 11:49:59.117515787 +0200 @@ -117,6 +117,7 @@ // No intervening control, like a simple Call Node *r = iff->in(0); if( !r->is_Region() ) return NULL; + if (r->is_Loop() && r->in(LoopNode::LoopBackControl)->is_top()) return NULL; // going away anyway if( phi->region() != r ) return NULL; // No other users of the cmp/bool if (b->outcnt() != 1 || cmp->outcnt() != 1) { @@ -1618,6 +1619,16 @@ // whether they are testing a 'gt' or 'lt' condition. The 'gt' condition // happens in count-down loops if (iff->is_CountedLoopEnd()) return NULL; + Node* proj_true = iff->proj_out(true); + if (proj_true->outcnt() == 1) { + Node* c = proj_true->unique_out(); + // Leave test of outer strip mined loop alone + if (c != NULL && c->is_Loop() && + c->in(LoopNode::LoopBackControl) == proj_true && + c->as_Loop()->is_strip_mined()) { + return NULL; + } + } if (!iff->in(1)->is_Bool()) return NULL; // Happens for partially optimized IF tests BoolNode *b = iff->in(1)->as_Bool(); BoolTest bt = b->_test; --- old/src/hotspot/share/opto/loopPredicate.cpp 2017-10-03 11:50:01.437513943 +0200 +++ new/src/hotspot/share/opto/loopPredicate.cpp 2017-10-03 11:50:00.258514880 +0200 @@ -515,8 +515,8 @@ _visited(area), _invariant(area), _stack(area, 10 /* guess */), _clone_visited(area), _old_new(area) { - Node* head = _lpt->_head; - Node* entry = head->in(LoopNode::EntryControl); + LoopNode* head = _lpt->_head->as_Loop(); + Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl); if (entry->outcnt() != 1) { // If a node is pinned between the predicates and the loop // entry, we won't be able to move any node in the loop that @@ -801,6 +801,10 @@ return false; } + if (head->Opcode() == Op_Loop && head->is_strip_mined()) { + return false; + } + CountedLoopNode *cl = NULL; if 
(head->is_valid_counted_loop()) { cl = head->as_CountedLoop(); @@ -812,7 +816,7 @@ cl = NULL; } - Node* entry = head->in(LoopNode::EntryControl); + Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl); ProjNode *predicate_proj = NULL; // Loop limit check predicate should be near the loop. predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); @@ -1007,6 +1011,8 @@ } #endif + head->verify_strip_mined(1); + return hoisted; } --- old/src/hotspot/share/opto/loopTransform.cpp 2017-10-03 11:50:02.567513045 +0200 +++ new/src/hotspot/share/opto/loopTransform.cpp 2017-10-03 11:50:01.521513876 +0200 @@ -67,6 +67,19 @@ Node *n = _body.at(i); _phase->_igvn._worklist.push(n); } + // put body of outer strip mined loop on igvn work list as well + if (_head->is_CountedLoop() && _head->as_Loop()->is_strip_mined()) { + CountedLoopNode* l = _head->as_CountedLoop(); + _phase->_igvn._worklist.push(l->outer_loop()); + _phase->_igvn._worklist.push(l->outer_loop_tail()); + _phase->_igvn._worklist.push(l->outer_loop_end()); + _phase->_igvn._worklist.push(l->outer_safepoint()); + _phase->_igvn._worklist.push(l->outer_bol()); + _phase->_igvn._worklist.push(l->outer_cmp()); + _phase->_igvn._worklist.push(l->outer_opaq()); + Node* cle_out = _head->as_CountedLoop()->loopexit()->proj_out(false); + _phase->_igvn._worklist.push(cle_out); + } } //------------------------------compute_exact_trip_count----------------------- @@ -494,7 +507,7 @@ loop->dump_head(); } #endif - Node* head = loop->_head; + LoopNode* head = loop->_head->as_Loop(); bool counted_loop = head->is_CountedLoop(); if (counted_loop) { CountedLoopNode *cl = head->as_CountedLoop(); @@ -514,7 +527,7 @@ // Step 1: Clone the loop body. The clone becomes the peeled iteration. // The pre-loop illegally has 2 control users (old & new loops). 
- clone_loop( loop, old_new, dom_depth(head) ); + clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), ControlAroundStripMined); // Step 2: Make the old-loop fall-in edges point to the peeled iteration. // Do this by making the old-loop fall-in edges act as if they came @@ -523,8 +536,8 @@ // the pre-loop with only 1 user (the new peeled iteration), but the // peeled-loop backedge has 2 users. Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx]; - _igvn.hash_delete(head); - head->set_req(LoopNode::EntryControl, new_entry); + _igvn.hash_delete(head->skip_strip_mined()); + head->skip_strip_mined()->set_req(LoopNode::EntryControl, new_entry); for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { Node* old = head->fast_out(j); if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) { @@ -1009,8 +1022,6 @@ CountedLoopEndNode *main_end = main_head->loopexit(); guarantee(main_end != NULL, "no loop exit node"); assert( main_end->outcnt() == 2, "1 true, 1 false path only" ); - uint dd_main_head = dom_depth(main_head); - uint max = main_head->outcnt(); Node *pre_header= main_head->in(LoopNode::EntryControl); Node *init = main_head->init_trip(); @@ -1043,7 +1054,16 @@ // Step B1: Clone the loop body. The clone becomes the pre-loop. The main // loop pre-header illegally has 2 control users (old & new loops). 
- clone_loop( loop, old_new, dd_main_head ); + LoopNode* outer_main_head = main_head; + IdealLoopTree* outer_loop = loop; + if (main_head->is_strip_mined()) { + main_head->verify_strip_mined(1); + outer_main_head = main_head->outer_loop(); + outer_loop = loop->_parent; + assert(outer_loop->_head == outer_main_head, "broken loop tree"); + } + uint dd_main_head = dom_depth(outer_main_head); + clone_loop(loop, old_new, dd_main_head, ControlAroundStripMined); CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop(); CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd(); pre_head->set_pre_loop(main_head); @@ -1058,7 +1078,7 @@ IfFalseNode *new_pre_exit = new IfFalseNode(pre_end); _igvn.register_new_node_with_optimizer( new_pre_exit ); set_idom(new_pre_exit, pre_end, dd_main_head); - set_loop(new_pre_exit, loop->_parent); + set_loop(new_pre_exit, outer_loop->_parent); // Step B2: Build a zero-trip guard for the main-loop. After leaving the // pre-loop, the main-loop may not execute at all. 
Later in life this @@ -1075,22 +1095,22 @@ IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN ); _igvn.register_new_node_with_optimizer( min_iff ); set_idom(min_iff, new_pre_exit, dd_main_head); - set_loop(min_iff, loop->_parent); + set_loop(min_iff, outer_loop->_parent); // Plug in the false-path, taken if we need to skip main-loop _igvn.hash_delete( pre_exit ); pre_exit->set_req(0, min_iff); set_idom(pre_exit, min_iff, dd_main_head); - set_idom(pre_exit->unique_out(), min_iff, dd_main_head); + set_idom(pre_exit->unique_ctrl_out(), min_iff, dd_main_head); // Make the true-path, must enter the main loop Node *min_taken = new IfTrueNode( min_iff ); _igvn.register_new_node_with_optimizer( min_taken ); set_idom(min_taken, min_iff, dd_main_head); - set_loop(min_taken, loop->_parent); + set_loop(min_taken, outer_loop->_parent); // Plug in the true path - _igvn.hash_delete( main_head ); - main_head->set_req(LoopNode::EntryControl, min_taken); - set_idom(main_head, min_taken, dd_main_head); + _igvn.hash_delete(outer_main_head); + outer_main_head->set_req(LoopNode::EntryControl, min_taken); + set_idom(outer_main_head, min_taken, dd_main_head); Arena *a = Thread::current()->resource_area(); VectorSet visited(a); @@ -1102,7 +1122,7 @@ if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) { Node *pre_phi = old_new[main_phi->_idx]; Node *fallpre = clone_up_backedge_goo(pre_head->back_control(), - main_head->init_control(), + main_head->skip_strip_mined()->in(LoopNode::EntryControl), pre_phi->in(LoopNode::LoopBackControl), visited, clones); _igvn.hash_delete(main_phi); @@ -1171,6 +1191,14 @@ BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test); register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) ); _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2); + if (main_head->is_strip_mined()) { + Node* le = outer_main_head->outer_loop_end(); + Node* bol = 
outer_main_head->outer_bol(); + Node* new_bol3 = new_bol2->clone(); + new_bol3->set_req(1, bol->in(1)); + register_new_node(new_bol3, le->in(0)); + _igvn.replace_input_of(le, 1, new_bol3); + } } // Flag main loop @@ -1305,16 +1333,24 @@ Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new, CountedLoopNode *main_head, CountedLoopEndNode *main_end, Node *incr, Node *limit, CountedLoopNode *&post_head) { + IfNode* outer_main_end = main_end; + IdealLoopTree* outer_loop = loop; + if (main_head->is_strip_mined()) { + main_head->verify_strip_mined(1); + outer_main_end = main_head->outer_loop_end(); + outer_loop = loop->_parent; + assert(outer_loop->_head == main_head->in(LoopNode::EntryControl), "broken loop tree"); + } //------------------------------ // Step A: Create a new post-Loop. - Node* main_exit = main_end->proj_out(false); + Node* main_exit = outer_main_end->proj_out(false); assert(main_exit->Opcode() == Op_IfFalse, ""); int dd_main_exit = dom_depth(main_exit); // Step A1: Clone the loop body of main. The clone becomes the post-loop. // The main loop pre-header illegally has 2 control users (old & new loops). - clone_loop(loop, old_new, dd_main_exit); + clone_loop(loop, old_new, dd_main_exit, ControlAroundStripMined); assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, ""); post_head = old_new[main_head->_idx]->as_CountedLoop(); post_head->set_normal_loop(); @@ -1325,10 +1361,10 @@ post_end->_prob = PROB_FAIR; // Build the main-loop normal exit. - IfFalseNode *new_main_exit = new IfFalseNode(main_end); + IfFalseNode *new_main_exit = new IfFalseNode(outer_main_end); _igvn.register_new_node_with_optimizer(new_main_exit); - set_idom(new_main_exit, main_end, dd_main_exit); - set_loop(new_main_exit, loop->_parent); + set_idom(new_main_exit, outer_main_end, dd_main_exit); + set_loop(new_main_exit, outer_loop->_parent); // Step A2: Build a zero-trip guard for the post-loop. 
After leaving the // main-loop, the post-loop may not execute at all. We 'opaque' the incr @@ -1346,7 +1382,7 @@ IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN); _igvn.register_new_node_with_optimizer(zer_iff); set_idom(zer_iff, new_main_exit, dd_main_exit); - set_loop(zer_iff, loop->_parent); + set_loop(zer_iff, outer_loop->_parent); // Plug in the false-path, taken if we need to skip this post-loop _igvn.replace_input_of(main_exit, 0, zer_iff); @@ -1356,7 +1392,7 @@ Node *zer_taken = new IfTrueNode(zer_iff); _igvn.register_new_node_with_optimizer(zer_taken); set_idom(zer_taken, zer_iff, dd_main_exit); - set_loop(zer_taken, loop->_parent); + set_loop(zer_taken, outer_loop->_parent); // Plug in the true path _igvn.hash_delete(post_head); post_head->set_req(LoopNode::EntryControl, zer_taken); @@ -1431,7 +1467,7 @@ // if rounds of unroll,optimize are making progress loop_head->set_node_count_before_unroll(loop->_body.size()); - Node *ctrl = loop_head->in(LoopNode::EntryControl); + Node *ctrl = loop_head->skip_strip_mined()->in(LoopNode::EntryControl); Node *limit = loop_head->limit(); Node *init = loop_head->init_trip(); Node *stride = loop_head->stride(); @@ -1610,7 +1646,7 @@ // represents the odd iterations; since the loop trips an even number of // times its backedge is never taken. Kill the backedge. uint dd = dom_depth(loop_head); - clone_loop( loop, old_new, dd ); + clone_loop(loop, old_new, dd, IgnoreStripMined); // Make backedges of the clone equal to backedges of the original. // Make the fall-in from the original come from the fall-out of the clone. 
@@ -1653,6 +1689,7 @@ } loop->record_for_igvn(); + loop_head->clear_strip_mined(); #ifndef PRODUCT if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations || TraceLoopOpts))) { @@ -2047,7 +2084,7 @@ } // Need to find the main-loop zero-trip guard - Node *ctrl = cl->in(LoopNode::EntryControl); + Node *ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl); Node *iffm = ctrl->in(0); Node *opqzm = iffm->in(1)->in(1)->in(2); assert(opqzm->in(1) == main_limit, "do not understand situation"); @@ -2413,7 +2450,6 @@ _igvn.register_new_node_with_optimizer(cur_min); Node *cmp_node = rce_loop_end->cmp_node(); _igvn.replace_input_of(cmp_node, 2, cur_min); - set_idom(cmp_node, cur_min, dom_depth(ctrl)); set_ctrl(cur_min, ctrl); set_loop(cur_min, rce_loop->_parent); @@ -2519,7 +2555,7 @@ #ifdef ASSERT static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) { - Node *ctrl = cl->in(LoopNode::EntryControl); + Node *ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl); assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, ""); Node *iffm = ctrl->in(0); assert(iffm->Opcode() == Op_If, ""); @@ -2558,7 +2594,7 @@ } assert(locate_pre_from_main(main_head) == cl, "bad main loop"); - Node* main_iff = main_head->in(LoopNode::EntryControl)->in(0); + Node* main_iff = main_head->skip_strip_mined()->in(LoopNode::EntryControl)->in(0); // Remove the Opaque1Node of the pre loop and make it execute all iterations phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2)); @@ -2619,7 +2655,7 @@ } if (needs_guard) { // Check for an obvious zero trip guard. 
- Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl)); + Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_strip_mined()->in(LoopNode::EntryControl)); if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) { bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse); // The test should look like just the backedge of a CountedLoop @@ -3167,6 +3203,8 @@ return false; } + head->verify_strip_mined(1); + // Check that the body only contains a store of a loop invariant // value that is indexed by the loop phi. Node* store = NULL; @@ -3288,6 +3326,16 @@ } */ + if (head->is_strip_mined()) { + // Inner strip mined loop goes away so get rid of outer strip + // mined loop + Node* outer_sfpt = head->outer_safepoint(); + Node* in = outer_sfpt->in(0); + Node* outer_out = head->outer_loop_exit(); + lazy_replace(outer_out, in); + _igvn.replace_input_of(outer_sfpt, 0, C->top()); + } + // Redirect the old control and memory edges that are outside the loop. // Sometimes the memory phi of the head is used as the outgoing // state of the loop. It's safe in this case to replace it with the --- old/src/hotspot/share/opto/loopUnswitch.cpp 2017-10-03 11:50:03.750512104 +0200 +++ new/src/hotspot/share/opto/loopUnswitch.cpp 2017-10-03 11:50:02.681512954 +0200 @@ -132,11 +132,11 @@ head->as_CountedLoop()->set_normal_loop(); } - ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode()); + ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode(), CloneIncludesStripMined); #ifdef ASSERT Node* uniqc = proj_true->unique_ctrl_out(); - Node* entry = head->in(LoopNode::EntryControl); + Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl); Node* predicate = find_predicate(entry); if (predicate != NULL && UseLoopPredicate) { // We may have two predicates, find first. 
@@ -145,7 +145,8 @@ } if (predicate != NULL) predicate = predicate->in(0); assert(proj_true->is_IfTrue() && - (predicate == NULL && uniqc == head || + (predicate == NULL && uniqc == head && !head->is_strip_mined() || + predicate == NULL && uniqc == head->in(LoopNode::EntryControl) && head->is_strip_mined() || predicate != NULL && uniqc == predicate), "by construction"); #endif // Increment unswitch count @@ -223,13 +224,16 @@ // Return control projection of the entry to the fast version. ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop, Node_List &old_new, - int opcode) { + int opcode, + CloneLoopMode mode) { LoopNode* head = loop->_head->as_Loop(); bool counted_loop = head->is_CountedLoop(); - Node* entry = head->in(LoopNode::EntryControl); + Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl); _igvn.rehash_node_delayed(entry); IdealLoopTree* outer_loop = loop->_parent; + head->verify_strip_mined(1); + Node *cont = _igvn.intcon(1); set_ctrl(cont, C->root()); Node* opq = new Opaque1Node(C, cont); @@ -247,19 +251,21 @@ // Clone the loop body. The clone becomes the fast loop. The // original pre-header will (illegally) have 3 control users // (old & new loops & new if). 
- clone_loop(loop, old_new, dom_depth(head), iff); + clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), mode, iff); assert(old_new[head->_idx]->is_Loop(), "" ); // Fast (true) control Node* iffast_pred = clone_loop_predicates(entry, iffast, !counted_loop); - _igvn.replace_input_of(head, LoopNode::EntryControl, iffast_pred); - set_idom(head, iffast_pred, dom_depth(head)); // Slow (false) control Node* ifslow_pred = clone_loop_predicates(entry, ifslow, !counted_loop); - LoopNode* slow_head = old_new[head->_idx]->as_Loop(); - _igvn.replace_input_of(slow_head, LoopNode::EntryControl, ifslow_pred); - set_idom(slow_head, ifslow_pred, dom_depth(slow_head)); + + Node* l = head->skip_strip_mined(); + _igvn.replace_input_of(l, LoopNode::EntryControl, iffast_pred); + set_idom(l, iffast_pred, dom_depth(l)); + LoopNode* slow_l = old_new[head->_idx]->as_Loop()->skip_strip_mined(); + _igvn.replace_input_of(slow_l, LoopNode::EntryControl, ifslow_pred); + set_idom(slow_l, ifslow_pred, dom_depth(l)); recompute_dom_depth(); @@ -270,9 +276,9 @@ Node_List old_new; LoopNode* head = loop->_head->as_Loop(); bool counted_loop = head->is_CountedLoop(); - Node* entry = head->in(LoopNode::EntryControl); + Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl); _igvn.rehash_node_delayed(entry); - IdealLoopTree* outer_loop = loop->_parent; + IdealLoopTree* outer_loop = head->is_strip_mined() ? loop->_parent->_parent : loop->_parent; ConINode* const_1 = _igvn.intcon(1); set_ctrl(const_1, C->root()); @@ -286,7 +292,7 @@ // Clone the loop body. The clone becomes the fast loop. The // original pre-header will (illegally) have 3 control users // (old & new loops & new if). 
- clone_loop(loop, old_new, dom_depth(head), iff); + clone_loop(loop, old_new, dom_depth(head), CloneIncludesStripMined, iff); assert(old_new[head->_idx]->is_Loop(), "" ); LoopNode* slow_head = old_new[head->_idx]->as_Loop(); @@ -303,9 +309,9 @@ #endif // Fast (true) control - _igvn.replace_input_of(head, LoopNode::EntryControl, iffast); + _igvn.replace_input_of(head->skip_strip_mined(), LoopNode::EntryControl, iffast); // Slow (false) control - _igvn.replace_input_of(slow_head, LoopNode::EntryControl, ifslow); + _igvn.replace_input_of(slow_head->skip_strip_mined(), LoopNode::EntryControl, ifslow); recompute_dom_depth(); @@ -394,7 +400,7 @@ return false; } - Node* ifslow_pred = _lp_reserved->as_CountedLoop()->in(LoopNode::EntryControl); + Node* ifslow_pred = _lp_reserved->skip_strip_mined()->in(LoopNode::EntryControl); if (!ifslow_pred->is_IfFalse()) { return false; --- old/src/hotspot/share/opto/loopnode.cpp 2017-10-03 11:50:04.960511142 +0200 +++ new/src/hotspot/share/opto/loopnode.cpp 2017-10-03 11:50:03.860512017 +0200 @@ -36,6 +36,7 @@ #include "opto/idealGraphPrinter.hpp" #include "opto/loopnode.hpp" #include "opto/mulnode.hpp" +#include "opto/opaquenode.hpp" #include "opto/rootnode.hpp" #include "opto/superword.hpp" @@ -261,8 +262,75 @@ set_early_ctrl( n ); } +// Create a skeleton strip mined outer loop: a Loop head before the +// inner strip mined loop, a safepoint and an exit condition guarded +// by an opaque node after the inner strip mined loop with a backedge +// to the loop head. The inner strip mined loop is left as it is. Only +// once loop optimizations are over, do we adjust the inner loop exit +// condition to limit its number of iterations, set the outer loop +// exit condition and add Phis to the outer loop head. 
Some loop +// optimizations that operate on the inner strip mined loop need to be +// aware of the outer strip mined loop: loop unswitching needs to +// clone the outer loop as well as the inner, unrolling needs to only +// clone the inner loop etc. No optimizations need to change the outer +// strip mined loop as it is only a skeleton. +IdealLoopTree* PhaseIdealLoop::create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control, + IdealLoopTree* loop, float cl_prob, float le_fcnt, + Node*& entry_control, Node*& iffalse) { + Node* outer_test = test->clone(); + Node* outer_cmp = cmp->clone(); + Node* outer_limit = new Opaque5Node(C, outer_cmp->in(2)); + outer_cmp->set_req(2, outer_limit); + outer_test->set_req(1, outer_cmp); + Node *orig = iffalse; + iffalse = iffalse->clone(); + _igvn.register_new_node_with_optimizer(iffalse); + set_idom(iffalse, idom(orig), dom_depth(orig)); + + IfNode *outer_le = new IfNode(iffalse, outer_test, cl_prob, le_fcnt); + Node *outer_ift = new IfTrueNode (outer_le); + Node* outer_iff = orig; + _igvn.replace_input_of(outer_iff, 0, outer_le); + + LoopNode *outer_l = new LoopNode(init_control, outer_ift); + entry_control = outer_l; + + IdealLoopTree* outer_ilt = new IdealLoopTree(this, outer_l, outer_ift); + IdealLoopTree* parent = loop->_parent; + IdealLoopTree* sibling = parent->_child; + if (sibling == loop) { + parent->_child = outer_ilt; + } else { + while (sibling->_next != loop) { + sibling = sibling->_next; + } + sibling->_next = outer_ilt; + } + outer_ilt->_next = loop->_next; + outer_ilt->_parent = parent; + outer_ilt->_child = loop; + outer_ilt->_nest = loop->_nest; + loop->_parent = outer_ilt; + loop->_next = NULL; + loop->_nest++; + + set_loop(iffalse, outer_ilt); + register_new_node(outer_test, iffalse); + register_new_node(outer_cmp, iffalse); + register_new_node(outer_limit, iffalse); + register_control(outer_le, outer_ilt, iffalse); + register_control(outer_ift, outer_ilt, outer_le); + set_idom(outer_iff, 
outer_le, dom_depth(outer_le)); + _igvn.register_new_node_with_optimizer(outer_l); + set_loop(outer_l, outer_ilt); + set_idom(outer_l, init_control, dom_depth(init_control)+1); + outer_l->mark_strip_mined(); + + return outer_ilt; +} + //------------------------------is_counted_loop-------------------------------- -bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) { +bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) { PhaseGVN *gvn = &_igvn; // Counted loop head must be a good RegionNode with only 3 not NULL @@ -280,7 +348,7 @@ // Allow funny placement of Safepoint if (back_control->Opcode() == Op_SafePoint) { - if (UseCountedLoopSafepoints) { + if (LoopStripMiningIter != 0) { // Leaving the safepoint on the backedge and creating a // CountedLoop will confuse optimizations. We can't move the // safepoint around because its jvm state wouldn't match a new @@ -600,7 +668,7 @@ } set_subtree_ctrl( limit ); - if (!UseCountedLoopSafepoints) { + if (LoopStripMiningIter == 0) { // Check for SafePoint on backedge and remove Node *sfpt = x->in(LoopNode::LoopBackControl); if (sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) { @@ -683,8 +751,20 @@ assert(iff->outcnt() == 0, "should be dead now"); lazy_replace( iff, le ); // fix 'get_ctrl' + Node *sfpt2 = le->in(0); + + Node* entry_control = init_control; + bool strip_mine_loop = LoopStripMiningIter > 1 && loop->_child == NULL && + sfpt2->Opcode() == Op_SafePoint && !loop->_has_call; + IdealLoopTree* outer_ilt = NULL; + if (strip_mine_loop) { + outer_ilt = create_outer_strip_mined_loop(test, cmp, init_control, loop, + cl_prob, le->_fcnt, entry_control, + iffalse); + } + // Now setup a new CountedLoopNode to replace the existing LoopNode - CountedLoopNode *l = new CountedLoopNode(init_control, back_control); + CountedLoopNode *l = new CountedLoopNode(entry_control, back_control); l->set_unswitch_count(x->as_Loop()->unswitch_count()); // Preserve // The following assert is 
approximately true, and defines the intention // of can_be_counted_loop. It fails, however, because phase->type @@ -696,12 +776,22 @@ // Fix all data nodes placed at the old loop head. // Uses the lazy-update mechanism of 'get_ctrl'. lazy_replace( x, l ); - set_idom(l, init_control, dom_depth(x)); + set_idom(l, entry_control, dom_depth(entry_control) + 1); - if (!UseCountedLoopSafepoints) { + if (LoopStripMiningIter == 0 || strip_mine_loop) { // Check for immediately preceding SafePoint and remove - Node *sfpt2 = le->in(0); - if (sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2)) { + if (sfpt2->Opcode() == Op_SafePoint && (LoopStripMiningIter != 0 || is_deleteable_safept(sfpt2))) { + if (strip_mine_loop) { + Node* outer_le = outer_ilt->_tail->in(0); + Node* outer_limit = outer_le->in(1)->in(1)->in(2); + assert(outer_limit->Opcode() == Op_Opaque5, "where's the opaque node?"); + Node* sfpt = sfpt2->clone(); + sfpt->set_req(0, iffalse); + outer_le->set_req(0, sfpt); + outer_limit->set_req(0, sfpt); + register_control(sfpt, outer_ilt, iffalse); + set_idom(outer_le, sfpt, dom_depth(sfpt)); + } lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control)); if (loop->_safepts != NULL) { loop->_safepts->yank(sfpt2); @@ -730,6 +820,13 @@ // bounds l->phi()->as_Phi()->set_type(l->phi()->Value(&_igvn)); + if (strip_mine_loop) { + l->mark_strip_mined(); + l->verify_strip_mined(1); + outer_ilt->_head->as_Loop()->verify_strip_mined(1); + loop = outer_ilt; + } + return true; } @@ -776,12 +873,94 @@ // Return a node which is more "ideal" than the current node. // Attempt to convert into a counted-loop. 
Node *LoopNode::Ideal(PhaseGVN *phase, bool can_reshape) { - if (!can_be_counted_loop(phase)) { + if (!can_be_counted_loop(phase) && !is_strip_mined()) { phase->C->set_major_progress(); } return RegionNode::Ideal(phase, can_reshape); } +void LoopNode::verify_strip_mined(int expect_opaq) const { +#ifdef ASSERT + if (is_strip_mined()) { + const LoopNode* outer = NULL; + const CountedLoopNode* inner = NULL; + if (is_CountedLoop()) { + inner = as_CountedLoop(); + outer = inner->in(LoopNode::EntryControl)->as_Loop(); + } else { + outer = this; + inner = outer->unique_ctrl_out()->as_CountedLoop(); + } + assert(outer->Opcode() == Op_Loop, "no counted loop here"); + assert(outer->is_strip_mined(), "incorrect outer loop"); + Node* outer_tail = outer->in(LoopNode::LoopBackControl); + Node* outer_le = outer_tail->in(0); + assert(outer_le->Opcode() == Op_If, "tail of outer loop should be an If"); + Node* sfpt = outer_le->in(0); + assert(sfpt->Opcode() == Op_SafePoint, "where's the safepoint?"); + Node* inner_out = sfpt->in(0); + if (inner_out->outcnt() != 1) { + ResourceMark rm; + Unique_Node_List wq; + + for (DUIterator_Fast imax, i = inner_out->fast_outs(imax); i < imax; i++) { + Node* u = inner_out->fast_out(i); + if (u == sfpt) { + continue; + } + wq.clear(); + wq.push(u); + bool found_sfpt = false; + for (uint next = 0; next < wq.size() && !found_sfpt; next++) { + Node *n = wq.at(next); + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && !found_sfpt; i++) { + Node* u = n->fast_out(i); + if (u == sfpt) { + found_sfpt = true; + } + if (!u->is_CFG()) { + wq.push(u); + } + } + } + assert(found_sfpt, "no node in loop that's not input to safepoint"); + } + } + CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd(); + assert(cle == inner->loopexit(), "mismatch"); + Node* cmp = outer_le->in(1)->in(1); + bool has_opaque = cmp->in(2)->Opcode() == Op_Opaque5; + assert(cmp->in(1) == inner->incr(), "strange exit condition"); + if (has_opaque) { + 
assert(expect_opaq == 1 || expect_opaq == -1, "unexpected opaque node"); + assert(outer->outcnt() == 2, "only phis"); + } else { + assert(expect_opaq == 0 || expect_opaq == -1, "no opaque node?"); + uint phis = 0; + for (DUIterator_Fast imax, i = inner->fast_outs(imax); i < imax; i++) { + Node* u = inner->fast_out(i); + if (u->is_Phi()) { + phis++; + } + } + for (DUIterator_Fast imax, i = outer->fast_outs(imax); i < imax; i++) { + Node* u = outer->fast_out(i); + assert(u == outer || u == inner || u->is_Phi(), "nothing between inner and outer loop"); + } + uint stores = 0; + for (DUIterator_Fast imax, i = inner_out->fast_outs(imax); i < imax; i++) { + Node* u = inner_out->fast_out(i); + if (u->is_Store()) { + stores++; + } + } + assert(outer->outcnt() >= phis + 2 && outer->outcnt() <= phis + 2 + stores + 1, "only phis"); + } + assert(sfpt->outcnt() == 1 + (has_opaque ? 1 : 0), "no data node"); + assert(outer_tail->outcnt() == 1 || !has_opaque, "no data node"); + } +#endif +} //============================================================================= //------------------------------Ideal------------------------------------------ @@ -990,6 +1169,165 @@ return NULL; } +LoopNode* CountedLoopNode::skip_strip_mined(int expect_opaq) { + if (is_strip_mined()) { + verify_strip_mined(expect_opaq); + return in(EntryControl)->as_Loop(); + } + return this; +} + +LoopNode* CountedLoopNode::outer_loop() const { + assert(is_strip_mined(), "not a strip mined loop"); + Node* c = in(EntryControl); + if (c == NULL || c->is_top() || c->Opcode() != Op_Loop) { + return NULL; + } + LoopNode* l = c->as_Loop(); + assert(l->is_strip_mined(), "where's the outer loop?"); + return l; +} + +IfTrueNode* LoopNode::outer_loop_tail() const { + assert(is_strip_mined() && Opcode() == Op_Loop, "not a strip mined loop"); + Node* c = in(LoopBackControl); + if (c == NULL || c->is_top()) { + return NULL; + } + return c->as_IfTrue(); +} + +IfTrueNode* CountedLoopNode::outer_loop_tail() const { + 
LoopNode* l = outer_loop(); + if (l == NULL) { + return NULL; + } + return l->outer_loop_tail(); +} + +IfNode* LoopNode::outer_loop_end() const { + IfTrueNode* proj = outer_loop_tail(); + if (proj == NULL) { + return NULL; + } + Node* c = proj->in(0); + if (c == NULL || c->is_top() || c->outcnt() != 2) { + return NULL; + } + assert(c->Opcode() == Op_If, "broken outer loop"); + return c->as_If(); +} + +IfNode* CountedLoopNode::outer_loop_end() const { + LoopNode* l = outer_loop(); + if (l == NULL) { + return NULL; + } + return l->outer_loop_end(); +} + +IfFalseNode* LoopNode::outer_loop_exit() const { + IfNode* le = outer_loop_end(); + if (le == NULL) { + return NULL; + } + Node* c = le->proj_out(false); + if (c == NULL) { + return NULL; + } + return c->as_IfFalse(); +} + +IfFalseNode* CountedLoopNode::outer_loop_exit() const { + LoopNode* l = outer_loop(); + if (l == NULL) { + return NULL; + } + return l->outer_loop_exit(); +} + +SafePointNode* LoopNode::outer_safepoint() const { + IfNode* le = outer_loop_end(); + if (le == NULL) { + return NULL; + } + Node* c = le->in(0); + if (c == NULL || c->is_top()) { + return NULL; + } + assert(c->Opcode() == Op_SafePoint, "broken outer loop"); + return c->as_SafePoint(); +} + +SafePointNode* CountedLoopNode::outer_safepoint() const { + LoopNode* l = outer_loop(); + if (l == NULL) { + return NULL; + } + return l->outer_safepoint(); +} + +BoolNode* LoopNode::outer_bol() const { + IfNode* le = outer_loop_end(); + if (le == NULL) { + return NULL; + } + Node* n = le->in(1); + if (n == NULL || n->is_top()) { + return NULL; + } + return n->as_Bool(); +} + +BoolNode* CountedLoopNode::outer_bol() const { + LoopNode* l = outer_loop(); + if (l == NULL) { + return NULL; + } + return l->outer_bol(); +} + +CmpINode* LoopNode::outer_cmp() const { + Node* bol = outer_bol(); + if (bol == NULL) { + return NULL; + } + Node* n = bol->in(1); + if (n == NULL || n->is_top()) { + return NULL; + } + assert(n->Opcode() == Op_CmpI, "broken strip mined 
loop"); + return (CmpINode*)n; +} + +CmpINode* CountedLoopNode::outer_cmp() const { + LoopNode* l = outer_loop(); + if (l == NULL) { + return NULL; + } + return l->outer_cmp(); +} + +Opaque5Node* LoopNode::outer_opaq() const { + Node* cmp = outer_cmp(); + if (cmp == NULL) { + return NULL; + } + Node* n = cmp->in(2); + if (n == NULL || n->is_top()) { + return NULL; + } + assert(n->Opcode() == Op_Opaque5, "broken strip mined loop"); + return (Opaque5Node*)n; +} + +Opaque5Node* CountedLoopNode::outer_opaq() const { + LoopNode* l = outer_loop(); + if (l == NULL) { + return NULL; + } + return l->outer_opaq(); +} //------------------------------filtered_type-------------------------------- // Return a type based on condition control flow @@ -1778,10 +2116,11 @@ if (_head->is_Loop()) _head->as_Loop()->set_inner_loop(); } + IdealLoopTree* loop = this; if (_head->is_CountedLoop() || - phase->is_counted_loop(_head, this)) { + phase->is_counted_loop(_head, loop)) { - if (!UseCountedLoopSafepoints) { + if (LoopStripMiningIter == 0 || (LoopStripMiningIter > 1 && _child == NULL)) { // Indicate we do not need a safepoint here _has_sfpt = 1; } @@ -1800,8 +2139,10 @@ } // Recursively - if (_child) _child->counted_loop( phase ); - if (_next) _next ->counted_loop( phase ); + assert(loop->_child != this || (loop->_head->as_Loop()->is_strip_mined() && _head->as_CountedLoop()->is_strip_mined()), "what kind of loop was added?"); + assert(loop->_child != this || (loop->_child->_child == NULL && loop->_child->_next == NULL), "would miss some loops"); + if (loop->_child && loop->_child != this) loop->_child->counted_loop(phase); + if (loop->_next) loop->_next ->counted_loop(phase); } #ifndef PRODUCT @@ -1812,7 +2153,7 @@ tty->print(" "); tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx); if (_irreducible) tty->print(" IRREDUCIBLE"); - Node* entry = _head->in(LoopNode::EntryControl); + Node* entry = _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl); Node* predicate = 
PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); if (predicate != NULL ) { tty->print(" limit_check"); @@ -1863,6 +2204,9 @@ if (Verbose) { tty->print(" body={"); _body.dump_simple(); tty->print(" }"); } + if (_head->as_Loop()->is_strip_mined()) { + tty->print(" strip mined"); + } tty->cr(); } @@ -3232,7 +3576,7 @@ if (!cl->is_main_loop() && !cl->is_post_loop()) { return false; } - Node* ctrl = cl->in(LoopNode::EntryControl); + Node* ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl); if (ctrl == NULL || (!ctrl->is_IfTrue() && !ctrl->is_IfFalse())) { return false; } @@ -3292,7 +3636,7 @@ } while(worklist.size() != 0 && LCA != early) { Node* s = worklist.pop(); - if (s->is_Load()) { + if (s->is_Load() || s->Opcode() == Op_SafePoint) { continue; } else if (s->is_MergeMem()) { for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) { @@ -3471,6 +3815,42 @@ } } +// Verify that no data node is scheduled in the outer loop of a strip +// mined loop. +void PhaseIdealLoop::verify_strip_mined_scheduling(Node *n, Node* least) { +#ifdef ASSERT + if (get_loop(least)->_nest == 0) { + return; + } + IdealLoopTree* loop = get_loop(least); + Node* head = loop->_head; + if (head->Opcode() == Op_Loop && head->as_Loop()->is_strip_mined()) { + if (n != head->as_Loop()->outer_bol() && + n != head->as_Loop()->outer_cmp() && + n != head->as_Loop()->outer_opaq()) { + Node* sfpt = head->as_Loop()->outer_safepoint(); + ResourceMark rm; + Unique_Node_List wq; + wq.push(sfpt); + for (uint i = 0; i < wq.size(); i++) { + Node *m = wq.at(i); + for (uint i = 1; i < m->req(); i++) { + Node* nn = m->in(i); + if (nn == n) { + return; + } + if (nn != NULL && has_ctrl(nn) && get_loop(get_ctrl(nn)) == loop) { + wq.push(nn); + } + } + } + ShouldNotReachHere(); + } + } +#endif +} + + //------------------------------build_loop_late_post--------------------------- // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping. 
// Second pass finds latest legal placement, and ideal loop placement. @@ -3580,8 +3960,9 @@ // which can inhibit range check elimination. if (least != early) { Node* ctrl_out = least->unique_ctrl_out(); - if (ctrl_out && ctrl_out->is_CountedLoop() && - least == ctrl_out->in(LoopNode::EntryControl)) { + if (ctrl_out && ctrl_out->is_Loop() && + least == ctrl_out->in(LoopNode::EntryControl) && + (ctrl_out->is_CountedLoop() || ctrl_out->as_Loop()->is_strip_mined())) { Node* least_dom = idom(least); if (get_loop(least_dom)->is_member(get_loop(least))) { least = least_dom; @@ -3606,6 +3987,7 @@ // Assign discovered "here or above" point least = find_non_split_ctrl(least); + verify_strip_mined_scheduling(n, least); set_ctrl(n, least); // Collect inner loop bodies --- old/src/hotspot/share/opto/loopnode.hpp 2017-10-03 11:50:06.365510026 +0200 +++ new/src/hotspot/share/opto/loopnode.hpp 2017-10-03 11:50:05.117511018 +0200 @@ -37,6 +37,7 @@ class IdealLoopTree; class LoopNode; class Node; +class Opaque5Node; class PhaseIdealLoop; class CountedLoopReserveKit; class VectorSet; @@ -71,7 +72,8 @@ VectorizedLoop=2048, HasAtomicPostLoop=4096, HasRangeChecks=8192, - IsMultiversioned=16384}; + IsMultiversioned=16384, + StripMined=32768}; char _unswitch_count; enum { _unswitch_max=3 }; char _postloop_flags; @@ -81,15 +83,16 @@ // Names for edge indices enum { Self=0, EntryControl, LoopBackControl }; - int is_inner_loop() const { return _loop_flags & InnerLoop; } + uint is_inner_loop() const { return _loop_flags & InnerLoop; } void set_inner_loop() { _loop_flags |= InnerLoop; } - int range_checks_present() const { return _loop_flags & HasRangeChecks; } - int is_multiversioned() const { return _loop_flags & IsMultiversioned; } - int is_vectorized_loop() const { return _loop_flags & VectorizedLoop; } - int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; } + uint range_checks_present() const { return _loop_flags & HasRangeChecks; } + uint is_multiversioned() const { 
return _loop_flags & IsMultiversioned; } + uint is_vectorized_loop() const { return _loop_flags & VectorizedLoop; } + uint is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; } void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; } - int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; } + uint partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; } + uint is_strip_mined() const { return _loop_flags & StripMined; } void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; } void mark_has_reductions() { _loop_flags |= HasReductions; } @@ -100,6 +103,8 @@ void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; } void mark_has_range_checks() { _loop_flags |= HasRangeChecks; } void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; } + void mark_strip_mined() { _loop_flags |= StripMined; } + void clear_strip_mined() { _loop_flags &= ~StripMined; } int unswitch_max() { return _unswitch_max; } int unswitch_count() { return _unswitch_count; } @@ -131,6 +136,16 @@ #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif + + void verify_strip_mined(int expect_opaq) const; + virtual LoopNode* skip_strip_mined(int expect_opaq = 1) { return this; } + virtual IfTrueNode* outer_loop_tail() const; + virtual IfNode* outer_loop_end() const; + virtual IfFalseNode* outer_loop_exit() const; + virtual SafePointNode* outer_safepoint() const; + virtual BoolNode* outer_bol() const; + virtual CmpINode* outer_cmp() const; + virtual Opaque5Node* outer_opaq() const; }; //------------------------------Counted Loops---------------------------------- @@ -232,16 +247,16 @@ // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or // Aligned, may be missing it's pre-loop. 
- int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; } - int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; } - int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; } - int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; } - int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; } - int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; } - int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; } - int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; } - int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; } - int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; } + uint is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; } + uint is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; } + uint is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; } + uint is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; } + uint is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; } + uint was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; } + uint has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; } + uint do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; } + uint is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; } + uint has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; } void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; } int main_idx() const { return _main_idx; } @@ -278,6 +293,16 @@ void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; } int slp_max_unroll() 
const { return _slp_maximum_unroll_factor; } + virtual LoopNode* skip_strip_mined(int expect_opaq = 1); + LoopNode* outer_loop() const; + virtual IfTrueNode* outer_loop_tail() const; + virtual IfNode* outer_loop_end() const; + virtual IfFalseNode* outer_loop_exit() const; + virtual SafePointNode* outer_safepoint() const; + virtual BoolNode* outer_bol() const; + virtual CmpINode* outer_cmp() const; + virtual Opaque5Node* outer_opaq() const; + #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif @@ -780,6 +805,7 @@ void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); void build_loop_late_post ( Node* n ); + void verify_strip_mined_scheduling(Node *n, Node* least); // Array of immediate dominance info for each CFG node indexed by node idx private: @@ -877,7 +903,10 @@ // Per-Node transform virtual Node *transform( Node *a_node ) { return 0; } - bool is_counted_loop( Node *x, IdealLoopTree *loop ); + bool is_counted_loop(Node* x, IdealLoopTree*& loop); + IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control, + IdealLoopTree* loop, float cl_prob, float le_fcnt, + Node*& entry_control, Node*& iffalse); Node* exact_limit( IdealLoopTree *loop ); @@ -908,8 +937,24 @@ // When nonnull, the clone and original are side-by-side, both are // dominated by the passed in side_by_side_idom node. Used in // construction of unswitched loops. + enum CloneLoopMode { + IgnoreStripMined = 0, // Only clone inner strip mined loop + CloneIncludesStripMined = 1, // clone both inner and outer strip mined loops + ControlAroundStripMined = 2 // Only clone inner strip mined loop, + // result control flow branches + // either to inner clone or outer + // strip mined loop. 
+ }; void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth, - Node* side_by_side_idom = NULL); + CloneLoopMode mode, Node* side_by_side_idom = NULL); + void clone_loop_handle_data_uses(Node* old, Node_List &old_new, + IdealLoopTree* loop, IdealLoopTree* companion_loop, + Node_List*& split_if_set, Node_List*& split_bool_set, + Node_List*& split_cex_set, Node_List& worklist, + uint new_counter, CloneLoopMode mode); + void clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop, + IdealLoopTree* outer_loop, int dd, Node_List &old_new, + Node_List& extra_data_nodes); // If we got the effect of peeling, either by actually peeling or by // making a pre-loop which must execute at least once, we can remove @@ -1020,7 +1065,8 @@ // and inserting an if to select fast-slow versions. ProjNode* create_slow_version_of_loop(IdealLoopTree *loop, Node_List &old_new, - int opcode); + int opcode, + CloneLoopMode mode); // Clone a loop and return the clone head (clone_loop_head). 
// Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse, --- old/src/hotspot/share/opto/loopopts.cpp 2017-10-03 11:50:07.543509089 +0200 +++ new/src/hotspot/share/opto/loopopts.cpp 2017-10-03 11:50:06.478509936 +0200 @@ -306,7 +306,12 @@ get_ctrl(m->in(2)) != n_ctrl && get_ctrl(m->in(3)) != n_ctrl) { // Move the AddP up to dominating point - set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl))); + Node* c = find_non_split_ctrl(idom(n_ctrl)); + if (c->Opcode() == Op_Loop && c->as_Loop()->is_strip_mined()) { + c->as_Loop()->verify_strip_mined(1); + c = c->in(LoopNode::EntryControl); + } + set_ctrl_and_loop(m, c); continue; } return NULL; @@ -744,14 +749,13 @@ if (ctrl_ok) { // move the Store _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem); - _igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl)); + _igvn.replace_input_of(n, 0, n_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)); _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl)); // Disconnect the phi now. An empty phi can confuse other // optimizations in this pass of loop opts. 
_igvn.replace_node(mem, mem->in(LoopNode::EntryControl)); n_loop->_body.yank(mem); - IdealLoopTree* new_loop = get_loop(n->in(0)); set_ctrl_and_loop(n, n->in(0)); return n; @@ -823,6 +827,14 @@ // Move the Store out of the loop creating clones along // all paths out of the loop that observe the stored value _igvn.rehash_node_delayed(phi); + IdealLoopTree* outer_loop = n_loop; + if (n_loop->_head->is_Loop() && n_loop->_head->as_Loop()->is_strip_mined()) { + assert(n_loop->_head->Opcode() == Op_CountedLoop, "outer loop is a strip mined"); + n_loop->_head->as_Loop()->verify_strip_mined(1); + Node* outer = n_loop->_head->as_CountedLoop()->outer_loop(); + outer_loop = get_loop(outer); + assert(n_loop->_parent == outer_loop, "broken loop tree"); + } int count = phi->replace_edge(n, n->in(MemNode::Memory)); assert(count > 0, "inconsistent phi"); for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { @@ -836,7 +848,7 @@ assert (!n_loop->is_member(u_loop), "only the phi should have been a use in the loop"); while(true) { Node* next_c = find_non_split_ctrl(idom(c)); - if (n_loop->is_member(get_loop(next_c))) { + if (outer_loop->is_member(get_loop(next_c))) { break; } c = next_c; @@ -891,7 +903,8 @@ if( n->is_CFG() || n->is_LoadStore() ) return n; if( n_op == Op_Opaque1 || // Opaque nodes cannot be mod'd - n_op == Op_Opaque2 ) { + n_op == Op_Opaque2 || + n_op == Op_Opaque5) { if( !C->major_progress() ) // If chance of no more loop opts... _igvn._worklist.push(n); // maybe we'll remove them return n; @@ -1029,7 +1042,7 @@ IdealLoopTree *u_loop = get_loop( useblock ); return (u_loop->_irreducible || u_loop->_child) ? 
useblock - : u_loop->_head->in(LoopNode::EntryControl); + : u_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl); } @@ -1565,6 +1578,273 @@ } } +void PhaseIdealLoop::clone_loop_handle_data_uses(Node* old, Node_List &old_new, + IdealLoopTree* loop, IdealLoopTree* outer_loop, + Node_List*& split_if_set, Node_List*& split_bool_set, + Node_List*& split_cex_set, Node_List& worklist, + uint new_counter, CloneLoopMode mode) { + Node* nnn = old_new[old->_idx]; + // Copy uses to a worklist, so I can munge the def-use info + // with impunity. + for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++) + worklist.push(old->fast_out(j)); + + while( worklist.size() ) { + Node *use = worklist.pop(); + if (!has_node(use)) continue; // Ignore dead nodes + if (use->in(0) == C->top()) continue; + IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use ); + // Check for data-use outside of loop - at least one of OLD or USE + // must not be a CFG node. +#ifdef ASSERT + if (loop->_head->as_Loop()->is_strip_mined() && outer_loop->is_member(use_loop) && !loop->is_member(use_loop) && old_new[use->_idx] == NULL) { + Node* cmp = loop->_head->as_CountedLoop()->outer_cmp(); + Node* sfpt = loop->_head->as_CountedLoop()->outer_safepoint(); + assert(mode == ControlAroundStripMined && (use == cmp || use == sfpt), "missed a node"); + } +#endif + if (!loop->is_member(use_loop) && !outer_loop->is_member(use_loop) && (!old->is_CFG() || !use->is_CFG())) { + + // If the Data use is an IF, that means we have an IF outside of the + // loop that is switching on a condition that is set inside of the + // loop. Happens if people set a loop-exit flag; then test the flag + // in the loop to break the loop, then test is again outside of the + // loop to determine which way the loop exited. + // Loop predicate If node connects to Bool node through Opaque1 node. 
+ if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use)) { + // Since this code is highly unlikely, we lazily build the worklist + // of such Nodes to go split. + if (!split_if_set) { + ResourceArea *area = Thread::current()->resource_area(); + split_if_set = new Node_List(area); + } + split_if_set->push(use); + } + if (use->is_Bool()) { + if (!split_bool_set) { + ResourceArea *area = Thread::current()->resource_area(); + split_bool_set = new Node_List(area); + } + split_bool_set->push(use); + } + if (use->Opcode() == Op_CreateEx) { + if (!split_cex_set) { + ResourceArea *area = Thread::current()->resource_area(); + split_cex_set = new Node_List(area); + } + split_cex_set->push(use); + } + + + // Get "block" use is in + uint idx = 0; + while( use->in(idx) != old ) idx++; + Node *prev = use->is_CFG() ? use : get_ctrl(use); + assert(!loop->is_member(get_loop(prev)) && !outer_loop->is_member(get_loop(prev)), "" ); + Node *cfg = prev->_idx >= new_counter + ? prev->in(2) + : idom(prev); + if( use->is_Phi() ) // Phi use is in prior block + cfg = prev->in(idx); // NOT in block of Phi itself + if (cfg->is_top()) { // Use is dead? + _igvn.replace_input_of(use, idx, C->top()); + continue; + } + + while(!outer_loop->is_member(get_loop(cfg))) { + prev = cfg; + cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg); + } + // If the use occurs after merging several exits from the loop, then + // old value must have dominated all those exits. Since the same old + // value was used on all those exits we did not need a Phi at this + // merge point. NOW we do need a Phi here. Each loop exit value + // is now merged with the peeled body exit; each exit gets its own + // private Phi and those Phis need to be merged here. + Node *phi; + if( prev->is_Region() ) { + if( idx == 0 ) { // Updating control edge? + phi = prev; // Just use existing control + } else { // Else need a new Phi + phi = PhiNode::make( prev, old ); + // Now recursively fix up the new uses of old! 
+ for( uint i = 1; i < prev->req(); i++ ) { + worklist.push(phi); // Onto worklist once for each 'old' input + } + } + } else { + // Get new RegionNode merging old and new loop exits + prev = old_new[prev->_idx]; + assert( prev, "just made this in step 7" ); + if( idx == 0) { // Updating control edge? + phi = prev; // Just use existing control + } else { // Else need a new Phi + // Make a new Phi merging data values properly + phi = PhiNode::make( prev, old ); + phi->set_req( 1, nnn ); + } + } + // If inserting a new Phi, check for prior hits + if( idx != 0 ) { + Node *hit = _igvn.hash_find_insert(phi); + if( hit == NULL ) { + _igvn.register_new_node_with_optimizer(phi); // Register new phi + } else { // or + // Remove the new phi from the graph and use the hit + _igvn.remove_dead_node(phi); + phi = hit; // Use existing phi + } + set_ctrl(phi, prev); + } + // Make 'use' use the Phi instead of the old loop body exit value + _igvn.replace_input_of(use, idx, phi); + if( use->_idx >= new_counter ) { // If updating new phis + // Not needed for correctness, but prevents a weak assert + // in AddPNode from tripping (when we end up with different + // base & derived Phis that will become the same after + // IGVN does CSE). + Node *hit = _igvn.hash_find_insert(use); + if( hit ) // Go ahead and re-hash for hits. + _igvn.replace_node( use, hit ); + } + + // If 'use' was in the loop-exit block, it now needs to be sunk + // below the post-loop merge point. 
+ sink_use( use, prev ); + } + } +} + +void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop, + IdealLoopTree* outer_loop, int dd, Node_List &old_new, + Node_List& extra_data_nodes) { + if (head->is_strip_mined() && mode != IgnoreStripMined) { + CountedLoopNode* cl = head->as_CountedLoop(); + Node* l = cl->outer_loop(); + Node* tail = cl->outer_loop_tail(); + IfNode* le = cl->outer_loop_end(); + Node* sfpt = cl->outer_safepoint(); + Node* bol = cl->outer_bol(); + Node* cmp = cl->outer_cmp(); + Node* opaq = cl->outer_opaq(); + CountedLoopEndNode* cle = cl->loopexit(); + CountedLoopNode* new_cl = old_new[cl->_idx]->as_CountedLoop(); + CountedLoopEndNode* new_cle = new_cl->as_CountedLoop()->loopexit(); + Node* cle_out = cle->proj_out(false); + + Node* new_sfpt = NULL; + Node* new_cle_out = cle_out->clone(); + old_new.map(cle_out->_idx, new_cle_out); + if (mode == CloneIncludesStripMined) { + // clone outer loop body: loop head, back-edge projection, exit test (If/Bool/CmpI/Opaque5) and safepoint + Node* new_l = l->clone(); + Node* new_tail = tail->clone(); + IfNode* new_le = le->clone()->as_If(); + new_sfpt = sfpt->clone(); + Node* new_bol = bol->clone(); + Node* new_cmp = cmp->clone(); + Node* new_opaq = opaq->clone(); + + set_loop(new_l, outer_loop->_parent); + set_idom(new_l, new_l->in(LoopNode::EntryControl), dd); + set_loop(new_cle_out, outer_loop->_parent); + set_idom(new_cle_out, new_cle, dd); + set_loop(new_sfpt, outer_loop->_parent); + set_idom(new_sfpt, new_cle_out, dd); + set_loop(new_le, outer_loop->_parent); + set_idom(new_le, new_sfpt, dd); + set_loop(new_tail, outer_loop->_parent); + set_idom(new_tail, new_le, dd); + set_ctrl(new_bol, new_sfpt); + set_ctrl(new_cmp, new_sfpt); + set_ctrl(new_opaq, new_sfpt); // the Opaque5 clone is a data node too; give it a control like bol/cmp + set_idom(new_cl, new_l, dd); + + old_new.map(l->_idx, new_l); + old_new.map(tail->_idx, new_tail); + old_new.map(le->_idx, new_le); + old_new.map(sfpt->_idx, new_sfpt); + old_new.map(bol->_idx, new_bol); + old_new.map(cmp->_idx, new_cmp); + old_new.map(opaq->_idx, new_opaq); + 
new_l->set_req(LoopNode::LoopBackControl, new_tail); + new_l->set_req(0, new_l); + new_tail->set_req(0, new_le); + new_le->set_req(0, new_sfpt); + new_sfpt->set_req(0, new_cle_out); + new_le->set_req(1, new_bol); + new_bol->set_req(1, new_cmp); + new_cmp->set_req(2, new_opaq); + new_cmp->set_req(1, new_cle->incr()); + new_cle_out->set_req(0, new_cle); + new_cl->set_req(LoopNode::EntryControl, new_l); + new_opaq->set_req(0, new_sfpt); + + _igvn.register_new_node_with_optimizer(new_l); + _igvn.register_new_node_with_optimizer(new_tail); + _igvn.register_new_node_with_optimizer(new_le); + _igvn.register_new_node_with_optimizer(new_bol); + _igvn.register_new_node_with_optimizer(new_cmp); + _igvn.register_new_node_with_optimizer(new_opaq); + } else { + Node *newhead = old_new[loop->_head->_idx]; + newhead->as_Loop()->clear_strip_mined(); + _igvn.replace_input_of(newhead, LoopNode::EntryControl, newhead->in(LoopNode::EntryControl)->in(LoopNode::EntryControl)); + set_idom(newhead, newhead->in(LoopNode::EntryControl), dd); + } + // Look at data node that were assigned a control in the outer + // loop: they are kept in the outer loop by the safepoint so start + // from the safepoint node's inputs. + IdealLoopTree* outer_loop = get_loop(l); + Node_Stack stack(2); + stack.push(sfpt, 1); + uint new_counter = C->unique(); + while (stack.size() > 0) { + Node* n = stack.node(); + uint i = stack.index(); + while (i < n->req() && + (n->in(i) == NULL || + !has_ctrl(n->in(i)) || + get_loop(get_ctrl(n->in(i))) != outer_loop || + (old_new[n->in(i)->_idx] != NULL && old_new[n->in(i)->_idx]->_idx >= new_counter))) { + i++; + } + if (i < n->req()) { + stack.set_index(i+1); + stack.push(n->in(i), 0); + } else { + assert(old_new[n->_idx] == NULL || n == sfpt || old_new[n->_idx]->_idx < new_counter, "no clone yet"); + Node* m = n == sfpt ? 
new_sfpt : n->clone(); + if (m != NULL) { + for (uint i = 0; i < n->req(); i++) { + if (m->in(i) != NULL && old_new[m->in(i)->_idx] != NULL) { + m->set_req(i, old_new[m->in(i)->_idx]); + } + } + } else { + assert(n == sfpt && mode != CloneIncludesStripMined, "where's the safepoint clone?"); + } + if (n != sfpt) { + extra_data_nodes.push(n); + _igvn.register_new_node_with_optimizer(m); + assert(get_ctrl(n) == cle_out, "what other control?"); + set_ctrl(m, new_cle_out); + old_new.map(n->_idx, m); + } + stack.pop(); + } + } + if (mode == CloneIncludesStripMined) { + _igvn.register_new_node_with_optimizer(new_sfpt); + _igvn.register_new_node_with_optimizer(new_cle_out); + } + } else { + Node *newhead = old_new[loop->_head->_idx]; + set_idom(newhead, newhead->in(LoopNode::EntryControl), dd); + } +} + //------------------------------clone_loop------------------------------------- // // C L O N E A L O O P B O D Y @@ -1593,7 +1873,10 @@ // dominated by the side_by_side_idom node. Used in construction of // unswitched loops. void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd, - Node* side_by_side_idom) { + CloneLoopMode mode, Node* side_by_side_idom) { + + LoopNode* head = loop->_head->as_Loop(); + head->verify_strip_mined(1); if (C->do_vector_loop() && PrintOpto) { const char* mname = C->method()->name()->as_quoted_ascii(); @@ -1626,6 +1909,7 @@ _igvn.register_new_node_with_optimizer(nnn); } + IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop; // Step 2: Fix the edges in the new body. If the old input is outside the // loop use it. 
If the old input is INside the loop, use the corresponding @@ -1637,7 +1921,7 @@ if (has_ctrl(old)) { set_ctrl(nnn, old_new[get_ctrl(old)->_idx]); } else { - set_loop(nnn, loop->_parent); + set_loop(nnn, outer_loop->_parent); if (old->outcnt() > 0) { set_idom( nnn, old_new[idom(old)->_idx], dd ); } @@ -1653,22 +1937,21 @@ } _igvn.hash_find_insert(nnn); } - Node *newhead = old_new[loop->_head->_idx]; - set_idom(newhead, newhead->in(LoopNode::EntryControl), dd); + ResourceArea *area = Thread::current()->resource_area(); + Node_List extra_data_nodes(area); + clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes); // Step 3: Now fix control uses. Loop varying control uses have already // been fixed up (as part of all input edges in Step 2). Loop invariant // control uses must be either an IfFalse or an IfTrue. Make a merge // point to merge the old and new IfFalse/IfTrue nodes; make the use // refer to this. - ResourceArea *area = Thread::current()->resource_area(); Node_List worklist(area); uint new_counter = C->unique(); for( i = 0; i < loop->_body.size(); i++ ) { Node* old = loop->_body.at(i); if( !old->is_CFG() ) continue; - Node* nnn = old_new[old->_idx]; // Copy uses to a worklist, so I can munge the def-use info // with impunity. @@ -1682,9 +1965,29 @@ if( !loop->is_member( use_loop ) && use->is_CFG() ) { // Both OLD and USE are CFG nodes here. 
assert( use->is_Proj(), "" ); + Node* nnn = old_new[old->_idx]; + + Node* newuse = NULL; + if (head->is_strip_mined() && mode != IgnoreStripMined) { + CountedLoopNode* cl = head->as_CountedLoop(); + CountedLoopEndNode* cle = cl->loopexit(); + Node* cle_out = cle->proj_out(false); + if (use == cle_out) { + IfNode* le = cl->outer_loop_end(); + use = le->proj_out(false); + use_loop = get_loop(use); + if (mode == CloneIncludesStripMined) { + nnn = old_new[le->_idx]; + } else { + newuse = old_new[cle_out->_idx]; + } + } + } + if (newuse == NULL) { + newuse = use->clone(); + } // Clone the loop exit control projection - Node *newuse = use->clone(); if (C->do_vector_loop()) { cm.verify_insert_and_clone(use, newuse, cm.clone_idx()); } @@ -1718,6 +2021,10 @@ if( useuse->in(k) == use ) { useuse->set_req(k, r); uses_found++; + if (useuse->is_Loop() && k == LoopNode::EntryControl) { + assert(dom_depth(useuse) > dd_r , ""); + set_idom(useuse, r, dom_depth(useuse)); + } } } l -= uses_found; // we deleted 1 or more copies of this edge @@ -1741,123 +2048,16 @@ Node_List *split_cex_set = NULL; for( i = 0; i < loop->_body.size(); i++ ) { Node* old = loop->_body.at(i); - Node* nnn = old_new[old->_idx]; - // Copy uses to a worklist, so I can munge the def-use info - // with impunity. - for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++) - worklist.push(old->fast_out(j)); - - while( worklist.size() ) { - Node *use = worklist.pop(); - if (!has_node(use)) continue; // Ignore dead nodes - if (use->in(0) == C->top()) continue; - IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use ); - // Check for data-use outside of loop - at least one of OLD or USE - // must not be a CFG node. - if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) { - - // If the Data use is an IF, that means we have an IF outside of the - // loop that is switching on a condition that is set inside of the - // loop. 
Happens if people set a loop-exit flag; then test the flag - // in the loop to break the loop, then test is again outside of the - // loop to determine which way the loop exited. - // Loop predicate If node connects to Bool node through Opaque1 node. - if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use)) { - // Since this code is highly unlikely, we lazily build the worklist - // of such Nodes to go split. - if( !split_if_set ) - split_if_set = new Node_List(area); - split_if_set->push(use); - } - if( use->is_Bool() ) { - if( !split_bool_set ) - split_bool_set = new Node_List(area); - split_bool_set->push(use); - } - if( use->Opcode() == Op_CreateEx ) { - if( !split_cex_set ) - split_cex_set = new Node_List(area); - split_cex_set->push(use); - } - + clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set, + split_bool_set, split_cex_set, worklist, new_counter, + mode); + } - // Get "block" use is in - uint idx = 0; - while( use->in(idx) != old ) idx++; - Node *prev = use->is_CFG() ? use : get_ctrl(use); - assert( !loop->is_member( get_loop( prev ) ), "" ); - Node *cfg = prev->_idx >= new_counter - ? prev->in(2) - : idom(prev); - if( use->is_Phi() ) // Phi use is in prior block - cfg = prev->in(idx); // NOT in block of Phi itself - if (cfg->is_top()) { // Use is dead? - _igvn.replace_input_of(use, idx, C->top()); - continue; - } - - while( !loop->is_member( get_loop( cfg ) ) ) { - prev = cfg; - cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg); - } - // If the use occurs after merging several exits from the loop, then - // old value must have dominated all those exits. Since the same old - // value was used on all those exits we did not need a Phi at this - // merge point. NOW we do need a Phi here. Each loop exit value - // is now merged with the peeled body exit; each exit gets its own - // private Phi and those Phis need to be merged here. - Node *phi; - if( prev->is_Region() ) { - if( idx == 0 ) { // Updating control edge? 
- phi = prev; // Just use existing control - } else { // Else need a new Phi - phi = PhiNode::make( prev, old ); - // Now recursively fix up the new uses of old! - for( uint i = 1; i < prev->req(); i++ ) { - worklist.push(phi); // Onto worklist once for each 'old' input - } - } - } else { - // Get new RegionNode merging old and new loop exits - prev = old_new[prev->_idx]; - assert( prev, "just made this in step 7" ); - if( idx == 0 ) { // Updating control edge? - phi = prev; // Just use existing control - } else { // Else need a new Phi - // Make a new Phi merging data values properly - phi = PhiNode::make( prev, old ); - phi->set_req( 1, nnn ); - } - } - // If inserting a new Phi, check for prior hits - if( idx != 0 ) { - Node *hit = _igvn.hash_find_insert(phi); - if( hit == NULL ) { - _igvn.register_new_node_with_optimizer(phi); // Register new phi - } else { // or - // Remove the new phi from the graph and use the hit - _igvn.remove_dead_node(phi); - phi = hit; // Use existing phi - } - set_ctrl(phi, prev); - } - // Make 'use' use the Phi instead of the old loop body exit value - _igvn.replace_input_of(use, idx, phi); - if( use->_idx >= new_counter ) { // If updating new phis - // Not needed for correctness, but prevents a weak assert - // in AddPNode from tripping (when we end up with different - // base & derived Phis that will become the same after - // IGVN does CSE). - Node *hit = _igvn.hash_find_insert(use); - if( hit ) // Go ahead and re-hash for hits. - _igvn.replace_node( use, hit ); - } - - // If 'use' was in the loop-exit block, it now needs to be sunk - // below the post-loop merge point. - sink_use( use, prev ); - } - } + for (i = 0; i < extra_data_nodes.size(); i++) { + Node* old = extra_data_nodes.at(i); + clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set, + split_bool_set, split_cex_set, worklist, new_counter, + mode); } // Check for IFs that need splitting/cloning. 
Happens if an IF outside of
@@ -2949,7 +3149,7 @@

   assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");

-  clone_loop( loop, old_new, dd );
+  clone_loop(loop, old_new, dd, IgnoreStripMined);

   const uint clone_exit_idx = 1;
   const uint orig_exit_idx = 2;
--- old/src/hotspot/share/opto/macro.cpp 2017-10-03 11:50:08.738508139 +0200
+++ new/src/hotspot/share/opto/macro.cpp 2017-10-03 11:50:07.674508985 +0200
@@ -2665,7 +2665,8 @@
                n->Opcode() == Op_Opaque1 ||
                n->Opcode() == Op_Opaque2 ||
                n->Opcode() == Op_Opaque3 ||
-               n->Opcode() == Op_Opaque4, "unknown node type in macro list");
+               n->Opcode() == Op_Opaque4 ||
+               n->Opcode() == Op_Opaque5, "unknown node type in macro list");
       }
       assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
       progress = progress || success;
@@ -2733,6 +2734,11 @@
       } else if (n->Opcode() == Op_Opaque4) {
         _igvn.replace_node(n, n->in(2));
         success = true;
+      } else if (n->Opcode() == Op_Opaque5) {
+        Node* res = ((Opaque5Node*)n)->adjust_strip_mined_loop(&_igvn);
+        guarantee(res != NULL, "strip mined adjustment failed");
+        _igvn.replace_node(n, res);
+        success = true;
       }
       assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
       progress = progress || success;
--- old/src/hotspot/share/opto/opaquenode.cpp 2017-10-03 11:50:09.953507173 +0200
+++ new/src/hotspot/share/opto/opaquenode.cpp 2017-10-03 11:50:08.847508053 +0200
@@ -23,6 +23,11 @@
  */

 #include "precompiled.hpp"
+#include "opto/addnode.hpp"
+#include "opto/cfgnode.hpp"
+#include "opto/connode.hpp"
+#include "opto/divnode.hpp"
+#include "opto/loopnode.hpp"
 #include "opto/opaquenode.hpp"
 #include "opto/phaseX.hpp"

@@ -68,6 +73,300 @@
   return phase->type(in(1));
 }

+// Follow this node's single CmpI -> Bool -> If user chain to the strip mined
+// LoopNode and return the CountedLoopNode that is its unique control user;
+// return NULL as soon as any step does not have the expected shape.
+CountedLoopNode* Opaque5Node::inner_loop() const {
+  if (outcnt() != 1) {
+    return NULL;
+  }
+  Node* cmp = unique_out();
+  if (cmp == NULL || cmp->outcnt() != 1 || cmp->Opcode() != Op_CmpI) {
+    return NULL;
+  }
+  Node* test = cmp->unique_out();
+  if
(test == NULL || test->outcnt() != 1 || test->Opcode() != Op_Bool) {
+    return NULL;
+  }
+  Node* lex = test->unique_out();
+  if (lex == NULL || lex->Opcode() != Op_If) {
+    return NULL;
+  }
+  IfNode* le = lex->as_If();
+  Node* le_tail = le->proj_out(true);
+  if (le_tail == NULL) {
+    return NULL;
+  }
+  Node* lx = le_tail->unique_ctrl_out();
+  if (lx == NULL || !lx->is_Loop()) {
+    return NULL;
+  }
+  LoopNode* l = lx->as_Loop();
+  if (!lx->as_Loop()->is_strip_mined() ||
+      le->in(0) == NULL ||
+      le->in(0)->in(0) == NULL) {
+    return NULL;
+  }
+  Node* inner_clex = le->in(0)->in(0)->in(0);
+  if (inner_clex == NULL || !inner_clex->is_CountedLoopEnd()) {
+    return NULL;
+  }
+  CountedLoopEndNode* inner_cle = inner_clex->as_CountedLoopEnd();
+  Node* inner_clx = l->unique_ctrl_out();
+  if (inner_clx == NULL || !inner_clx->is_CountedLoop()) {
+    return NULL;
+  }
+  CountedLoopNode* inner_cl = inner_clx->as_CountedLoop();
+  assert(inner_cl->is_strip_mined(), "inner loop should be strip mined");
+  return inner_cl;
+}
+
+
+Node* Opaque5Node::adjust_strip_mined_loop(PhaseGVN* phase) {
+  // Look for the outer & inner strip mined loop, reduce number of iterations
+  // of the inner loop, set exit condition of outer loop, construct required phi
+  // nodes for outer loop. Returns NULL if the inner loop or its iv phi is missing.
+  CountedLoopNode* inner_cl = inner_loop();
+  PhaseIterGVN *igvn = phase->is_IterGVN();
+  Node* inner_iv_phi = (inner_cl != NULL) ? inner_cl->phi() : NULL; // inner_loop() returns NULL on unexpected shapes
+  if (inner_iv_phi == NULL) {
+    return NULL;
+  }
+  CountedLoopEndNode* inner_cle = inner_cl->loopexit();
+  Node* cmp = inner_cl->outer_cmp();
+  BoolNode* bol = inner_cl->outer_bol();
+  assert(cmp->in(1) == inner_cle->cmp_node()->in(1), "broken comparison");
+  assert(bol->_test._test == inner_cle->test_trip(), "broken comparison");
+
+  int stride = inner_cl->stride_con();
+  jlong scaled_iters_long = ((jlong)LoopStripMiningIter) * ABS(stride);
+  int scaled_iters = (int)scaled_iters_long;
+  jlong short_scaled_iters = ((jlong)LoopStripMiningIterShortLoop) * ABS(stride); // widened like scaled_iters_long: the flag may exceed int range
+  const TypeInt* inner_iv_t = phase->type(inner_iv_phi)->is_int();
+  jlong iter_estimate = (jlong)inner_iv_t->_hi - (jlong)inner_iv_t->_lo;
+  assert(iter_estimate > 0, "broken");
+  if ((jlong)scaled_iters != scaled_iters_long || iter_estimate <= short_scaled_iters) {
+    // Remove outer loop and safepoint (too few iterations)
+    Node* outer_sfpt = inner_cl->outer_safepoint();
+    Node* outer_out = inner_cl->outer_loop_exit();
+    igvn->replace_node(outer_out, outer_sfpt->in(0));
+    igvn->replace_input_of(outer_sfpt, 0, igvn->C->top());
+    inner_cl->clear_strip_mined();
+    return igvn->C->top();
+  }
+
+  Node* cle_tail = inner_cle->proj_out(true);
+  ResourceMark rm;
+  Node_List old_new;
+  if (cle_tail->outcnt() > 1) {
+    // Look for nodes on backedge of inner loop and clone them
+    Unique_Node_List backedge_nodes;
+    for (DUIterator_Fast imax, i = cle_tail->fast_outs(imax); i < imax; i++) {
+      Node* u = cle_tail->fast_out(i);
+      if (u != inner_cl) {
+        assert(!u->is_CFG(), "control flow on the backedge?");
+        backedge_nodes.push(u);
+      }
+    }
+    uint last = igvn->C->unique();
+    for (uint next = 0; next < backedge_nodes.size(); next++) {
+      Node* n = backedge_nodes.at(next);
+      old_new.map(n->_idx, n->clone());
+      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+        Node* u = n->fast_out(i);
+        assert(!u->is_CFG(), "broken");
+        if (u->_idx >= last) {
+          continue;
+        }
+        if (!u->is_Phi()) {
+          backedge_nodes.push(u);
+        } else {
+          assert(u->in(0) == inner_cl, "strange phi on the backedge");
+        }
+      }
+    }
+    // Put the clones on the outer loop backedge
+    Node* le_tail = inner_cl->outer_loop_tail();
+    for (uint next = 0; next < backedge_nodes.size(); next++) {
+      Node *n = old_new[backedge_nodes.at(next)->_idx];
+      for (uint i = 1; i < n->req(); i++) {
+        if (n->in(i) != NULL && old_new[n->in(i)->_idx] != NULL) {
+          n->set_req(i, old_new[n->in(i)->_idx]);
+        }
+      }
+      if (n->in(0) != NULL) {
+        assert(n->in(0) == cle_tail, "node not on backedge?");
+        n->set_req(0, le_tail);
+      }
+      igvn->register_new_node_with_optimizer(n);
+    }
+  }
+
+  Node* iv_phi = NULL;
+  // Make a clone of each phi in the inner loop
+  // for the outer loop
+  Node* l = inner_cl->outer_loop();
+  for (uint i = 0; i < inner_cl->outcnt(); i++) {
+    Node* u = inner_cl->raw_out(i);
+    if (u->is_Phi()) {
+      assert(u->in(0) == inner_cl, "inconsistent");
+      Node* phi = u->clone();
+      phi->set_req(0, l);
+      Node* be = old_new[phi->in(LoopNode::LoopBackControl)->_idx];
+      if (be != NULL) {
+        phi->set_req(LoopNode::LoopBackControl, be);
+      }
+      phi = igvn->transform(phi);
+      igvn->replace_input_of(u, LoopNode::EntryControl, phi);
+      if (u == inner_iv_phi) {
+        iv_phi = phi;
+      }
+    }
+  }
+  Node* cle_out = inner_cle->proj_out(false);
+  if (cle_out->outcnt() > 1) {
+    // Look for chains of stores that were sunk
+    // out of the inner loop and are in the outer loop
+    for (DUIterator_Fast imax, i = cle_out->fast_outs(imax); i < imax; i++) {
+      Node* u = cle_out->fast_out(i);
+      if (u->is_Store()) {
+        Node* first = u;
+        for(;;) {
+          Node* next = first->in(MemNode::Memory);
+          if (!next->is_Store() || next->in(0) != cle_out) {
+            break;
+          }
+          first = next;
+        }
+        Node* last = u;
+        for(;;) {
+          Node* next = NULL;
+          for (DUIterator_Fast jmax, j = last->fast_outs(jmax); j < jmax; j++) {
+            Node* uu = last->fast_out(j);
+            if (uu->is_Store() && uu->in(0) == cle_out) {
+              assert(next == NULL, "only one in the outer loop");
+              next = uu;
+            }
+          }
+          if (next == NULL) {
+            break;
+          }
+          last = next;
+        }
+        Node* phi = NULL;
+        for (DUIterator_Fast jmax, j = l->fast_outs(jmax); j < jmax; j++) {
+          Node* uu = l->fast_out(j);
+          if (uu->is_Phi()) {
+            Node* be = uu->in(LoopNode::LoopBackControl);
+            while (be->is_Store() && old_new[be->_idx] != NULL) {
+              ShouldNotReachHere();
+              be = be->in(MemNode::Memory);
+            }
+            if (be == last || be == first->in(MemNode::Memory)) {
+              assert(phi == NULL, "only one phi");
+              phi = uu;
+            }
+          }
+        }
+#ifdef ASSERT
+        for (DUIterator_Fast jmax, j = l->fast_outs(jmax); j < jmax; j++) {
+          Node* uu = l->fast_out(j);
+          if (uu->is_Phi() && uu->bottom_type() == Type::MEMORY) {
+            if (uu->adr_type() == igvn->C->get_adr_type(igvn->C->get_alias_index(u->adr_type()))) {
+              assert(phi == uu, "what's that phi?");
+            } else if (uu->adr_type() == TypePtr::BOTTOM) {
+              Node* n = uu->in(LoopNode::LoopBackControl);
+              uint limit = igvn->C->live_nodes();
+              uint i = 0;
+              while (n != uu) {
+                i++;
+                assert(i < limit, "infinite loop");
+                if (n->is_Proj()) {
+                  n = n->in(0);
+                } else if (n->is_SafePoint() || n->is_MemBar()) {
+                  n = n->in(TypeFunc::Memory);
+                } else if (n->is_Phi()) {
+                  n = n->in(1);
+                } else if (n->is_MergeMem()) {
+                  n = n->as_MergeMem()->memory_at(igvn->C->get_alias_index(u->adr_type()));
+                } else if (n->is_Store() || n->is_LoadStore() || n->is_ClearArray()) {
+                  n = n->in(MemNode::Memory);
+                } else {
+                  n->dump();
+                  ShouldNotReachHere();
+                }
+              }
+            }
+          }
+        }
+#endif
+        if (phi == NULL) {
+          // If an entire chain was sunk, the
+          // inner loop has no phi for that memory
+          // slice, create one for the outer loop
+          phi = PhiNode::make(l, first->in(MemNode::Memory), Type::MEMORY,
+                              igvn->C->get_adr_type(igvn->C->get_alias_index(u->adr_type())));
+          phi->set_req(LoopNode::LoopBackControl, last);
+          phi = igvn->transform(phi);
+          igvn->replace_input_of(first, MemNode::Memory, phi);
+        } else {
+          // Or fix the outer loop phi to include
+          // that chain of stores.
+          Node* be = phi->in(LoopNode::LoopBackControl);
+          while (be->is_Store() && old_new[be->_idx] != NULL) {
+            ShouldNotReachHere();
+            be = be->in(MemNode::Memory);
+          }
+          if (be == first->in(MemNode::Memory)) {
+            if (be == phi->in(LoopNode::LoopBackControl)) {
+              igvn->replace_input_of(phi, LoopNode::LoopBackControl, last);
+            } else {
+              igvn->replace_input_of(be, MemNode::Memory, last);
+            }
+          } else {
+#ifdef ASSERT
+            if (be == phi->in(LoopNode::LoopBackControl)) {
+              assert(phi->in(LoopNode::LoopBackControl) == last, "");
+            } else {
+              assert(be->in(MemNode::Memory) == last, "");
+            }
+#endif
+          }
+        }
+      }
+    }
+  }
+
+  if (iv_phi != NULL) {
+    // Now adjust the inner loop's exit condition
+    Node* limit = inner_cl->limit();
+    Node* sub = NULL;
+    if (stride > 0) {
+      sub = igvn->transform(new SubINode(limit, iv_phi));
+    } else {
+      sub = igvn->transform(new SubINode(iv_phi, limit));
+    }
+    Node* min = igvn->transform(new MinINode(sub, igvn->intcon(scaled_iters)));
+    Node* new_limit = NULL;
+    if (stride > 0) {
+      new_limit = igvn->transform(new AddINode(min, iv_phi));
+    } else {
+      new_limit = igvn->transform(new SubINode(iv_phi, min));
+    }
+    igvn->replace_input_of(inner_cle->cmp_node(), 2, new_limit);
+    if (iter_estimate <= scaled_iters_long) {
+      // We would only go through one iteration of
+      // the outer loop: drop the outer loop but
+      // keep the safepoint so we don't run for
+      // too long without a safepoint
+      igvn->replace_input_of(inner_cl->outer_loop_end(), 1, igvn->intcon(0));
+      inner_cl->clear_strip_mined();
+    }
+    return limit;
+  }
+  return NULL;
+}
+
 //=============================================================================

 uint ProfileBooleanNode::hash() const { return NO_HASH; }
--- old/src/hotspot/share/opto/opaquenode.hpp 2017-10-03 11:50:11.289506111 +0200
+++ new/src/hotspot/share/opto/opaquenode.hpp 2017-10-03 11:50:10.049507097 +0200
@@ -109,6 +109,18 @@
 };


+// For loop strip mining
+class Opaque5Node : public
Opaque2Node {
+ private:
+  CountedLoopNode* inner_loop() const;
+
+ public:
+  Opaque5Node(Compile* C, Node *n) : Opaque2Node(C, n) {}
+  virtual int Opcode() const;
+
+  Node* adjust_strip_mined_loop(PhaseGVN* phase);
+};
+
 //------------------------------ProfileBooleanNode-------------------------------
 // A node represents value profile for a boolean during parsing.
 // Once parsing is over, the node goes away (during IGVN).
--- old/src/hotspot/share/opto/superword.cpp 2017-10-03 11:50:12.709504983 +0200
+++ new/src/hotspot/share/opto/superword.cpp 2017-10-03 11:50:11.384506036 +0200
@@ -1337,6 +1337,7 @@
     for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
       Node* t2 = s2->fast_out(j);
       if (!in_bb(t2)) continue;
+      if (t2->Opcode() == Op_AddI && t2 == _lp->as_CountedLoop()->incr()) continue; // don't mess with the iv
       if (!opnd_positions_match(s1, t1, s2, t2))
         continue;
       if (stmts_can_pack(t1, t2, align)) {
@@ -3287,7 +3288,7 @@
     return NULL;
   }

-  Node* p_f = cl->in(LoopNode::EntryControl)->in(0)->in(0);
+  Node* p_f = cl->skip_strip_mined()->in(LoopNode::EntryControl)->in(0)->in(0);
   if (!p_f->is_IfFalse()) return NULL;
   if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
   CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd();
--- old/src/hotspot/share/runtime/arguments.cpp 2017-10-03 11:50:14.092503883 +0200
+++ new/src/hotspot/share/runtime/arguments.cpp 2017-10-03 11:50:12.837504881 +0200
@@ -2531,6 +2531,21 @@
     }
     FLAG_SET_CMDLINE(bool, PostLoopMultiversioning, false);
   }
+  if (UseCountedLoopSafepoints && LoopStripMiningIter == 0) {
+    if (!FLAG_IS_DEFAULT(UseCountedLoopSafepoints) || !FLAG_IS_DEFAULT(LoopStripMiningIter)) {
+      warning("When counted loop safepoints are enabled, LoopStripMiningIter must be at least 1 (a safepoint every 1 iteration): setting it to 1");
+    }
+    LoopStripMiningIter = 1;
+  } else if (!UseCountedLoopSafepoints && LoopStripMiningIter > 0) {
+    if (!FLAG_IS_DEFAULT(UseCountedLoopSafepoints) || !FLAG_IS_DEFAULT(LoopStripMiningIter)) {
+      warning("Disabling counted safepoints implies no loop strip mining: setting LoopStripMiningIter to 0");
+    }
+    LoopStripMiningIter = 0;
+  }
+  if (FLAG_IS_DEFAULT(LoopStripMiningIterShortLoop)) {
+    // blind guess: unless set explicitly, the short-loop threshold is a tenth of the strip length
+    LoopStripMiningIterShortLoop = LoopStripMiningIter / 10;
+  }
 #endif
   return status;
 }
--- old/test/hotspot/jtreg/compiler/loopopts/UseCountedLoopSafepointsTest.java 2017-10-03 11:50:15.454502801 +0200
+++ new/test/hotspot/jtreg/compiler/loopopts/UseCountedLoopSafepointsTest.java 2017-10-03 11:50:14.213503787 +0200
@@ -61,7 +61,8 @@
         OutputAnalyzer oa;
         try {
             oa = ProcessTools.executeTestJvm("-XX:+UnlockDiagnosticVMOptions", "-Xbootclasspath/a:.",
-                    "-XX:" + (enabled ? "+" : "-") + "UseCountedLoopSafepoints", "-XX:+WhiteBoxAPI",
+                    "-XX:" + (enabled ? "+" : "-") + "UseCountedLoopSafepoints",
+                    "-XX:LoopStripMiningIter=" + (enabled ? "1" : "0"), "-XX:+WhiteBoxAPI",
                     "-XX:-Inline", "-Xbatch", "-XX:+PrintIdeal", "-XX:LoopUnrollLimit=0",
                     "-XX:CompileOnly=" + UseCountedLoopSafepoints.class.getName() + "::testMethod",
                     UseCountedLoopSafepoints.class.getName());