--- old/src/share/vm/includeDB_compiler2 Thu Jan 7 16:01:50 2010 +++ new/src/share/vm/includeDB_compiler2 Thu Jan 7 16:01:50 2010 @@ -601,6 +601,7 @@ loopTransform.cpp addnode.hpp loopTransform.cpp allocation.inline.hpp +loopTransform.cpp callnode.hpp loopTransform.cpp connode.hpp loopTransform.cpp compileLog.hpp loopTransform.cpp divnode.hpp --- old/src/share/vm/opto/c2_globals.hpp Thu Jan 7 16:01:51 2010 +++ new/src/share/vm/opto/c2_globals.hpp Thu Jan 7 16:01:51 2010 @@ -154,6 +154,12 @@ notproduct(bool, TraceProfileTripCount, false, \ "Trace profile loop trip count information") \ \ + product(bool, UseLoopPredicate, true, \ + "Generate a predicate to select fast/slow loop versions") \ + \ + develop(bool, TraceLoopPredicate, false, \ + "Trace generation of loop predicates") \ + \ develop(bool, OptoCoalesce, true, \ "Use Conservative Copy Coalescing in the Register Allocator") \ \ --- old/src/share/vm/opto/compile.cpp Thu Jan 7 16:01:51 2010 +++ new/src/share/vm/opto/compile.cpp Thu Jan 7 16:01:51 2010 @@ -932,6 +932,7 @@ _intrinsics = NULL; _macro_nodes = new GrowableArray(comp_arena(), 8, 0, NULL); + _predicate_opaqs = new GrowableArray(comp_arena(), 8, 0, NULL); register_library_intrinsics(); } @@ -1553,6 +1554,19 @@ } } +//---------------------cleanup_loop_predicates----------------------- +// Remove the opaque nodes that protect the predicates so that all unused +// checks and uncommon_traps will be eliminated from the ideal graph +void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) { + if (predicate_count()==0 ) return; + for (int i = predicate_count(); i > 0; i--) { + Node * n = predicate_opaque1_node(i-1); + assert(n->Opcode() == Op_Opaque1, "must be"); + igvn.replace_node(n, n->in(1)); + } + assert(predicate_count()==0, "should be clean!"); + igvn.optimize(); +} //------------------------------Optimize--------------------------------------- // Given a graph, optimize it. @@ -1588,13 +1602,13 @@ // Loop transforms on the ideal graph. Range Check Elimination, // peeling, unrolling, etc. - + // Set loop opts counter loop_opts_cnt = num_loop_opts(); if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) { { TracePhase t2("idealLoop", &_t_idealLoop, true); - PhaseIdealLoop ideal_loop( igvn, true ); + PhaseIdealLoop ideal_loop( igvn, true, UseLoopPredicate); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop 1", 2); if (failing()) return; @@ -1602,7 +1616,7 @@ // Loop opts pass if partial peeling occurred in previous pass if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) { TracePhase t3("idealLoop", &_t_idealLoop, true); - PhaseIdealLoop ideal_loop( igvn, false ); + PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop 2", 2); if (failing()) return; @@ -1610,7 +1624,7 @@ // Loop opts pass for loop-unrolling before CCP if(major_progress() && (loop_opts_cnt > 0)) { TracePhase t4("idealLoop", &_t_idealLoop, true); - PhaseIdealLoop ideal_loop( igvn, false ); + PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop 3", 2); } @@ -1648,13 +1662,21 @@ // peeling, unrolling, etc. if(loop_opts_cnt > 0) { debug_only( int cnt = 0; ); + bool loop_predication = UseLoopPredicate; while(major_progress() && (loop_opts_cnt > 0)) { TracePhase t2("idealLoop", &_t_idealLoop, true); assert( cnt++ < 40, "infinite cycle in loop optimization" ); - PhaseIdealLoop ideal_loop( igvn, true ); + PhaseIdealLoop ideal_loop( igvn, true, loop_predication); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop iterations", 2); if (failing()) return; + // Perform loop predication optimization during first iteration after CCP. + // After that switch it off and cleanup unused loop predicates. + if (loop_predication) { + loop_predication = false; + cleanup_loop_predicates(igvn); + if (failing()) return; + } } } --- old/src/share/vm/opto/compile.hpp Thu Jan 7 16:01:52 2010 +++ new/src/share/vm/opto/compile.hpp Thu Jan 7 16:01:52 2010 @@ -38,6 +38,7 @@ class OptoReg; class PhaseCFG; class PhaseGVN; +class PhaseIterGVN; class PhaseRegAlloc; class PhaseCCP; class PhaseCCP_DCE; @@ -172,6 +173,7 @@ const char* _failure_reason; // for record_failure/failing pattern GrowableArray* _intrinsics; // List of intrinsics. GrowableArray* _macro_nodes; // List of nodes which need to be expanded before matching. + GrowableArray* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. ConnectionGraph* _congraph; #ifndef PRODUCT IdealGraphPrinter* _printer; @@ -351,7 +353,9 @@ } int macro_count() { return _macro_nodes->length(); } + int predicate_count() { return _predicate_opaqs->length();} Node* macro_node(int idx) { return _macro_nodes->at(idx); } + Node* predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);} ConnectionGraph* congraph() { return _congraph;} void add_macro_node(Node * n) { //assert(n->is_macro(), "must be a macro node"); @@ -363,7 +367,19 @@ // that the node is in the array before attempting to remove it if (_macro_nodes->contains(n)) _macro_nodes->remove(n); + // remove from _predicate_opaqs list also if it is there + if (predicate_count() > 0 && _predicate_opaqs->contains(n)){ + _predicate_opaqs->remove(n); + } } + void add_predicate_opaq(Node * n) { + assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1"); + assert(_macro_nodes->contains(n), "should have already been in macro list"); + _predicate_opaqs->append(n); + } + //remove the opaque nodes that protect the predicates so that the unused checks and + //uncommon traps will be eliminated from the graph. + void cleanup_loop_predicates(PhaseIterGVN &igvn); // Compilation environment. Arena* comp_arena() { return &_comp_arena; } --- old/src/share/vm/opto/loopTransform.cpp Thu Jan 7 16:01:53 2010 +++ new/src/share/vm/opto/loopTransform.cpp Thu Jan 7 16:01:53 2010 @@ -549,6 +549,10 @@ // Comparing trip+off vs limit Node *bol = iff->in(1); if( bol->req() != 2 ) continue; // dead constant test + if (!bol->is_Bool()) { + assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); + continue; + } Node *cmp = bol->in(1); Node *rc_exp = cmp->in(1); @@ -875,7 +879,7 @@ //------------------------------is_invariant----------------------------- // Return true if n is invariant bool IdealLoopTree::is_invariant(Node* n) const { - Node *n_c = _phase->get_ctrl(n); + Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; if (n_c->is_top()) return false; return !is_member(_phase->get_loop(n_c)); } @@ -1594,7 +1598,7 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { // Check and remove empty loops (spam micro-benchmarks) if( policy_do_remove_empty_loop(phase) ) - return true; // Here we removed an empty loop + return true; // Here we removed an empty loop bool should_peel = policy_peeling(phase); // Should we peel? @@ -1688,8 +1692,8 @@ // an even number of trips). If we are peeling, we might enable some RCE // and we'd rather unroll the post-RCE'd loop SO... do not unroll if // peeling. - if( should_unroll && !should_peel ) - phase->do_unroll(this,old_new, true); + if( should_unroll && !should_peel ) + phase->do_unroll(this,old_new, true); // Adjust the pre-loop limits to align the main body // iterations. @@ -1730,10 +1734,10 @@ !_irreducible && _allow_optimizations && !tail()->is_top() ) { // Also ignore the occasional dead backedge - if (!_has_call) { - if (!iteration_split_impl( phase, old_new )) { - return false; - } + if (!_has_call) { + if (!iteration_split_impl( phase, old_new )) { + return false; + } } else if (policy_unswitching(phase)) { phase->do_unswitching(this, old_new); } @@ -1745,4 +1749,577 @@ if( _next && !_next->iteration_split( phase, old_new )) return false; return true; +} + +//-------------------------------is_uncommon_trap_proj---------------------------- +// Return true if proj is the form of "proj->[region->..]call_uct" +bool PhaseIdealLoop::is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate) { + int path_limit = 10; + assert(proj, "invalid argument"); + Node* out = proj; + for (int ct = 0; ct < path_limit; ct++) { + out = out->unique_ctrl_out(); + if (out == NULL || out->is_Root() || out->is_Start()) + return false; + if (out->is_CallStaticJava()) { + int req = out->as_CallStaticJava()->uncommon_trap_request(); + if (req != 0) { + Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(req); + if (!must_reason_predicate || reason == Deoptimization::Reason_predicate){ + return true; + } + } + return false; //don't do further after call + } + } + return false; +} + +//-------------------------------is_uncommon_trap_if_pattern------------------------- +// Return true for "if(test)-> proj -> ... +// | +// V +// other_proj->[region->..]call_uct" +// +//"must_reason_predicate" means the uct reason must be Reason_predicate +bool PhaseIdealLoop::is_uncommon_trap_if_pattern(ProjNode *proj, bool must_reason_predicate) { + Node *in0 = proj->in(0); + if ( !in0->is_If() ) return false; + IfNode* iff = in0->as_If(); + + //we need "If(Conv2B(Opaque1(...)))" pattern for must_reason_predicate + if (must_reason_predicate) { + if(iff->in(1)->Opcode() != Op_Conv2B || + iff->in(1)->in(1)->Opcode() != Op_Opaque1) { + return false; + } + } + + ProjNode* other_proj = iff->proj_out(1-proj->_con)->as_Proj(); + return is_uncommon_trap_proj(other_proj, must_reason_predicate); +} + +//------------------------------create_new_if_for_predicate------------------------ +// create a new if above the uct_if_pattern for the predicate to be promoted. +// +// before after +// ---------- ---------- +// ctrl ctrl +// | | +// | | +// v v +// iff new_iff +// / \ / \ +// / \ / \ +// v v v v +// uncommon_proj cont_proj if_uct if_cont +// \ | | | | +// \ | | | | +// v v v | v +// rgn loop | iff +// | | / \ +// | | / \ +// v | v v +// uncommon_trap | uncommon_proj cont_proj +// \ \ | | +// \ \ | | +// v v v v +// rgn loop +// | +// | +// v +// uncommon_trap +// +// +// We will create a region to guard the uct call if there is no one there. +// The true projecttion (if_cont) of the new_iff is returned. +ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj) { + assert(is_uncommon_trap_if_pattern(cont_proj, true), "must be a uct if pattern!"); + IfNode* iff = cont_proj->in(0)->as_If(); + + ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); + Node *rgn = uncommon_proj->unique_ctrl_out(); + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); + + if (!rgn->is_Region()) { //create a region to guard the call + assert(rgn->is_Call(), "must be call uct"); + CallNode* call = rgn->as_Call(); + rgn = new (C, 1) RegionNode(1); + _igvn.set_type(rgn, rgn->bottom_type()); + rgn->add_req(uncommon_proj); + set_idom(rgn, idom(uncommon_proj), dom_depth(uncommon_proj)+1); + _igvn.hash_delete(call); + call->set_req(0, rgn); + } + + //Create new_iff + uint iffdd = dom_depth(iff); + IdealLoopTree* lp = get_loop(iff); + IfNode *new_iff = new (C, 2) IfNode( iff->in(0), NULL, iff->_prob, iff->_fcnt); + register_node(new_iff, lp, idom(iff), iffdd); + Node *if_cont = new (C, 1) IfTrueNode(new_iff); + Node *if_uct = new (C, 1) IfFalseNode(new_iff); + if (cont_proj->is_IfFalse()) { + // Swap + Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; + } + register_node(if_cont, lp, new_iff, iffdd); + register_node(if_uct, get_loop(rgn), new_iff, iffdd); + + //if_cont to iff + _igvn.hash_delete(iff); + iff->set_req(0, if_cont); + set_idom(iff, if_cont, dom_depth(iff)); + + //if_uct to rgn + _igvn.hash_delete(rgn); + rgn->add_req(if_uct); + Node* ridom = idom(rgn); + Node* nrdom = dom_lca(ridom, new_iff); + set_idom(rgn, nrdom, dom_depth(rgn)); + + //rgn must have no phis + assert(!rgn->as_Region()->has_phi(), "region must have no phis"); + + return if_cont->as_Proj(); +} + +//------------------------------find_predicate_insertion_point-------------------------- +// Find a good location to insert a predicate +ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c) { + if (start_c == C->root() || !start_c->is_Proj()) + return NULL; + if (is_uncommon_trap_if_pattern(start_c->as_Proj(), true/*Reason_Predicate*/)) { + return start_c->as_Proj(); + } + return NULL; +} + +//------------------------------Invariance----------------------------------- +// Helper class for loop_predication_impl to compute invariance on the fly and +// clone invariants. +class Invariance : public StackObj { + VectorSet _visited, _invariant; + Node_Stack _stack; + VectorSet _clone_visited; + Node_List _old_new; //map of old to new (clone) + IdealLoopTree* _lpt; + PhaseIdealLoop* _phase; + + //Helper function to set up the invariance for invariance computation + //If n is a known invariant, set up directly. Otherwise, look up the + //the possibility to push n onto the stack for further processing. + void visit(Node* use, Node* n) { + if (_lpt->is_invariant(n)) {//known invariant + _invariant.set(n->_idx); + } else if (!n->is_CFG()) { + Node *n_ctrl = _phase->ctrl_or_self(n); + Node *u_ctrl = _phase->ctrl_or_self(use);//self if use is a CFG + if(_phase->is_dominator(n_ctrl, u_ctrl)) { + _stack.push(n, n->in(0) == NULL ? 1 : 0); + } + } + } + + // Compute invariance for "the_node" and (possibly) all its inputs recursively + // on the fly + void compute_invariance(Node* n) { + assert(_visited.test(n->_idx), "must be"); + visit(n, n); + while (_stack.is_nonempty()) { + Node* n = _stack.node(); + uint idx = _stack.index(); + if (idx == n->req()) {// all inputs are processed + _stack.pop(); + // n is invariant if it's inputs are all invariant + bool all_inputs_invariant = true; + for (uint i = 0; i < n->req(); i++) { + Node* in = n->in(i); + if (in == NULL) continue; + assert(_visited.test(in->_idx), "must have visited input"); + if (!_invariant.test(in->_idx)) {//bad guy + all_inputs_invariant = false; + break; + } + } + if (all_inputs_invariant) { + _invariant.set(n->_idx);//I am a invariant too + } + } else {//process next input + _stack.set_index(idx + 1); + Node* m = n->in(idx); + if (m != NULL && !_visited.test_set(m->_idx)) { + visit(n, m); + } + } + } + } + + //Helper function to set up _old_new map for clone_nodes. + //If n is a known invariant, set up directly ("clone" of n == n). + //Otherwise, push n onto the stack for real cloning. + void clone_visit(Node* n) { + assert(_invariant.test(n->_idx), "must be invariant"); + if (_lpt->is_invariant(n)) {//known invariant + _old_new.map(n->_idx, n); + } else{//to be cloned + assert (!n->is_CFG(), "should not see CFG here"); + _stack.push(n, n->in(0) == NULL ? 1 : 0); + } + } + + //Clone "n" and (possibly) all its inputs recursively + void clone_nodes(Node* n, Node* ctrl) { + clone_visit(n); + while (_stack.is_nonempty()) { + Node* n = _stack.node(); + uint idx = _stack.index(); + if (idx == n->req()) {//all inputs processed, clone n! + _stack.pop(); + // clone invariant node + Node* n_cl = n->clone(); + _old_new.map(n->_idx, n_cl); + _phase->register_new_node(n_cl, ctrl); + for (uint i = 0; i < n->req(); i++) { + Node* in = n_cl->in(i); + if (in == NULL) continue; + n_cl->set_req(i, _old_new[in->_idx]); + } + } else {//process next input + _stack.set_index(idx + 1); + Node* m = n->in(idx); + if (m != NULL && !_clone_visited.test_set(m->_idx)) { + clone_visit(m); //visit the input + } + } + } + } + + public: + Invariance(Arena* area, IdealLoopTree* lpt) : + _lpt(lpt), _phase(lpt->_phase), + _visited(area), _invariant(area), _stack(area, 10 /* guess */), + _clone_visited(area), _old_new(area) + {} + + //Map old to n for invariance computation and clone + void map_ctrl(Node* old, Node* n) { + assert(old->is_CFG() && n->is_CFG(), "must be"); + _old_new.map(old->_idx, n); //"clone" of old is n + _invariant.set(old->_idx); //old is invariant + _clone_visited.set(old->_idx); + } + + //Driver function to compute invariance + bool is_invariant(Node* n) { + if (!_visited.test_set(n->_idx)) + compute_invariance(n); + return (_invariant.test(n->_idx) != 0); + } + + //Driver function to clone invariant + Node* clone(Node* n, Node* ctrl) { + assert(ctrl->is_CFG(), "must be"); + assert(_invariant.test(n->_idx), "must be an invariant"); + if (!_clone_visited.test(n->_idx)) + clone_nodes(n, ctrl); + return _old_new[n->_idx]; + } +}; + +//------------------------------is_range_check_if ----------------------------------- +// Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format +// Note: this function is particularly designed for loop predication. We require load_range +// and offset to be loop invariant computed on the fly by "invar" +bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const { + if (!is_loop_exit(iff)) { + return false; + } + if (!iff->in(1)->is_Bool()) { + return false; + } + const BoolNode *bol = iff->in(1)->as_Bool(); + if (bol->_test._test != BoolTest::lt) { + return false; + } + if (!bol->in(1)->is_Cmp()) { + return false; + } + const CmpNode *cmp = bol->in(1)->as_Cmp(); + if (cmp->Opcode() != Op_CmpU ) { + return false; + } + if (cmp->in(2)->Opcode() != Op_LoadRange) { + return false; + } + LoadRangeNode* lr = (LoadRangeNode*)cmp->in(2); + if (!invar.is_invariant(lr)) { // loadRange must be invariant + return false; + } + Node *iv = _head->as_CountedLoop()->phi(); + int scale = 0; + Node *offset = NULL; + if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) { + return false; + } + if(offset && !invar.is_invariant(offset)) {//offset must be invariant + return false; + } + return true; +} + +//------------------------------rc_predicate----------------------------------- +// Create a range check predicate +// +// for (i = init; i < limit; i += stride) { +// a[scale*i+offset] +// } +// +// Compute max(scale*i + offset) for init <= i < limit and build the predicate +// as "max(scale*i + offset) u< a.length". +// +// There are two cases for max(scale*i + offset): +// (1) stride*scale > 0 +// max(scale*i + offset) = scale*(limit-stride) + offset +// (2) stride*scale < 0 +// max(scale*i + offset) = scale*init + offset +BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl, + int scale, Node* offset, + Node* init, Node* limit, Node* stride, + Node* range) { + Node* max_idx_expr = init; + int stride_con = stride->get_int(); + if ((stride_con > 0) == (scale > 0)) { + max_idx_expr = new (C, 3) SubINode(limit, stride); + register_new_node(max_idx_expr, ctrl); + } + + if (scale != 1) { + ConNode* con_scale = _igvn.intcon(scale); + max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale); + register_new_node(max_idx_expr, ctrl); + } + + if (offset && (!offset->is_Con() || offset->get_int() != 0)){ + max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset); + register_new_node(max_idx_expr, ctrl); + } + + CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range); + register_new_node(cmp, ctrl); + BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt); + register_new_node(bol, ctrl); + return bol; +} + +//------------------------------ loop_predication_impl-------------------------- +// Insert loop predicates for null checks and range checks +bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { + if(!UseLoopPredicate) return false; + + // Too many traps seen? + bool tmt = C->too_many_traps(C->method(), 0, Deoptimization::Reason_predicate); + int tc = C->trap_count(Deoptimization::Reason_predicate); + if (tmt || tc > 0) { + if (TraceLoopPredicate) { + tty->print_cr("too many predicate traps: %d", tc); + C->method()->print(); //which method has too many predicate traps + tty->print_cr(""); + } + return false; + } + + CountedLoopNode *cl = NULL; + if(loop->_head->is_CountedLoop()) { + cl = loop->_head->as_CountedLoop(); + //do nothing for iteration-splitted loops + if(!cl->is_normal_loop()) return false; + } + + LoopNode *lpn = loop->_head->as_Loop(); + Node* entry = lpn->in(LoopNode::EntryControl); + + ProjNode *predicate_proj = find_predicate_insertion_point(entry); + if (!predicate_proj){ +#ifndef PRODUCT + if (TraceLoopPredicate) { + tty->print("missing predicate:"); + loop->dump_head(); + } +#endif + return false; + } + + ConNode* zero = _igvn.intcon(0); + set_ctrl(zero, C->root()); + Node *cond_false = new (C, 2) Conv2BNode(zero); + register_new_node(cond_false, C->root()); + ConNode* one = _igvn.intcon(1); + set_ctrl(one, C->root()); + Node *cond_true = new (C, 2) Conv2BNode(one); + register_new_node(cond_true, C->root()); + + ResourceArea *area = Thread::current()->resource_area(); + Invariance invar(area, loop); + + // Create list of if-projs such that a newer proj dominates all older + // projs in the list, and they all dominate loop->tail() + Node_List if_proj_list(area); + LoopNode *head = loop->_head->as_Loop(); + Node *current_proj = loop->tail(); //start from tail + while ( current_proj != head ) { + if (loop == get_loop(current_proj) && //still in the loop ? + current_proj->is_Proj() && // is a projection ? + current_proj->in(0)->Opcode() == Op_If) { // is a if projection ? + if_proj_list.push(current_proj); + } + current_proj = idom(current_proj); + } + + bool hoisted = false; //true if at least one proj is promoted + while (if_proj_list.size() > 0) { + // Following are changed to nonnull when a predicate can be hoisted + ProjNode* new_predicate_proj = NULL; + BoolNode* new_predicate_bol = NULL; + + ProjNode* proj = if_proj_list.pop()->as_Proj(); + IfNode* iff = proj->in(0)->as_If(); + + if(!is_uncommon_trap_if_pattern(proj)) { + if(loop->is_loop_exit(iff)) { + // stop processing the remaining projs in the list because the execution of them + // depends on the condition of "iff" (iff->in(1)). + break; + } else { + //Both arms are inside the loop. There are two cases: + // (1) there is one backward branch. In this case, any remaining proj + // in the if_proj list post-dominates "iff". So, the condition of "iff" + // does not determine the execution the remining projs directly, and we + // can safely continue. + // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj" + // does not dominate loop->tail(), so it can not be in the if_proj list. + continue; + } + } + + Node* test = iff->in(1); + if (!test->is_Bool()){ //Conv2B, ... + continue; + } + BoolNode* bol = test->as_Bool(); + if (invar.is_invariant(bol)) { + // Invariant test + new_predicate_proj = create_new_if_for_predicate(predicate_proj); + Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0); + new_predicate_bol = invar.clone(bol, ctrl)->as_Bool(); + if (TraceLoopPredicate) tty->print("invariant"); + } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) { + // Range check (only for counted loops) + new_predicate_proj = create_new_if_for_predicate(predicate_proj); + Node *ctrl = new_predicate_proj->in(0)->as_If()->in(0); + const Node* cmp = bol->in(1)->as_Cmp(); + Node* idx = cmp->in(1); + assert(!invar.is_invariant(idx), "index is variant"); + assert(cmp->in(2)->Opcode() == Op_LoadRange, "must be"); + LoadRangeNode* ld_rng = (LoadRangeNode*)cmp->in(2); //LoadRangeNode + assert(invar.is_invariant(ld_rng), "load range must be invariant"); + ld_rng = (LoadRangeNode*)invar.clone(ld_rng, ctrl); + int scale = 1; + Node* offset = zero; + bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset); + assert(ok, "must be index expression"); + if (offset && offset != zero) { + assert(invar.is_invariant(offset), "offset must be loop invariant"); + offset = invar.clone(offset, ctrl); + } + Node* init = cl->init_trip(); + Node* limit = cl->limit(); + Node* stride = cl->stride(); + new_predicate_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng); + if (TraceLoopPredicate) tty->print("range check"); + } + + if (new_predicate_proj == NULL) { + // The other proj of the "iff" is a uncommon trap projection, and we can assume + // the other proj will not be executed ("executed" means uct raised). + continue; + } else { + // Success - attach condition (new_predicate_bol) to predicate if + invar.map_ctrl(proj, new_predicate_proj);//so that invariance test can be appropriate + IfNode* new_iff = new_predicate_proj->in(0)->as_If(); + + // Negate test if necessary + if (proj->_con != predicate_proj->_con) { + new_predicate_bol = new (C, 2) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate()); + register_new_node(new_predicate_bol, new_iff->in(0)); + if (TraceLoopPredicate) tty->print_cr(" if negated: %d", iff->_idx); + } else { + if (TraceLoopPredicate) tty->print_cr(" if: %d", iff->_idx); + } + + _igvn.hash_delete(new_iff); + new_iff->set_req(1, new_predicate_bol); + + _igvn.hash_delete(iff); + iff->set_req(1, proj->is_IfFalse() ? cond_false : cond_true); + + Node* ctrl = new_predicate_proj; // new control + ProjNode* dp = proj; // old control + assert(get_loop(dp) == loop, "guarenteed at the time of collecting proj"); + // Find nodes (depends only on the test) off the surviving projection; + // move them outside the loop with the control of proj_clone + for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) { + Node* cd = dp->fast_out(i); // Control-dependent node + if( cd->depends_only_on_test() ) { + assert(cd->in(0) == dp, ""); + _igvn.hash_delete(cd); + cd->set_req(0, ctrl); //ctrl, not NULL + set_early_ctrl(cd); + _igvn._worklist.push(cd); + IdealLoopTree *new_loop = get_loop(get_ctrl(cd)); + if(new_loop != loop) { + if(!loop->_child) loop->_body.yank(cd); + if(!new_loop->_child ) new_loop->_body.push(cd); + } + --i; + --imax; + } + } + + hoisted = true; + C->set_major_progress(); + } + }//end while + +#ifndef PRODUCT + //report that the loop predication has been actually performed + //for this loop + if (TraceLoopPredicate && hoisted) { + tty->print("Loop Predication Performed:"); + loop->dump_head(); + } +#endif + + return hoisted; +} + +//------------------------------loop_predication-------------------------------- +// driver routine for loop predication optimization +bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) { + bool hoisted = false; + // Recursively promote predicates + if( _child ) { + hoisted = _child->loop_predication( phase); + } + + // self + if(!_irreducible && !tail()->is_top()) { + hoisted |= phase->loop_predication_impl(this); + } + + if( _next ) { //sibling + hoisted |= _next->loop_predication( phase); + } + + return hoisted; } --- old/src/share/vm/opto/loopnode.cpp Thu Jan 7 16:01:54 2010 +++ new/src/share/vm/opto/loopnode.cpp Thu Jan 7 16:01:54 2010 @@ -96,7 +96,7 @@ e_d = dom_depth(early); // Reset depth register cache } } - + // Return earliest legal location assert(early == find_non_split_ctrl(early), "unexpected early control"); @@ -1420,11 +1420,57 @@ } } +//---------------------collect_potentially_useful_predicates----------------------- +//Helper function to collect potentially useful predicates to prevent them from +//being eliminated by PhaseIdealLoop::eliminate_useless_predicates +void PhaseIdealLoop::collect_potentially_useful_predicates( + IdealLoopTree * loop, Unique_Node_List &useful_predicates) { + if (loop->_child) { // child + collect_potentially_useful_predicates(loop->_child, useful_predicates); + } + + //self (only loops that we can apply loop predication may use their predicates) + if (loop->_head->is_Loop() && + !loop->_irreducible && + !loop->tail()->is_top()) { + LoopNode *lpn = loop->_head->as_Loop(); + Node* entry = lpn->in(LoopNode::EntryControl); + ProjNode *predicate_proj = find_predicate_insertion_point(entry); + if (predicate_proj != NULL ) { //right pattern that can be used by loop predication + assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); + useful_predicates.push(entry->in(0)->in(1)->in(1)); //good one + } + } + + if( loop->_next ) { // sibling + collect_potentially_useful_predicates(loop->_next, useful_predicates); + } +} + +//------------------------eliminate_useless_predicates----------------------------- +// Eliminate all inserted predicates if they could not be used by loop predication. +void PhaseIdealLoop::eliminate_useless_predicates() { + if(C->predicate_count() == 0) return; //no predicate left + + Unique_Node_List useful_predicates; //to store useful predicates + if( C->has_loops()) { + collect_potentially_useful_predicates(_ltree_root->_child, useful_predicates); + } + + for (int i = C->predicate_count(); i > 0; i--) { + Node * n = C->predicate_opaque1_node(i-1); + assert(n->Opcode() == Op_Opaque1, "must be"); + if(!useful_predicates.member(n)) { //not in the useful list + _igvn.replace_node(n, n->in(1)); + } + } +} + //============================================================================= //----------------------------build_and_optimize------------------------------- // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to // its corresponding LoopNode. If 'optimize' is true, do some loop cleanups. -void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) { +void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) { int old_progress = C->major_progress(); // Reset major-progress flag for the driver's heuristics @@ -1577,6 +1623,12 @@ return; } + // some parser-inserted loop predicates could never be used by loop + // predication. Eliminate them before loop optimization + if(UseLoopPredicate) { + eliminate_useless_predicates(); + } + // clear out the dead code while(_deadlist.size()) { _igvn.remove_globally_dead_node(_deadlist.pop()); @@ -1619,6 +1671,11 @@ NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); ); } + // Perform loop predication before iteration splitting + if(do_loop_pred && C->has_loops() && !C->major_progress()) { + _ltree_root->_child->loop_predication(this); + } + // Perform iteration-splitting on inner loops. Split iterations to avoid // range checks or one-shot null checks. @@ -2763,6 +2820,22 @@ Node *legal = LCA; // Walk 'legal' up the IDOM chain Node *least = legal; // Best legal position so far while( early != legal ) { // While not at earliest legal +#ifdef ASSERT + if (legal->is_Start() && !early->is_Root()) { + // Bad graph. Print idom path and fail. + tty->print_cr( "Bad graph detected in build_loop_late"); + tty->print("n: ");n->dump(); tty->cr(); + tty->print("early: ");early->dump(); tty->cr(); + int ct = 0; + Node *dbg_legal = LCA; + while(!dbg_legal->is_Start() && ct < 100) { + tty->print("idom[%d] ",ct); dbg_legal->dump(); tty->cr(); + ct++; + dbg_legal = idom(dbg_legal); + } + assert(false, "Bad graph detected in build_loop_late"); + } +#endif // Find least loop nesting depth legal = idom(legal); // Bump up the IDOM tree // Check for lower nesting depth --- old/src/share/vm/opto/loopnode.hpp Thu Jan 7 16:01:55 2010 +++ new/src/share/vm/opto/loopnode.hpp Thu Jan 7 16:01:54 2010 @@ -30,6 +30,7 @@ class Node; class PhaseIdealLoop; class VectorSet; +class Invariance; struct small_cache; // @@ -325,6 +326,10 @@ // Returns TRUE if loop tree is structurally changed. bool beautify_loops( PhaseIdealLoop *phase ); + // Perform optimization to use the loop predicates for null checks and range checks. + // Applies to any loop level (not just the innermost one) + bool loop_predication( PhaseIdealLoop *phase); + // Perform iteration-splitting on inner loops. Split iterations to // avoid range checks or one-shot null checks. Returns false if the // current round of loop opts should stop. @@ -395,6 +400,9 @@ // into longer memory ops, we may want to increase alignment. bool policy_align( PhaseIdealLoop *phase ) const; + // Return TRUE if "iff" is a range check. + bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const; + // Compute loop trip count from profile data void compute_profile_trip_cnt( PhaseIdealLoop *phase ); @@ -521,9 +529,6 @@ } Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag ); - // true if CFG node d dominates CFG node n - bool is_dominator(Node *d, Node *n); - // Helper function for directing control inputs away from CFG split // points. Node *find_non_split_ctrl( Node *ctrl ) const { @@ -572,6 +577,17 @@ assert(n == find_non_split_ctrl(n), "must return legal ctrl" ); return n; } + // true if CFG node d dominates CFG node n + bool is_dominator(Node *d, Node *n); + // return get_ctrl for a data node and self(n) for a CFG node + Node* ctrl_or_self(Node* n) { + if( has_ctrl(n) ) + return get_ctrl(n); + else { + assert (n->is_CFG(), "must be a CFG node"); + return n; + } + } private: Node *get_ctrl_no_update( Node *i ) const { @@ -600,7 +616,7 @@ // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace // the 'old_node' with 'new_node'. Kill old-node. Add a reference // from old_node to new_node to support the lazy update. Reference - // replaces loop reference, since that is not neede for dead node. + // replaces loop reference, since that is not needed for dead node. public: void lazy_update( Node *old_node, Node *new_node ) { assert( old_node != new_node, "no cycles please" ); @@ -679,11 +695,11 @@ _dom_lca_tags(C->comp_arena()), _verify_me(NULL), _verify_only(true) { - build_and_optimize(false); + build_and_optimize(false, false); } // build the loop tree and perform any requested optimizations - void build_and_optimize(bool do_split_if); + void build_and_optimize(bool do_split_if, bool do_loop_pred); public: // Dominators for the sea of nodes @@ -694,13 +710,13 @@ Node *dom_lca_internal( Node *n1, Node *n2 ) const; // Compute the Ideal Node to Loop mapping - PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) : + PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool do_loop_pred) : PhaseTransform(Ideal_Loop), _igvn(igvn), _dom_lca_tags(C->comp_arena()), _verify_me(NULL), _verify_only(false) { - build_and_optimize(do_split_ifs); + build_and_optimize(do_split_ifs, do_loop_pred); } // Verify that verify_me made the same decisions as a fresh run. @@ -710,7 +726,7 @@ _dom_lca_tags(C->comp_arena()), _verify_me(verify_me), _verify_only(false) { - build_and_optimize(false); + build_and_optimize(false, false); } // Build and verify the loop tree without modifying the graph. This @@ -790,6 +806,30 @@ // Return true if exp is a scaled induction var plus (or minus) constant bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0); + // Return true if proj is for "proj->[region->..]call_uct" + bool is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate = false); + // Return true for "if(test)-> proj -> ... + // | + // V + // other_proj->[region->..]call_uct" + bool is_uncommon_trap_if_pattern(ProjNode* proj, bool must_reason_predicate = false); + // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted + ProjNode* create_new_if_for_predicate(ProjNode* cont_proj); + // Find a good location to insert a predicate + ProjNode* find_predicate_insertion_point(Node* start_c); + // Construct a range check for a predicate if + BoolNode* rc_predicate(Node* ctrl, + int scale, Node* offset, + Node* init, Node* limit, Node* stride, + Node* range); + + //Implementation of the loop predication to promote checks outside the loop + bool loop_predication_impl(IdealLoopTree *loop); + + //Helper function to collect predicate for eliminating the useless ones + void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1); + void eliminate_useless_predicates(); + // Eliminate range-checks and other trip-counter vs loop-invariant tests. void do_range_check( IdealLoopTree *loop, Node_List &old_new ); @@ -906,7 +946,6 @@ const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl); // Helper functions - void register_new_node( Node *n, Node *blk ); Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache ); Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ); void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ); @@ -918,6 +957,7 @@ public: void set_created_loop_node() { _created_loop_node = true; } bool created_loop_node() { return _created_loop_node; } + void register_new_node( Node *n, Node *blk ); #ifndef PRODUCT void dump( ) const; --- old/src/share/vm/opto/parse.hpp Thu Jan 7 16:01:55 2010 +++ new/src/share/vm/opto/parse.hpp Thu Jan 7 16:01:55 2010 @@ -427,6 +427,11 @@ } } + // Return true if the parser should add a loop predicate + bool should_add_predicate(int target_bci); + // Insert a loop predicate into the graph + void add_predicate(); + // Note: Intrinsic generation routines may be found in library_call.cpp. // Helper function to setup Ideal Call nodes @@ -488,7 +493,7 @@ void do_ifnull(BoolTest::mask btest, Node* c); void do_if(BoolTest::mask btest, Node* c); - void repush_if_args(); + int repush_if_args(); void adjust_map_after_if(BoolTest::mask btest, Node* c, float prob, Block* path, Block* other_path); IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask); --- old/src/share/vm/opto/parse1.cpp Thu Jan 7 16:01:56 2010 +++ new/src/share/vm/opto/parse1.cpp Thu Jan 7 16:01:56 2010 @@ -1378,17 +1378,21 @@ // Parse bytecodes while (!stopped() && !failing()) { iter().next(); - + // Learn the current bci from the iterator: set_parse_bci(iter().cur_bci()); if (bci() == block()->limit()) { + //insert a predicate if it falls through to a loop head block + if(should_add_predicate(bci())){ + add_predicate(); + } // Do not walk into the next block until directed by do_all_blocks. merge(bci()); break; } assert(bci() < block()->limit(), "bci still in block"); - + if (log != NULL) { // Output an optional context marker, to help place actions // that occur during parsing of this BC. If there is no log @@ -2083,6 +2087,37 @@ } } +//------------------------------should_add_predicate-------------------------- +bool Parse::should_add_predicate(int target_bci) { + if (!UseLoopPredicate) return false; + Block* target = successor_for_bci(target_bci); + if (target != NULL && + target->is_loop_head() && + block()->rpo() < target->rpo()) { + return true; + } + return false; +} + +//------------------------------add_predicate--------------------------------- +void Parse::add_predicate() { + assert(UseLoopPredicate,"use only for loop predicate"); + Node *cont = _gvn.intcon(1); + Node* opq = _gvn.transform(new (C, 2) Opaque1Node(C, cont)); + Node *bol = _gvn.transform(new (C, 2) Conv2BNode(opq)); + IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN); + Node* iffalse = _gvn.transform(new (C, 1) IfFalseNode(iff)); + C->add_predicate_opaq(opq); + { + PreserveJVMState pjvms(this); + set_control(iffalse); + uncommon_trap(Deoptimization::Reason_predicate, + Deoptimization::Action_maybe_recompile); + } + Node* iftrue = _gvn.transform(new (C, 1) IfTrueNode(iff)); + set_control(iftrue); +} + #ifndef PRODUCT //------------------------show_parse_info-------------------------------------- void Parse::show_parse_info() { --- old/src/share/vm/opto/parse2.cpp Thu Jan 7 16:01:57 2010 +++ new/src/share/vm/opto/parse2.cpp Thu Jan 7 16:01:57 2010 @@ -278,6 +278,11 @@ if (len < 1) { // If this is a backward branch, add safepoint maybe_add_safepoint(default_dest); + if(should_add_predicate(default_dest)){ + _sp += 1; //set original stack for use by uncommon_trap + add_predicate(); + _sp -= 1; + } merge(default_dest); return; } @@ -324,6 +329,11 @@ if (len < 1) { // If this is a backward branch, add safepoint maybe_add_safepoint(default_dest); + if(should_add_predicate(default_dest)){ + _sp += 1; //set original stack for use by uncommon_trap + add_predicate(); + _sp -= 1; + } merge(default_dest); return; } @@ -731,6 +741,9 @@ push(_gvn.makecon(ret_addr)); // Flow to the jsr. + if(should_add_predicate(jsr_bci)){ + add_predicate(); + } merge(jsr_bci); } @@ -881,7 +894,7 @@ //-------------------------------repush_if_args-------------------------------- // Push arguments of an "if" bytecode back onto the stack by adjusting _sp. -inline void Parse::repush_if_args() { +inline int Parse::repush_if_args() { #ifndef PRODUCT if (PrintOpto && WizardMode) { tty->print("defending against excessive implicit null exceptions on %s @%d in ", @@ -895,6 +908,7 @@ assert(argument(0) != NULL, "must exist"); assert(bc_depth == 1 || argument(1) != NULL, "two must exist"); _sp += bc_depth; + return bc_depth; } //----------------------------------do_ifnull---------------------------------- @@ -954,8 +968,14 @@ // Update method data profile_taken_branch(target_bci); adjust_map_after_if(btest, c, prob, branch_block, next_block); - if (!stopped()) + if (!stopped()) { + if(should_add_predicate(target_bci)){//add a predicate if it branches to a loop + int nargs = repush_if_args();//set original stack for uncommon_trap + add_predicate(); + _sp -= nargs; + } merge(target_bci); + } } } @@ -1076,8 +1096,14 @@ // Update method data profile_taken_branch(target_bci); adjust_map_after_if(taken_btest, c, prob, branch_block, next_block); - if (!stopped()) + if (!stopped()) { + if(should_add_predicate(target_bci)){//add a predicate if it branches to a loop + int nargs = repush_if_args(); //set original stack for the uncommon_trap + add_predicate(); + _sp -= nargs; + } merge(target_bci); + } } } @@ -2080,6 +2106,10 @@ // Update method data profile_taken_branch(target_bci); + //Add loop predicate if it goes to a loop + if(should_add_predicate(target_bci)){ + add_predicate(); + } // Merge the current control into the target basic block merge(target_bci); --- old/src/share/vm/opto/split_if.cpp Thu Jan 7 16:01:58 2010 +++ new/src/share/vm/opto/split_if.cpp Thu Jan 7 16:01:58 2010 @@ -219,6 +219,7 @@ //------------------------------register_new_node------------------------------ void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) { + assert(!n->is_CFG(), "must be data node"); _igvn.register_new_node_with_optimizer(n); set_ctrl(n, blk); IdealLoopTree *loop = get_loop(blk); --- old/src/share/vm/runtime/deoptimization.cpp Thu Jan 7 16:01:59 2010 +++ new/src/share/vm/runtime/deoptimization.cpp Thu Jan 7 16:01:58 2010 @@ -1672,7 +1672,8 @@ "unhandled", "constraint", "div0_check", - "age" + "age", + "predicate" }; const char* Deoptimization::_trap_action_name[Action_LIMIT] = { // Note: Keep this in sync. with enum DeoptAction. --- old/src/share/vm/runtime/deoptimization.hpp Thu Jan 7 16:01:59 2010 +++ new/src/share/vm/runtime/deoptimization.hpp Thu Jan 7 16:01:59 2010 @@ -46,6 +46,7 @@ Reason_constraint, // arbitrary runtime constraint violated Reason_div0_check, // a null_check due to division by zero Reason_age, // nmethod too old; tier threshold reached + Reason_predicate, // compiler generated predicate failed Reason_LIMIT, // Note: Keep this enum in sync. with _trap_reason_name. Reason_RECORDED_LIMIT = Reason_unloaded // some are not recorded per bc