src/share/vm/opto/loopopts.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File hotspot Sdiff src/share/vm/opto

src/share/vm/opto/loopopts.cpp

Print this page
rev 8571 : 8080289: Intermediate writes in a loop not eliminated by optimizer
Summary: Move Stores out of loop (after or before) when possible
Reviewed-by:


 636 #ifndef PRODUCT
 637     if (TraceLoopOpts) {
 638       tty->print("CMOV  ");
 639       r_loop->dump_head();
 640       if (Verbose) {
 641         bol->in(1)->dump(1);
 642         cmov->dump(1);
 643       }
 644     }
 645     if (VerifyLoopOptimizations) verify();
 646 #endif
 647   }
 648 
 649   // The useless CFG diamond will fold up later; see the optimization in
 650   // RegionNode::Ideal.
 651   _igvn._worklist.push(region);
 652 
 653   return iff->in(1);
 654 }
 655 









































































































































































































 656 //------------------------------split_if_with_blocks_pre-----------------------
 657 // Do the real work in a non-recursive function.  Data nodes want to be
 658 // cloned in the pre-order so they can feed each other nicely.
 659 Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
 660   // Cloning these guys is unlikely to win
 661   int n_op = n->Opcode();
 662   if( n_op == Op_MergeMem ) return n;
 663   if( n->is_Proj() ) return n;
 664   // Do not clone-up CmpFXXX variations, as these are always
 665   // followed by a CmpI
 666   if( n->is_Cmp() ) return n;
 667   // Attempt to use a conditional move instead of a phi/branch
 668   if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
 669     Node *cmov = conditional_move( n );
 670     if( cmov ) return cmov;
 671   }
 672   if( n->is_CFG() || n->is_LoadStore() )
 673     return n;
 674   if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
 675       n_op == Op_Opaque2 ) {
 676     if( !C->major_progress() )   // If chance of no more loop opts...
 677       _igvn._worklist.push(n);  // maybe we'll remove them
 678     return n;
 679   }
 680 
 681   if( n->is_Con() ) return n;   // No cloning for Con nodes
 682 
 683   Node *n_ctrl = get_ctrl(n);
 684   if( !n_ctrl ) return n;       // Dead node
 685 





 686   // Attempt to remix address expressions for loop invariants
 687   Node *m = remix_address_expressions( n );
 688   if( m ) return m;
 689 
 690   // Determine if the Node has inputs from some local Phi.
 691   // Returns the block to clone thru.
 692   Node *n_blk = has_local_phi_input( n );
 693   if( !n_blk ) return n;

 694   // Do not clone the trip counter through on a CountedLoop
 695   // (messes up the canonical shape).
 696   if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
 697 
 698   // Check for having no control input; not pinned.  Allow
 699   // dominating control.
 700   if( n->in(0) ) {
 701     Node *dom = idom(n_blk);
 702     if( dom_lca( n->in(0), dom ) != n->in(0) )
 703       return n;
 704   }

 705   // Policy: when is it profitable.  You must get more wins than
 706   // policy before it is considered profitable.  Policy is usually 0,
 707   // so 1 win is considered profitable.  Big merges will require big
 708   // cloning, so get a larger policy.
 709   int policy = n_blk->req() >> 2;
 710 
 711   // If the loop is a candidate for range check elimination,
 712   // delay splitting through it's phi until a later loop optimization
 713   if (n_blk->is_CountedLoop()) {
 714     IdealLoopTree *lp = get_loop(n_blk);
 715     if (lp && lp->_rce_candidate) {
 716       return n;
 717     }
 718   }
 719 
 720   // Use same limit as split_if_with_blocks_post
 721   if( C->live_nodes() > 35000 ) return n; // Method too big
 722 
 723   // Split 'n' through the merge point if it is profitable
 724   Node *phi = split_thru_phi( n, n_blk, policy );


1011             }
1012             register_new_node(x, x_ctrl);
1013 
1014             // Some institutional knowledge is needed here: 'x' is
1015             // yanked because if the optimizer runs GVN on it all the
1016             // cloned x's will common up and undo this optimization and
1017             // be forced back in the loop.  This is annoying because it
1018             // makes +VerifyOpto report false-positives on progress.  I
1019             // tried setting control edges on the x's to force them to
1020             // not combine, but the matching gets worried when it tries
1021             // to fold a StoreP and an AddP together (as part of an
1022             // address expression) and the AddP and StoreP have
1023             // different controls.
1024             if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
1025           }
1026           _igvn.remove_dead_node(n);
1027         }
1028       }
1029     }
1030   }


1031 
1032   // Check for Opaque2's who's loop has disappeared - who's input is in the
1033   // same loop nest as their output.  Remove 'em, they are no longer useful.
1034   if( n_op == Op_Opaque2 &&
1035       n->in(1) != NULL &&
1036       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1037     _igvn.replace_node( n, n->in(1) );
1038   }
1039 }
1040 
1041 //------------------------------split_if_with_blocks---------------------------
1042 // Check for aggressive application of 'split-if' optimization,
1043 // using basic block level info.
1044 void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
1045   Node *n = C->root();
1046   visited.set(n->_idx); // first, mark node as visited
1047   // Do pre-visit work for root
1048   n = split_if_with_blocks_pre( n );
1049   uint cnt = n->outcnt();
1050   uint i   = 0;




 636 #ifndef PRODUCT
 637     if (TraceLoopOpts) {
 638       tty->print("CMOV  ");
 639       r_loop->dump_head();
 640       if (Verbose) {
 641         bol->in(1)->dump(1);
 642         cmov->dump(1);
 643       }
 644     }
 645     if (VerifyLoopOptimizations) verify();
 646 #endif
 647   }
 648 
 649   // The useless CFG diamond will fold up later; see the optimization in
 650   // RegionNode::Ideal.
 651   _igvn._worklist.push(region);
 652 
 653   return iff->in(1);
 654 }
 655 
 656 #ifdef ASSERT
 657 static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
 658   for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
 659     Node* u = m->fast_out(i);
 660     if (u->is_CFG()) {
 661       if (u->Opcode() == Op_NeverBranch) {
 662         u = ((NeverBranchNode*)u)->proj_out(0);
 663         enqueue_cfg_uses(u, wq);
 664       } else {
 665         wq.push(u);
 666       }
 667     }
 668   }
 669 }
 670 #endif
 671 
 672 // Try moving a store out of a loop, right before the loop
 673 Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
 674   // Store has to be first in the loop body
 675   IdealLoopTree *n_loop = get_loop(n_ctrl);
 676   if (n->is_Store() && n_loop != _ltree_root && !n_loop->_irreducible) {
 677     Node* address = n->in(MemNode::Address);
 678     Node* value = n->in(MemNode::ValueIn);
 679     Node* mem = n->in(MemNode::Memory);
 680     IdealLoopTree* address_loop = get_loop(get_ctrl(address));
 681     IdealLoopTree* value_loop = get_loop(get_ctrl(value));
 682 
 683     // - address and value must be loop invariant
 684     // - memory must be a memory Phi for the loop
 685     // - Store must be the only store on this memory slice in the
 686     // loop: if there's another store following this one then value
 687     // written at iteration i by the second store could be overwritten
 688     // at iteration i+n by the first store: it's not safe to move the
 689     // first store out of the loop
 690     // - nothing must observe the Phi memory: it guarantees no read
 691     // before the store and no early exit out of the loop
 692     // With those conditions, we are also guaranteed the store post
 693     // dominates the loop head. Otherwise there would be extra Phi
 694     // involved between the loop's Phi and the store.
 695 
 696     if (!n_loop->is_member(address_loop) &&
 697         !n_loop->is_member(value_loop) &&
 698         mem->is_Phi() && mem->in(0) == n_loop->_head &&
 699         mem->outcnt() == 1 &&
 700         mem->in(LoopNode::LoopBackControl) == n) {
 701 
 702 #ifdef ASSERT
 703       // Check that store's control does post dominate loop entry
 704       bool ctrl_ok = false;
 705       {
 706         // Follow control from loop head until n or we exit the loop
 707         ResourceMark rm;
 708         Unique_Node_List wq;
 709         wq.push(n_loop->_head);
 710         for (uint next = 0; next < wq.size(); ++next) {
 711           Node *m = wq.at(next);
 712           if (m == n->in(0)) {
 713             ctrl_ok = true;
 714             continue;
 715           }
 716           assert(!has_ctrl(m), "should be CFG");
 717           if (!n_loop->is_member(get_loop(m))) {
 718             ctrl_ok = false;
 719             break;
 720           }
 721           enqueue_cfg_uses(m, wq);
 722         }
 723       }
 724       assert(ctrl_ok, "bad control");
 725 #endif
 726           
 727       // move the Store
 728       _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
 729       _igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl));
 730       _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
 731       // Disconnect the phi now. An empty phi can confuse other
 732       // optimizations in this pass of loop opts.
 733       _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
 734       n_loop->_body.yank(mem);
 735       
 736       IdealLoopTree* new_loop = get_loop(n->in(0));
 737       set_ctrl_and_loop(n, n->in(0));
 738         
 739       return n;
 740     }
 741   }
 742   return NULL;
 743 }
 744 
 745 // Try moving a store out of a loop, right after the loop
 746 void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
 747   if (n->is_Store()) {
 748     assert(n->in(0), "store should have control set");
 749     Node *n_ctrl = get_ctrl(n);
 750     IdealLoopTree *n_loop = get_loop(n_ctrl);
 751     // Store must be in a loop
 752     if (n_loop != _ltree_root && !n_loop->_irreducible) {
 753       Node* address = n->in(MemNode::Address);
 754       Node* value = n->in(MemNode::ValueIn);
 755       IdealLoopTree* address_loop = get_loop(get_ctrl(address));
 756       // address must be loop invariant
 757       if (!n_loop->is_member(address_loop)) {
 758         assert(n_loop != address_loop, "address is loop varying");
 759         // Store must be last on this memory slice in the loop and
 760         // nothing in the loop must observe it
 761         Node* phi = NULL;
 762         for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 763           Node* u = n->fast_out(i);
 764           if (has_ctrl(u)) { // control use?
 765             IdealLoopTree *u_loop = get_loop(get_ctrl(u));
 766             if (!n_loop->is_member(u_loop)) {
 767               continue;
 768             }
 769             if (u->is_Phi() && u->in(0) == n_loop->_head) {
 770               assert(_igvn.type(u) == Type::MEMORY, "bad phi");
 771               assert(phi == NULL, "already found");
 772               phi = u;
 773               continue;
 774             }
 775           }
 776           phi = NULL;
 777           break;
 778         }
 779         if (phi != NULL) {
 780           // Nothing in the loop before the store (next iteration)
 781           // must observe the stored value
 782           bool mem_ok = true;
 783           {
 784             ResourceMark rm;
 785             Unique_Node_List wq;
 786             wq.push(phi);
 787             for (uint next = 0; next < wq.size() && mem_ok; ++next) {
 788               Node *m = wq.at(next);
 789               for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
 790                 Node* u = m->fast_out(i);
 791                 if (u->is_Store() || u->is_Phi()) {
 792                   if (u != n) {
 793                     wq.push(u);
 794                     mem_ok = (wq.size() <= 10);
 795                   }
 796                 } else {
 797                   mem_ok = false;
 798                   break;
 799                 }
 800               }
 801             }
 802           }
 803           if (mem_ok) {
 804             // Move the Store out of the loop creating clones along
 805             // all paths out of the loop that observe the stored value
 806             _igvn.rehash_node_delayed(phi);
 807             int count = phi->replace_edge(n, n->in(MemNode::Memory));
 808             assert(count > 0, "inconsistent phi");
 809             for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 810               Node* u = n->fast_out(i);
 811               Node* c = get_ctrl(u);
 812 
 813               if (u->is_Phi()) {
 814                 c = u->in(0)->in(u->find_edge(n));
 815               }
 816               IdealLoopTree *u_loop = get_loop(c);
 817               assert (!n_loop->is_member(u_loop), "only the phi should have been a use in the loop");
 818               while(true) {
 819                 Node* next_c = find_non_split_ctrl(idom(c));
 820                 if (n_loop->is_member(get_loop(next_c))) {
 821                   break;
 822                 }
 823                 c = next_c;
 824               }
 825 
 826               Node* st = n->clone();
 827               st->set_req(0, c);
 828               _igvn.register_new_node_with_optimizer(st);
 829               
 830               set_ctrl(st, c);
 831               IdealLoopTree* new_loop = get_loop(c);
 832               assert(new_loop->_child != NULL, "");
 833               assert(new_loop != n_loop, "should be moved out of loop");
 834               if (new_loop->_child == NULL) new_loop->_body.push(st);
 835 
 836               _igvn.replace_input_of(u, u->find_edge(n), st);
 837               --imax;
 838               --i;
 839             }
 840 
 841 
 842             assert(n->outcnt() == 0, "all uses should be gone");
 843             _igvn.replace_input_of(n, MemNode::Memory, C->top());
 844             // Disconnect the phi now. An empty phi can confuse other
 845             // optimizations in this pass of loop opts..
 846             if (phi->in(LoopNode::LoopBackControl) == phi) {
 847               _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
 848               n_loop->_body.yank(phi);
 849             }
 850           }
 851         }
 852       }
 853     }
 854   }
 855 }
 856 
 857 //------------------------------split_if_with_blocks_pre-----------------------
 858 // Do the real work in a non-recursive function.  Data nodes want to be
 859 // cloned in the pre-order so they can feed each other nicely.
 860 Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
 861   // Cloning these guys is unlikely to win
 862   int n_op = n->Opcode();
 863   if( n_op == Op_MergeMem ) return n;
 864   if( n->is_Proj() ) return n;
 865   // Do not clone-up CmpFXXX variations, as these are always
 866   // followed by a CmpI
 867   if( n->is_Cmp() ) return n;
 868   // Attempt to use a conditional move instead of a phi/branch
 869   if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
 870     Node *cmov = conditional_move( n );
 871     if( cmov ) return cmov;
 872   }
 873   if( n->is_CFG() || n->is_LoadStore() )
 874     return n;
 875   if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
 876       n_op == Op_Opaque2 ) {
 877     if( !C->major_progress() )   // If chance of no more loop opts...
 878       _igvn._worklist.push(n);  // maybe we'll remove them
 879     return n;
 880   }
 881 
 882   if( n->is_Con() ) return n;   // No cloning for Con nodes
 883 
 884   Node *n_ctrl = get_ctrl(n);
 885   if( !n_ctrl ) return n;       // Dead node
 886 
 887   Node* res = try_move_store_before_loop(n, n_ctrl);
 888   if (res != NULL) {
 889     return n;
 890   }
 891 
 892   // Attempt to remix address expressions for loop invariants
 893   Node *m = remix_address_expressions( n );
 894   if( m ) return m;
 895 
 896   // Determine if the Node has inputs from some local Phi.
 897   // Returns the block to clone thru.
 898   Node *n_blk = has_local_phi_input( n );
 899   if( !n_blk ) return n;
 900 
 901   // Do not clone the trip counter through on a CountedLoop
 902   // (messes up the canonical shape).
 903   if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
 904 
 905   // Check for having no control input; not pinned.  Allow
 906   // dominating control.
 907   if (n->in(0)) {
 908     Node *dom = idom(n_blk);
 909     if (dom_lca(n->in(0), dom) != n->in(0)) {
 910       return n;
 911     }
 912   }
 913   // Policy: when is it profitable.  You must get more wins than
 914   // policy before it is considered profitable.  Policy is usually 0,
 915   // so 1 win is considered profitable.  Big merges will require big
 916   // cloning, so get a larger policy.
 917   int policy = n_blk->req() >> 2;
 918 
 919   // If the loop is a candidate for range check elimination,
 920   // delay splitting through it's phi until a later loop optimization
 921   if (n_blk->is_CountedLoop()) {
 922     IdealLoopTree *lp = get_loop(n_blk);
 923     if (lp && lp->_rce_candidate) {
 924       return n;
 925     }
 926   }
 927 
 928   // Use same limit as split_if_with_blocks_post
 929   if( C->live_nodes() > 35000 ) return n; // Method too big
 930 
 931   // Split 'n' through the merge point if it is profitable
 932   Node *phi = split_thru_phi( n, n_blk, policy );


1219             }
1220             register_new_node(x, x_ctrl);
1221 
1222             // Some institutional knowledge is needed here: 'x' is
1223             // yanked because if the optimizer runs GVN on it all the
1224             // cloned x's will common up and undo this optimization and
1225             // be forced back in the loop.  This is annoying because it
1226             // makes +VerifyOpto report false-positives on progress.  I
1227             // tried setting control edges on the x's to force them to
1228             // not combine, but the matching gets worried when it tries
1229             // to fold a StoreP and an AddP together (as part of an
1230             // address expression) and the AddP and StoreP have
1231             // different controls.
1232             if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
1233           }
1234           _igvn.remove_dead_node(n);
1235         }
1236       }
1237     }
1238   }
1239 
1240   try_move_store_after_loop(n);
1241 
1242   // Check for Opaque2's who's loop has disappeared - who's input is in the
1243   // same loop nest as their output.  Remove 'em, they are no longer useful.
1244   if( n_op == Op_Opaque2 &&
1245       n->in(1) != NULL &&
1246       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1247     _igvn.replace_node( n, n->in(1) );
1248   }
1249 }
1250 
1251 //------------------------------split_if_with_blocks---------------------------
1252 // Check for aggressive application of 'split-if' optimization,
1253 // using basic block level info.
1254 void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
1255   Node *n = C->root();
1256   visited.set(n->_idx); // first, mark node as visited
1257   // Do pre-visit work for root
1258   n = split_if_with_blocks_pre( n );
1259   uint cnt = n->outcnt();
1260   uint i   = 0;


src/share/vm/opto/loopopts.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File