1 #ifdef USE_PRAGMA_IDENT_SRC
2 #pragma ident "@(#)loopopts.cpp 1.222 08/11/24 12:23:09 JVM"
3 #endif
4 /*
5 * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7 *
8 * This code is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License version 2 only, as
10 * published by the Free Software Foundation.
11 *
12 * This code is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 * version 2 for more details (a copy is included in the LICENSE file that
16 * accompanied this code).
17 *
18 * You should have received a copy of the GNU General Public License version
19 * 2 along with this work; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
24 * have any questions.
25 *
26 */
27
28 #include "incls/_precompiled.incl"
29 #include "incls/_loopopts.cpp.incl"
30
31 //=============================================================================
32 //------------------------------split_thru_phi---------------------------------
33 // Split Node 'n' through merge point if there is enough win.
34 Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
35 if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::BOTTOM) {
36 // ConvI2L may have type information on it which is unsafe to push up
37 // so disable this for now
38 return NULL;
39 }
40 int wins = 0;
41 assert( !n->is_CFG(), "" );
42 assert( region->is_Region(), "" );
43 Node *phi = new (C, region->req()) PhiNode( region, n->bottom_type() );
44 uint old_unique = C->unique();
45 for( uint i = 1; i < region->req(); i++ ) {
46 Node *x;
47 Node* the_clone = NULL;
48 if( region->in(i) == C->top() ) {
49 x = C->top(); // Dead path? Use a dead data op
50 } else {
51 x = n->clone(); // Else clone up the data op
52 the_clone = x; // Remember for possible deletion.
53 // Alter data node to use pre-phi inputs
54 if( n->in(0) == region )
55 x->set_req( 0, region->in(i) );
56 for( uint j = 1; j < n->req(); j++ ) {
57 Node *in = n->in(j);
58 if( in->is_Phi() && in->in(0) == region )
59 x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
60 }
61 }
62 // Check for a 'win' on some paths
63 const Type *t = x->Value(&_igvn);
71 // the PhiNode may cause the resulting node to migrate back to a previous
72 // loop iteration.
73 if( singleton && t == Type::TOP ) {
74 // Is_Loop() == false does not confirm the absence of a loop (e.g., an
75 // irreducible loop may not be indicated by an affirmative is_Loop());
76 // therefore, the only top we can split thru a phi is on a backedge of
77 // a loop.
78 singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
79 }
80
81 if( singleton ) {
82 wins++;
83 x = ((PhaseGVN&)_igvn).makecon(t);
84 } else {
85 // We now call Identity to try to simplify the cloned node.
86 // Note that some Identity methods call phase->type(this).
87 // Make sure that the type array is big enough for
88 // our new node, even though we may throw the node away.
89 // (Note: This tweaking with igvn only works because x is a new node.)
90 _igvn.set_type(x, t);
91 Node *y = x->Identity(&_igvn);
92 if( y != x ) {
93 wins++;
94 x = y;
95 } else {
96 y = _igvn.hash_find(x);
97 if( y ) {
98 wins++;
99 x = y;
100 } else {
101 // Else x is a new node we are keeping
102 // We do not need register_new_node_with_optimizer
103 // because set_type has already been called.
104 _igvn._worklist.push(x);
105 }
106 }
107 }
108 if (x != the_clone && the_clone != NULL)
109 _igvn.remove_dead_node(the_clone);
110 phi->set_req( i, x );
426 Node *rp = region->in(2);
427 if( !lp || !rp ) return NULL;
428 Node *lp_c = lp->in(0);
429 if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL;
430 IfNode *iff = lp_c->as_If();
431
432 // Check for highly predictable branch. No point in CMOV'ing if
433 // we are going to predict accurately all the time.
434 // %%% This hides patterns produced by utility methods like Math.min.
435 if( iff->_prob < PROB_UNLIKELY_MAG(3) ||
436 iff->_prob > PROB_LIKELY_MAG(3) )
437 return NULL;
438
439 // Check for ops pinned in an arm of the diamond.
440 // Can't remove the control flow in this case
441 if( lp->outcnt() > 1 ) return NULL;
442 if( rp->outcnt() > 1 ) return NULL;
443
444 // Check profitability
445 int cost = 0;
446 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
447 Node *out = region->fast_out(i);
448 if( !out->is_Phi() ) continue; // Ignore other control edges, etc
449 PhiNode* phi = out->as_Phi();
450 switch (phi->type()->basic_type()) {
451 case T_LONG:
452 cost++; // Probably encodes as 2 CMOV's
453 case T_INT: // These all CMOV fine
454 case T_FLOAT:
455 case T_DOUBLE:
456 case T_ADDRESS: // (RawPtr)
457 cost++;
458 break;
459 case T_OBJECT: { // Base oops are OK, but not derived oops
460 const TypeOopPtr *tp = phi->type()->isa_oopptr();
461 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
462 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus
463 // CMOVE'ing a derived pointer requires we also CMOVE the base. If we
464 // have a Phi for the base here that we convert to a CMOVE all is well
465 // and good. But if the base is dead, we'll not make a CMOVE. Later
466 // the allocator will have to produce a base by creating a CMOVE of the
467 // relevant bases. This puts the allocator in the business of
468 // manufacturing expensive instructions, generally a bad plan.
469 // Just Say No to Conditionally-Moved Derived Pointers.
470 if( tp && tp->offset() != 0 )
471 return NULL;
472 cost++;
473 break;
474 }
475 default:
476 return NULL; // In particular, can't do memory or I/O
477 }
478 // Add in cost any speculative ops
479 for( uint j = 1; j < region->req(); j++ ) {
480 Node *proj = region->in(j);
481 Node *inp = phi->in(j);
482 if (get_ctrl(inp) == proj) { // Found local op
483 cost++;
484 // Check for a chain of dependent ops; these will all become
485 // speculative in a CMOV.
486 for( uint k = 1; k < inp->req(); k++ )
487 if (get_ctrl(inp->in(k)) == proj)
488 return NULL; // Too much speculative goo
489 }
490 }
491 // See if the Phi is used by a Cmp. This will likely Split-If, a
492 // higher-payoff operation.
493 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
494 Node* use = phi->fast_out(k);
495 if( use->is_Cmp() )
496 return NULL;
497 }
498 }
499 if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo
500
501 // --------------
502 // Now replace all Phis with CMOV's
503 Node *cmov_ctrl = iff->in(0);
504 uint flip = (lp->Opcode() == Op_IfTrue);
505 while( 1 ) {
506 PhiNode* phi = NULL;
507 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
508 Node *out = region->fast_out(i);
509 if (out->is_Phi()) {
510 phi = out->as_Phi();
511 break;
512 }
513 }
514 if (phi == NULL) break;
515 #ifndef PRODUCT
516 if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV");
517 #endif
518 // Move speculative ops
519 for( uint j = 1; j < region->req(); j++ ) {
544
545 return iff->in(1);
546 }
547
548 //------------------------------split_if_with_blocks_pre-----------------------
549 // Do the real work in a non-recursive function. Data nodes want to be
550 // cloned in the pre-order so they can feed each other nicely.
551 Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
552 // Cloning these guys is unlikely to win
553 int n_op = n->Opcode();
554 if( n_op == Op_MergeMem ) return n;
555 if( n->is_Proj() ) return n;
556 // Do not clone-up CmpFXXX variations, as these are always
557 // followed by a CmpI
558 if( n->is_Cmp() ) return n;
559 // Attempt to use a conditional move instead of a phi/branch
560 if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
561 Node *cmov = conditional_move( n );
562 if( cmov ) return cmov;
563 }
564 if( n->is_CFG() || n_op == Op_StorePConditional || n_op == Op_StoreLConditional || n_op == Op_CompareAndSwapI || n_op == Op_CompareAndSwapL ||n_op == Op_CompareAndSwapP) return n;
565 if( n_op == Op_Opaque1 || // Opaque nodes cannot be mod'd
566 n_op == Op_Opaque2 ) {
567 if( !C->major_progress() ) // If chance of no more loop opts...
568 _igvn._worklist.push(n); // maybe we'll remove them
569 return n;
570 }
571
572 if( n->is_Con() ) return n; // No cloning for Con nodes
573
574 Node *n_ctrl = get_ctrl(n);
575 if( !n_ctrl ) return n; // Dead node
576
577 // Attempt to remix address expressions for loop invariants
578 Node *m = remix_address_expressions( n );
579 if( m ) return m;
580
581 // Determine if the Node has inputs from some local Phi.
582 // Returns the block to clone thru.
583 Node *n_blk = has_local_phi_input( n );
584 if( !n_blk ) return n;
893 // Don't allow the control input to be a CFG splitting node.
894 // Such nodes should only have ProjNodes as outs, e.g. IfNode
895 // should only have IfTrueNode and IfFalseNode (4985384).
896 x_ctrl = find_non_split_ctrl(x_ctrl);
897 assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
898
899 x->set_req(0, x_ctrl);
900 }
901 register_new_node(x, x_ctrl);
902
903 // Some institutional knowledge is needed here: 'x' is
904 // yanked because if the optimizer runs GVN on it all the
905 // cloned x's will common up and undo this optimization and
906 // be forced back in the loop. This is annoying because it
907 // makes +VerifyOpto report false-positives on progress. I
908 // tried setting control edges on the x's to force them to
909 // not combine, but the matching gets worried when it tries
910 // to fold a StoreP and an AddP together (as part of an
911 // address expression) and the AddP and StoreP have
912 // different controls.
913 if( !x->is_Load() ) _igvn._worklist.yank(x);
914 }
915 _igvn.remove_dead_node(n);
916 }
917 }
918 }
919 }
920
921 // Check for Opaque2's who's loop has disappeared - who's input is in the
922 // same loop nest as their output. Remove 'em, they are no longer useful.
923 if( n_op == Op_Opaque2 &&
924 n->in(1) != NULL &&
925 get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
926 _igvn.add_users_to_worklist(n);
927 _igvn.hash_delete(n);
928 _igvn.subsume_node( n, n->in(1) );
929 }
930 }
931
932 //------------------------------split_if_with_blocks---------------------------
933 // Check for aggressive application of 'split-if' optimization,
1857 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1858 Node* use = n->fast_out(j);
1859 if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
1860 worklist.push(use);
1861 }
1862 }
1863 while( worklist.size() ) {
1864 Node *use = worklist.pop();
1865 if (!has_node(use) || use->in(0) == C->top()) continue;
1866 uint j;
1867 for (j = 0; j < use->req(); j++) {
1868 if (use->in(j) == n) break;
1869 }
1870 assert(j < use->req(), "must be there");
1871
1872 // clone "n" and insert it between the inputs of "n" and the use outside the loop
1873 Node* n_clone = n->clone();
1874 _igvn.hash_delete(use);
1875 use->set_req(j, n_clone);
1876 _igvn._worklist.push(use);
1877 if (!use->is_Phi()) {
1878 Node* use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
1879 set_ctrl(n_clone, use_c);
1880 assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
1881 get_loop(use_c)->_body.push(n_clone);
1882 } else {
1883 // Use in a phi is considered a use in the associated predecessor block
1884 Node *prevbb = use->in(0)->in(j);
1885 set_ctrl(n_clone, prevbb);
1886 assert(!loop->is_member(get_loop(prevbb)), "should be outside loop");
1887 get_loop(prevbb)->_body.push(n_clone);
1888 }
1889 _igvn.register_new_node_with_optimizer(n_clone);
1890 #if !defined(PRODUCT)
1891 if (TracePartialPeeling) {
1892 tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
1893 }
1894 #endif
1895 }
1896 }
1897
1898
1899 //------------------------------ clone_for_special_use_inside_loop -------------------------------------
1900 // clone "n" for special uses that are in the not_peeled region.
1901 // If these def-uses occur in separate blocks, the code generator
1902 // marks the method as not compilable. For example, if a "BoolNode"
1903 // is in a different basic block than the "IfNode" that uses it, then
1904 // the compilation is aborted in the code generator.
1905 void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
1906 VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
1907 if (n->is_Phi() || n->is_Load()) {
1908 return;
2224 // : | v v |
2225 // : | false true |
2226 // : | | | |
2227 // : | v stmt2 |
2228 // : | exitB: | |
2229 // : | stmt4 v |
2230 // : | ifA orig |
2231 // : | / \ |
2232 // : | / \ |
2233 // : | v v |
2234 // : | false true |
2235 // : | / \ |
2236 // : v v -----+
2237 // RegionA
2238 // |
2239 // v
2240 // exitA
2241 //
2242 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2243
2244 LoopNode *head = loop->_head->as_Loop();
2245
2246 if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2247 return false;
2248 }
2249
2250 // Check for complex exit control
2251 for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2252 Node *n = loop->_body.at(ii);
2253 int opc = n->Opcode();
2254 if (n->is_Call() ||
2255 opc == Op_Catch ||
2256 opc == Op_CatchProj ||
2257 opc == Op_Jump ||
2258 opc == Op_JumpProj) {
2259 #if !defined(PRODUCT)
2260 if (TracePartialPeeling) {
2261 tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2262 }
2263 #endif
2624 //------------------------------reorg_offsets----------------------------------
2625 // Reorganize offset computations to lower register pressure. Mostly
2626 // prevent loop-fallout uses of the pre-incremented trip counter (which are
2627 // then alive with the post-incremented trip counter forcing an extra
2628 // register move)
2629 void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) {
2630
2631 CountedLoopNode *cl = loop->_head->as_CountedLoop();
2632 CountedLoopEndNode *cle = cl->loopexit();
2633 if( !cle ) return; // The occasional dead loop
2634 // Find loop exit control
2635 Node *exit = cle->proj_out(false);
2636 assert( exit->Opcode() == Op_IfFalse, "" );
2637
2638 // Check for the special case of folks using the pre-incremented
2639 // trip-counter on the fall-out path (forces the pre-incremented
2640 // and post-incremented trip counter to be live at the same time).
2641 // Fix this by adjusting to use the post-increment trip counter.
2642 Node *phi = cl->phi();
2643 if( !phi ) return; // Dead infinite loop
2644 bool progress = true;
2645 while (progress) {
2646 progress = false;
2647 for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
2648 Node* use = phi->fast_out(i); // User of trip-counter
2649 if (!has_ctrl(use)) continue;
2650 Node *u_ctrl = get_ctrl(use);
2651 if( use->is_Phi() ) {
2652 u_ctrl = NULL;
2653 for( uint j = 1; j < use->req(); j++ )
2654 if( use->in(j) == phi )
2655 u_ctrl = dom_lca( u_ctrl, use->in(0)->in(j) );
2656 }
2657 IdealLoopTree *u_loop = get_loop(u_ctrl);
2658 // Look for loop-invariant use
2659 if( u_loop == loop ) continue;
2660 if( loop->is_member( u_loop ) ) continue;
2661 // Check that use is live out the bottom. Assuming the trip-counter
2662 // update is right at the bottom, uses of of the loop middle are ok.
2663 if( dom_lca( exit, u_ctrl ) != exit ) continue;
2664 // protect against stride not being a constant
2665 if( !cle->stride_is_con() ) continue;
2666 // Hit! Refactor use to use the post-incremented tripcounter.
2667 // Compute a post-increment tripcounter.
2668 Node *opaq = new (C, 2) Opaque2Node( cle->incr() );
2669 register_new_node( opaq, u_ctrl );
2670 Node *neg_stride = _igvn.intcon(-cle->stride_con());
2671 set_ctrl(neg_stride, C->root());
2672 Node *post = new (C, 3) AddINode( opaq, neg_stride);
2673 register_new_node( post, u_ctrl );
2674 _igvn.hash_delete(use);
2675 _igvn._worklist.push(use);
2676 for( uint j = 1; j < use->req(); j++ )
2677 if( use->in(j) == phi )
2678 use->set_req(j, post);
2679 // Since DU info changed, rerun loop
2680 progress = true;
2681 break;
2682 }
2683 }
2684
2685 }
2686
|
1 #ifdef USE_PRAGMA_IDENT_SRC
2 #pragma ident "@(#)loopopts.cpp 1.222 08/11/24 12:23:09 JVM"
3 #endif
4 /*
5 * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved.
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7 *
8 * This code is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License version 2 only, as
10 * published by the Free Software Foundation.
11 *
12 * This code is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 * version 2 for more details (a copy is included in the LICENSE file that
16 * accompanied this code).
17 *
18 * You should have received a copy of the GNU General Public License version
19 * 2 along with this work; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
24 * have any questions.
25 *
26 */
27
28 #include "incls/_precompiled.incl"
29 #include "incls/_loopopts.cpp.incl"
30
31 //=============================================================================
32 //------------------------------split_thru_phi---------------------------------
33 // Split Node 'n' through merge point if there is enough win.
34 Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
35 if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
36 // ConvI2L may have type information on it which is unsafe to push up
37 // so disable this for now
38 return NULL;
39 }
40 int wins = 0;
41 assert( !n->is_CFG(), "" );
42 assert( region->is_Region(), "" );
43
44 const Type* type = n->bottom_type();
45 const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
46 Node *phi;
47 if( t_oop != NULL && t_oop->is_known_instance_field() ) {
48 int iid = t_oop->instance_id();
49 int index = C->get_alias_index(t_oop);
50 int offset = t_oop->offset();
51 phi = new (C,region->req()) PhiNode(region, type, NULL, iid, index, offset);
52 } else {
53 phi = new (C,region->req()) PhiNode(region, type);
54 }
55 uint old_unique = C->unique();
56 for( uint i = 1; i < region->req(); i++ ) {
57 Node *x;
58 Node* the_clone = NULL;
59 if( region->in(i) == C->top() ) {
60 x = C->top(); // Dead path? Use a dead data op
61 } else {
62 x = n->clone(); // Else clone up the data op
63 the_clone = x; // Remember for possible deletion.
64 // Alter data node to use pre-phi inputs
65 if( n->in(0) == region )
66 x->set_req( 0, region->in(i) );
67 for( uint j = 1; j < n->req(); j++ ) {
68 Node *in = n->in(j);
69 if( in->is_Phi() && in->in(0) == region )
70 x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
71 }
72 }
73 // Check for a 'win' on some paths
74 const Type *t = x->Value(&_igvn);
82 // the PhiNode may cause the resulting node to migrate back to a previous
83 // loop iteration.
84 if( singleton && t == Type::TOP ) {
85 // Is_Loop() == false does not confirm the absence of a loop (e.g., an
86 // irreducible loop may not be indicated by an affirmative is_Loop());
87 // therefore, the only top we can split thru a phi is on a backedge of
88 // a loop.
89 singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
90 }
91
92 if( singleton ) {
93 wins++;
94 x = ((PhaseGVN&)_igvn).makecon(t);
95 } else {
96 // We now call Identity to try to simplify the cloned node.
97 // Note that some Identity methods call phase->type(this).
98 // Make sure that the type array is big enough for
99 // our new node, even though we may throw the node away.
100 // (Note: This tweaking with igvn only works because x is a new node.)
101 _igvn.set_type(x, t);
102 // If x is a TypeNode, capture any more-precise type permanently into Node
103 // othewise it will be not updated during igvn->transform since
104 // igvn->type(x) is set to x->Value() already.
105 x->raise_bottom_type(t);
106 Node *y = x->Identity(&_igvn);
107 if( y != x ) {
108 wins++;
109 x = y;
110 } else {
111 y = _igvn.hash_find(x);
112 if( y ) {
113 wins++;
114 x = y;
115 } else {
116 // Else x is a new node we are keeping
117 // We do not need register_new_node_with_optimizer
118 // because set_type has already been called.
119 _igvn._worklist.push(x);
120 }
121 }
122 }
123 if (x != the_clone && the_clone != NULL)
124 _igvn.remove_dead_node(the_clone);
125 phi->set_req( i, x );
441 Node *rp = region->in(2);
442 if( !lp || !rp ) return NULL;
443 Node *lp_c = lp->in(0);
444 if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL;
445 IfNode *iff = lp_c->as_If();
446
447 // Check for highly predictable branch. No point in CMOV'ing if
448 // we are going to predict accurately all the time.
449 // %%% This hides patterns produced by utility methods like Math.min.
450 if( iff->_prob < PROB_UNLIKELY_MAG(3) ||
451 iff->_prob > PROB_LIKELY_MAG(3) )
452 return NULL;
453
454 // Check for ops pinned in an arm of the diamond.
455 // Can't remove the control flow in this case
456 if( lp->outcnt() > 1 ) return NULL;
457 if( rp->outcnt() > 1 ) return NULL;
458
459 // Check profitability
460 int cost = 0;
461 int phis = 0;
462 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
463 Node *out = region->fast_out(i);
464 if( !out->is_Phi() ) continue; // Ignore other control edges, etc
465 phis++;
466 PhiNode* phi = out->as_Phi();
467 switch (phi->type()->basic_type()) {
468 case T_LONG:
469 cost++; // Probably encodes as 2 CMOV's
470 case T_INT: // These all CMOV fine
471 case T_FLOAT:
472 case T_DOUBLE:
473 case T_ADDRESS: // (RawPtr)
474 cost++;
475 break;
476 case T_NARROWOOP: // Fall through
477 case T_OBJECT: { // Base oops are OK, but not derived oops
478 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
479 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
480 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus
481 // CMOVE'ing a derived pointer requires we also CMOVE the base. If we
482 // have a Phi for the base here that we convert to a CMOVE all is well
483 // and good. But if the base is dead, we'll not make a CMOVE. Later
484 // the allocator will have to produce a base by creating a CMOVE of the
485 // relevant bases. This puts the allocator in the business of
486 // manufacturing expensive instructions, generally a bad plan.
487 // Just Say No to Conditionally-Moved Derived Pointers.
488 if( tp && tp->offset() != 0 )
489 return NULL;
490 cost++;
491 break;
492 }
493 default:
494 return NULL; // In particular, can't do memory or I/O
495 }
496 // Add in cost any speculative ops
497 for( uint j = 1; j < region->req(); j++ ) {
498 Node *proj = region->in(j);
499 Node *inp = phi->in(j);
500 if (get_ctrl(inp) == proj) { // Found local op
501 cost++;
502 // Check for a chain of dependent ops; these will all become
503 // speculative in a CMOV.
504 for( uint k = 1; k < inp->req(); k++ )
505 if (get_ctrl(inp->in(k)) == proj)
506 return NULL; // Too much speculative goo
507 }
508 }
509 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
510 // This will likely Split-If, a higher-payoff operation.
511 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
512 Node* use = phi->fast_out(k);
513 if( use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP() )
514 return NULL;
515 }
516 }
517 if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo
518 Node* bol = iff->in(1);
519 assert( bol->Opcode() == Op_Bool, "" );
520 int cmp_op = bol->in(1)->Opcode();
521 // It is expensive to generate flags from a float compare.
522 // Avoid duplicated float compare.
523 if( phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
524
525 // --------------
526 // Now replace all Phis with CMOV's
527 Node *cmov_ctrl = iff->in(0);
528 uint flip = (lp->Opcode() == Op_IfTrue);
529 while( 1 ) {
530 PhiNode* phi = NULL;
531 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
532 Node *out = region->fast_out(i);
533 if (out->is_Phi()) {
534 phi = out->as_Phi();
535 break;
536 }
537 }
538 if (phi == NULL) break;
539 #ifndef PRODUCT
540 if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV");
541 #endif
542 // Move speculative ops
543 for( uint j = 1; j < region->req(); j++ ) {
568
569 return iff->in(1);
570 }
571
572 //------------------------------split_if_with_blocks_pre-----------------------
573 // Do the real work in a non-recursive function. Data nodes want to be
574 // cloned in the pre-order so they can feed each other nicely.
575 Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
576 // Cloning these guys is unlikely to win
577 int n_op = n->Opcode();
578 if( n_op == Op_MergeMem ) return n;
579 if( n->is_Proj() ) return n;
580 // Do not clone-up CmpFXXX variations, as these are always
581 // followed by a CmpI
582 if( n->is_Cmp() ) return n;
583 // Attempt to use a conditional move instead of a phi/branch
584 if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
585 Node *cmov = conditional_move( n );
586 if( cmov ) return cmov;
587 }
588 if( n->is_CFG() || n->is_LoadStore() )
589 return n;
590 if( n_op == Op_Opaque1 || // Opaque nodes cannot be mod'd
591 n_op == Op_Opaque2 ) {
592 if( !C->major_progress() ) // If chance of no more loop opts...
593 _igvn._worklist.push(n); // maybe we'll remove them
594 return n;
595 }
596
597 if( n->is_Con() ) return n; // No cloning for Con nodes
598
599 Node *n_ctrl = get_ctrl(n);
600 if( !n_ctrl ) return n; // Dead node
601
602 // Attempt to remix address expressions for loop invariants
603 Node *m = remix_address_expressions( n );
604 if( m ) return m;
605
606 // Determine if the Node has inputs from some local Phi.
607 // Returns the block to clone thru.
608 Node *n_blk = has_local_phi_input( n );
609 if( !n_blk ) return n;
918 // Don't allow the control input to be a CFG splitting node.
919 // Such nodes should only have ProjNodes as outs, e.g. IfNode
920 // should only have IfTrueNode and IfFalseNode (4985384).
921 x_ctrl = find_non_split_ctrl(x_ctrl);
922 assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
923
924 x->set_req(0, x_ctrl);
925 }
926 register_new_node(x, x_ctrl);
927
928 // Some institutional knowledge is needed here: 'x' is
929 // yanked because if the optimizer runs GVN on it all the
930 // cloned x's will common up and undo this optimization and
931 // be forced back in the loop. This is annoying because it
932 // makes +VerifyOpto report false-positives on progress. I
933 // tried setting control edges on the x's to force them to
934 // not combine, but the matching gets worried when it tries
935 // to fold a StoreP and an AddP together (as part of an
936 // address expression) and the AddP and StoreP have
937 // different controls.
938 if( !x->is_Load() && !x->is_DecodeN() ) _igvn._worklist.yank(x);
939 }
940 _igvn.remove_dead_node(n);
941 }
942 }
943 }
944 }
945
946 // Check for Opaque2's who's loop has disappeared - who's input is in the
947 // same loop nest as their output. Remove 'em, they are no longer useful.
948 if( n_op == Op_Opaque2 &&
949 n->in(1) != NULL &&
950 get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
951 _igvn.add_users_to_worklist(n);
952 _igvn.hash_delete(n);
953 _igvn.subsume_node( n, n->in(1) );
954 }
955 }
956
957 //------------------------------split_if_with_blocks---------------------------
958 // Check for aggressive application of 'split-if' optimization,
1882 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1883 Node* use = n->fast_out(j);
1884 if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
1885 worklist.push(use);
1886 }
1887 }
1888 while( worklist.size() ) {
1889 Node *use = worklist.pop();
1890 if (!has_node(use) || use->in(0) == C->top()) continue;
1891 uint j;
1892 for (j = 0; j < use->req(); j++) {
1893 if (use->in(j) == n) break;
1894 }
1895 assert(j < use->req(), "must be there");
1896
1897 // clone "n" and insert it between the inputs of "n" and the use outside the loop
1898 Node* n_clone = n->clone();
1899 _igvn.hash_delete(use);
1900 use->set_req(j, n_clone);
1901 _igvn._worklist.push(use);
1902 Node* use_c;
1903 if (!use->is_Phi()) {
1904 use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
1905 } else {
1906 // Use in a phi is considered a use in the associated predecessor block
1907 use_c = use->in(0)->in(j);
1908 }
1909 set_ctrl(n_clone, use_c);
1910 assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
1911 get_loop(use_c)->_body.push(n_clone);
1912 _igvn.register_new_node_with_optimizer(n_clone);
1913 #if !defined(PRODUCT)
1914 if (TracePartialPeeling) {
1915 tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
1916 }
1917 #endif
1918 }
1919 }
1920
1921
1922 //------------------------------ clone_for_special_use_inside_loop -------------------------------------
1923 // clone "n" for special uses that are in the not_peeled region.
1924 // If these def-uses occur in separate blocks, the code generator
1925 // marks the method as not compilable. For example, if a "BoolNode"
1926 // is in a different basic block than the "IfNode" that uses it, then
1927 // the compilation is aborted in the code generator.
1928 void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
1929 VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
1930 if (n->is_Phi() || n->is_Load()) {
1931 return;
2247 // : | v v |
2248 // : | false true |
2249 // : | | | |
2250 // : | v stmt2 |
2251 // : | exitB: | |
2252 // : | stmt4 v |
2253 // : | ifA orig |
2254 // : | / \ |
2255 // : | / \ |
2256 // : | v v |
2257 // : | false true |
2258 // : | / \ |
2259 // : v v -----+
2260 // RegionA
2261 // |
2262 // v
2263 // exitA
2264 //
2265 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2266
2267 if (!loop->_head->is_Loop()) {
2268 return false; }
2269
2270 LoopNode *head = loop->_head->as_Loop();
2271
2272 if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2273 return false;
2274 }
2275
2276 // Check for complex exit control
2277 for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2278 Node *n = loop->_body.at(ii);
2279 int opc = n->Opcode();
2280 if (n->is_Call() ||
2281 opc == Op_Catch ||
2282 opc == Op_CatchProj ||
2283 opc == Op_Jump ||
2284 opc == Op_JumpProj) {
2285 #if !defined(PRODUCT)
2286 if (TracePartialPeeling) {
2287 tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2288 }
2289 #endif
//------------------------------reorg_offsets----------------------------------
// Reorganize offset computations to lower register pressure.  Mostly
// prevent loop-fallout uses of the pre-incremented trip counter (which are
// then alive with the post-incremented trip counter forcing an extra
// register move)
void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) {

  CountedLoopNode *cl = loop->_head->as_CountedLoop();
  CountedLoopEndNode *cle = cl->loopexit();
  if( !cle ) return;            // The occasional dead loop
  // Find loop exit control
  Node *exit = cle->proj_out(false);
  assert( exit->Opcode() == Op_IfFalse, "" );

  // Check for the special case of folks using the pre-incremented
  // trip-counter on the fall-out path (forces the pre-incremented
  // and post-incremented trip counter to be live at the same time).
  // Fix this by adjusting to use the post-increment trip counter.
  Node *phi = cl->phi();
  if( !phi ) return;            // Dead infinite loop

  // Shape messed up, probably by iteration_split_impl: the backedge input
  // of the trip-counter phi must be the increment node for the rewrite
  // below to be valid.
  if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return;

  // Rewriting a use changes DU info, so restart the scan after each hit.
  bool progress = true;
  while (progress) {
    progress = false;
    for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
      Node* use = phi->fast_out(i);   // User of trip-counter
      if (!has_ctrl(use)) continue;
      Node *u_ctrl = get_ctrl(use);
      if( use->is_Phi() ) {
        // A use in a Phi really occurs in the matching predecessor block;
        // take the LCA over all predecessors feeding this phi input.
        u_ctrl = NULL;
        for( uint j = 1; j < use->req(); j++ )
          if( use->in(j) == phi )
            u_ctrl = dom_lca( u_ctrl, use->in(0)->in(j) );
      }
      IdealLoopTree *u_loop = get_loop(u_ctrl);
      // Look for loop-invariant use
      if( u_loop == loop ) continue;
      if( loop->is_member( u_loop ) ) continue;
      // Check that use is live out the bottom.  Assuming the trip-counter
      // update is right at the bottom, uses of the loop middle are ok.
      if( dom_lca( exit, u_ctrl ) != exit ) continue;
      // protect against stride not being a constant
      if( !cle->stride_is_con() ) continue;
      // Hit!  Refactor use to use the post-incremented tripcounter.
      // Compute a post-increment tripcounter: (incr - stride) has the same
      // value as the pre-increment phi, but is expressed from the
      // post-incremented counter so only that one stays live on fall-out.
      // The Opaque2 presumably keeps GVN from folding the subtraction back
      // into the phi; it is stripped later when no longer useful.
      Node *opaq = new (C, 2) Opaque2Node( C, cle->incr() );
      register_new_node( opaq, u_ctrl );
      Node *neg_stride = _igvn.intcon(-cle->stride_con());
      set_ctrl(neg_stride, C->root());
      Node *post = new (C, 3) AddINode( opaq, neg_stride);
      register_new_node( post, u_ctrl );
      // Re-point every phi input of 'use' from the pre-increment phi to
      // the reconstructed post-increment value, and re-IGVN the use.
      _igvn.hash_delete(use);
      _igvn._worklist.push(use);
      for( uint j = 1; j < use->req(); j++ )
        if( use->in(j) == phi )
          use->set_req(j, post);
      // Since DU info changed, rerun loop
      progress = true;
      break;
    }
  }

}
2716
|