107 cl->set_exact_trip_count((uint)trip_count);
108 }
109 }
110 }
111
112 //------------------------------compute_profile_trip_cnt----------------------------
113 // Compute loop trip count from profile data as
114 // (backedge_count + loop_exit_count) / loop_exit_count
115 void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) {
116 if (!_head->is_CountedLoop()) {
117 return;
118 }
119 CountedLoopNode* head = _head->as_CountedLoop();
120 if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
121 return; // Already computed
122 }
123 float trip_cnt = (float)max_jint; // default is big
124
125 Node* back = head->in(LoopNode::LoopBackControl);
126 while (back != head) {
127 if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
128 back->in(0) &&
129 back->in(0)->is_If() &&
130 back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
131 back->in(0)->as_If()->_prob != PROB_UNKNOWN) {
132 break;
133 }
134 back = phase->idom(back);
135 }
136 if (back != head) {
137 assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
138 back->in(0), "if-projection exists");
139 IfNode* back_if = back->in(0)->as_If();
140 float loop_back_cnt = back_if->_fcnt * back_if->_prob;
141
142 // Now compute a loop exit count
143 float loop_exit_cnt = 0.0f;
144 for( uint i = 0; i < _body.size(); i++ ) {
145 Node *n = _body[i];
146 if( n->is_If() ) {
147 IfNode *iff = n->as_If();
148 if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) {
149 Node *exit = is_loop_exit(iff);
150 if( exit ) {
151 float exit_prob = iff->_prob;
152 if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob;
153 if (exit_prob > PROB_MIN) {
154 float exit_cnt = iff->_fcnt * exit_prob;
155 loop_exit_cnt += exit_cnt;
156 }
157 }
158 }
159 }
160 }
161 if (loop_exit_cnt > 0.0f) {
162 trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
163 } else {
164 // No exit count so use
165 trip_cnt = loop_back_cnt;
166 }
167 }
168 #ifndef PRODUCT
169 if (TraceProfileTripCount) {
170 tty->print_cr("compute_profile_trip_cnt lp: %d cnt: %f\n", head->_idx, trip_cnt);
171 }
172 #endif
173 head->set_profile_trip_cnt(trip_cnt);
174 }
175
176 //---------------------is_invariant_addition-----------------------------
177 // Return nonzero index of invariant operand for an Add or Sub
178 // of (nonconstant) invariant and variant values. Helper for reassociate_invariants.
179 int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) {
180 int op = n->Opcode();
181 if (op == Op_AddI || op == Op_SubI) {
182 bool in1_invar = this->is_invariant(n->in(1));
183 bool in2_invar = this->is_invariant(n->in(2));
184 if (in1_invar && !in2_invar) return 1;
185 if (!in1_invar && in2_invar) return 2;
186 }
187 return 0;
188 }
189
190 //---------------------reassociate_add_sub-----------------------------
191 // Reassociate invariant add and subtract expressions:
192 //
193 // inv1 + (x + inv2) => ( inv1 + inv2) + x
194 // (x + inv2) + inv1 => ( inv1 + inv2) + x
195 // inv1 + (x - inv2) => ( inv1 - inv2) + x
196 // inv1 - (inv2 - x) => ( inv1 - inv2) + x
197 // (x + inv2) - inv1 => (-inv1 + inv2) + x
198 // (x - inv2) + inv1 => ( inv1 - inv2) + x
199 // (x - inv2) - inv1 => (-inv1 - inv2) + x
200 // inv1 + (inv2 - x) => ( inv1 + inv2) - x
201 // inv1 - (x - inv2) => ( inv1 + inv2) - x
273 // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
274 // make some loop-invariant test (usually a null-check) happen before the loop.
275 bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
276 Node *test = ((IdealLoopTree*)this)->tail();
277 int body_size = ((IdealLoopTree*)this)->_body.size();
278 // Peeling does loop cloning which can result in O(N^2) node construction
279 if( body_size > 255 /* Prevent overflow for large body_size */
280 || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) {
281 return false; // too large to safely clone
282 }
283
284 // check for vectorized loops, any peeling done was already applied
285 if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) return false;
286
287 while( test != _head ) { // Scan till run off top of loop
288 if( test->is_If() ) { // Test?
289 Node *ctrl = phase->get_ctrl(test->in(1));
290 if (ctrl->is_top())
291 return false; // Found dead test on live IF? No peeling!
292 // Standard IF only has one input value to check for loop invariance
293 assert(test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
294 // Condition is not a member of this loop?
295 if( !is_member(phase->get_loop(ctrl)) &&
296 is_loop_exit(test) )
297 return true; // Found reason to peel!
298 }
299 // Walk up dominators to loop _head looking for test which is
300 // executed on every path thru loop.
301 test = phase->idom(test);
302 }
303 return false;
304 }
305
//------------------------------peeled_dom_test_elim---------------------------
// If we got the effect of peeling, either by actually peeling or by making
// a pre-loop which must execute at least once, we can remove all
// loop-invariant dominated tests in the main body.
void PhaseIdealLoop::peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ) {
  bool progress = true;
  // Repeat until a full dominator-chain scan makes no change, since each
  // dominated_by() call can rewire control and expose further candidates.
  while( progress ) {
    progress = false;           // Reset for next iteration
    // Start at the backedge projection and walk its control input chain.
    Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();
    Node *test = prev->in(0);
    while( test != loop->_head ) { // Scan till run off top of loop

      int p_op = prev->Opcode();
      // A test qualifies when we arrived via one of its projections, its
      // condition is not already a constant, and the condition is
      // loop-invariant (computed outside this loop).
      if( (p_op == Op_IfFalse || p_op == Op_IfTrue) &&
          test->is_If() &&      // Test?
          !test->in(1)->is_Con() && // And not already obvious?
          // Condition is not a member of this loop?
          !loop->is_member(get_loop(get_ctrl(test->in(1))))){
        // Walk loop body looking for instances of this test
        for( uint i = 0; i < loop->_body.size(); i++ ) {
          Node *n = loop->_body.at(i);
          if( n->is_If() && n->in(1) == test->in(1) /*&& n != loop->tail()->in(0)*/ ) {
            // IfNode was dominated by version in peeled loop body
            progress = true;
            // Replace this If with the projection from the peeled copy
            // (looked up through the old->new mapping).
            dominated_by( old_new[prev->_idx], n );
          }
        }
      }
      prev = test;
      test = idom(test);
    } // End of scan tests in loop

  } // End of while( progress )
}
602 if (trip_count <= 3)
603 return true;
604
605 // Take into account that after unroll conjoined heads and tails will fold,
606 // otherwise policy_unroll() may allow more unrolling than max unrolling.
607 uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count;
608 uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE;
609 if (body_size != tst_body_size) // Check for int overflow
610 return false;
611 if (new_body_size > unroll_limit ||
612 // Unrolling can result in a large amount of node construction
613 new_body_size >= phase->C->max_node_limit() - phase->C->live_nodes()) {
614 return false;
615 }
616
617 // Do not unroll a loop with String intrinsics code.
618 // String intrinsics are large and have loops.
619 for (uint k = 0; k < _body.size(); k++) {
620 Node* n = _body.at(k);
621 switch (n->Opcode()) {
622 case Op_StrComp:
623 case Op_StrEquals:
624 case Op_StrIndexOf:
625 case Op_StrIndexOfChar:
626 case Op_EncodeISOArray:
627 case Op_AryEq:
628 case Op_HasNegatives: {
629 return false;
630 }
631 #if INCLUDE_RTM_OPT
632 case Op_FastLock:
633 case Op_FastUnlock: {
634 // Don't unroll RTM locking code because it is large.
635 if (UseRTMLocking) {
636 return false;
637 }
638 }
639 #endif
640 } // switch
641 }
642
643 return true; // Do maximally unroll
644 }
645
646
647 //------------------------------policy_unroll----------------------------------
648 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
649 // the loop is a CountedLoop and the body is small enough.
650 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
651
652 CountedLoopNode *cl = _head->as_CountedLoop();
653 assert(cl->is_normal_loop() || cl->is_main_loop(), "");
720 }
721 }
722 }
723 }
724
725 // After unroll limit will be adjusted: new_limit = limit-stride.
726 // Bailout if adjustment overflow.
727 const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
728 if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) ||
729 stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo))
730 return false; // overflow
731
732 // Adjust body_size to determine if we unroll or not
733 uint body_size = _body.size();
734 // Key test to unroll loop in CRC32 java code
735 int xors_in_loop = 0;
736 // Also count ModL, DivL and MulL which expand mightly
737 for (uint k = 0; k < _body.size(); k++) {
738 Node* n = _body.at(k);
739 switch (n->Opcode()) {
740 case Op_XorI: xors_in_loop++; break; // CRC32 java code
741 case Op_ModL: body_size += 30; break;
742 case Op_DivL: body_size += 30; break;
743 case Op_MulL: body_size += 10; break;
744 case Op_StrComp:
745 case Op_StrEquals:
746 case Op_StrIndexOf:
747 case Op_StrIndexOfChar:
748 case Op_EncodeISOArray:
749 case Op_AryEq:
750 case Op_HasNegatives: {
751 // Do not unroll a loop with String intrinsics code.
752 // String intrinsics are large and have loops.
753 return false;
754 }
755 #if INCLUDE_RTM_OPT
756 case Op_FastLock:
757 case Op_FastUnlock: {
758 // Don't unroll RTM locking code because it is large.
759 if (UseRTMLocking) {
760 return false;
761 }
762 }
763 #endif
764 } // switch
765 }
766
767 if (UseSuperWord) {
768 if (!cl->is_reduction_loop()) {
769 phase->mark_reductions(this);
770 }
771
772 // Only attempt slp analysis when user controls do not prohibit it
773 if (LoopMaxUnroll > _local_loop_unroll_factor) {
774 // Once policy_slp_analysis succeeds, mark the loop with the
775 // maximal unroll factor so that we minimize analysis passes
776 if (future_unroll_ct >= _local_loop_unroll_factor) {
777 policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
844 //------------------------------policy_range_check-----------------------------
845 // Return TRUE or FALSE if the loop should be range-check-eliminated.
846 // Actually we do iteration-splitting, a more powerful form of RCE.
847 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
848 if (!RangeCheckElimination) return false;
849
850 CountedLoopNode *cl = _head->as_CountedLoop();
851 // If we unrolled with no intention of doing RCE and we later
852 // changed our minds, we got no pre-loop. Either we need to
853 // make a new pre-loop, or we gotta disallow RCE.
854 if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
855 Node *trip_counter = cl->phi();
856
857 // check for vectorized loops, some opts are no longer needed
858 if (cl->do_unroll_only()) return false;
859
860 // Check loop body for tests of trip-counter plus loop-invariant vs
861 // loop-invariant.
862 for (uint i = 0; i < _body.size(); i++) {
863 Node *iff = _body[i];
864 if (iff->Opcode() == Op_If ||
865 iff->Opcode() == Op_RangeCheck) { // Test?
866
867 // Comparing trip+off vs limit
868 Node *bol = iff->in(1);
869 if (bol->req() != 2) continue; // dead constant test
870 if (!bol->is_Bool()) {
871 assert(bol->Opcode() == Op_Conv2B, "predicate check only");
872 continue;
873 }
874 if (bol->as_Bool()->_test._test == BoolTest::ne)
875 continue; // not RC
876
877 Node *cmp = bol->in(1);
878 Node *rc_exp = cmp->in(1);
879 Node *limit = cmp->in(2);
880
881 Node *limit_c = phase->get_ctrl(limit);
882 if( limit_c == phase->C->top() )
883 return false; // Found dead test on live IF? No RCE!
884 if( is_member(phase->get_loop(limit_c) ) ) {
885 // Compare might have operands swapped; commute them
886 rc_exp = cmp->in(2);
887 limit = cmp->in(1);
888 limit_c = phase->get_ctrl(limit);
889 if( is_member(phase->get_loop(limit_c) ) )
890 continue; // Both inputs are loop varying; cannot RCE
891 }
1030 // Add the post loop
1031 CountedLoopNode *post_head = NULL;
1032 Node *main_exit = insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1033
1034 //------------------------------
1035 // Step B: Create Pre-Loop.
1036
1037 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
1038 // loop pre-header illegally has 2 control users (old & new loops).
1039 clone_loop( loop, old_new, dd_main_head );
1040 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
1041 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
1042 pre_head->set_pre_loop(main_head);
1043 Node *pre_incr = old_new[incr->_idx];
1044
1045 // Reduce the pre-loop trip count.
1046 pre_end->_prob = PROB_FAIR;
1047
1048 // Find the pre-loop normal exit.
1049 Node* pre_exit = pre_end->proj_out(false);
1050 assert( pre_exit->Opcode() == Op_IfFalse, "" );
1051 IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
1052 _igvn.register_new_node_with_optimizer( new_pre_exit );
1053 set_idom(new_pre_exit, pre_end, dd_main_head);
1054 set_loop(new_pre_exit, loop->_parent);
1055
1056 // Step B2: Build a zero-trip guard for the main-loop. After leaving the
1057 // pre-loop, the main-loop may not execute at all. Later in life this
1058 // zero-trip guard will become the minimum-trip guard when we unroll
1059 // the main-loop.
1060 Node *min_opaq = new Opaque1Node(C, limit);
1061 Node *min_cmp = new CmpINode( pre_incr, min_opaq );
1062 Node *min_bol = new BoolNode( min_cmp, b_test );
1063 register_new_node( min_opaq, new_pre_exit );
1064 register_new_node( min_cmp , new_pre_exit );
1065 register_new_node( min_bol , new_pre_exit );
1066
1067 // Build the IfNode (assume the main-loop is executed always).
1068 IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
1069 _igvn.register_new_node_with_optimizer( min_iff );
1070 set_idom(min_iff, new_pre_exit, dd_main_head);
1285 // so guess that unit vector trips is a reasonable value.
1286 post_head->set_profile_trip_cnt(4.0);
1287 post_head->set_is_rce_post_loop();
1288
1289 // Now force out all loop-invariant dominating tests. The optimizer
1290 // finds some, but we _know_ they are all useless.
1291 peeled_dom_test_elim(loop, old_new);
1292 loop->record_for_igvn();
1293 }
1294
1295
1296 //------------------------------insert_post_loop-------------------------------
1297 // Insert post loops. Add a post loop to the given loop passed.
1298 Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
1299 CountedLoopNode *main_head, CountedLoopEndNode *main_end,
1300 Node *incr, Node *limit, CountedLoopNode *&post_head) {
1301
1302 //------------------------------
1303 // Step A: Create a new post-Loop.
1304 Node* main_exit = main_end->proj_out(false);
1305 assert(main_exit->Opcode() == Op_IfFalse, "");
1306 int dd_main_exit = dom_depth(main_exit);
1307
1308 // Step A1: Clone the loop body of main. The clone becomes the vector post-loop.
1309 // The main loop pre-header illegally has 2 control users (old & new loops).
1310 clone_loop(loop, old_new, dd_main_exit);
1311 assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
1312 post_head = old_new[main_head->_idx]->as_CountedLoop();
1313 post_head->set_normal_loop();
1314 post_head->set_post_loop(main_head);
1315
1316 // Reduce the post-loop trip count.
1317 CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
1318 post_end->_prob = PROB_FAIR;
1319
1320 // Build the main-loop normal exit.
1321 IfFalseNode *new_main_exit = new IfFalseNode(main_end);
1322 _igvn.register_new_node_with_optimizer(new_main_exit);
1323 set_idom(new_main_exit, main_end, dd_main_exit);
1324 set_loop(new_main_exit, loop->_parent);
1325
1326 // Step A2: Build a zero-trip guard for the post-loop. After leaving the
1327 // main-loop, the post-loop may not execute at all. We 'opaque' the incr
1328 // (the previous loop trip-counter exit value) because we will be changing
1329 // the exit value (via additional unrolling) so we cannot constant-fold away the zero
1330 // trip guard until all unrolling is done.
1331 Node *zer_opaq = new Opaque1Node(C, incr);
1517 // No underflow.
1518 new_limit = new SubINode(limit, stride);
1519 } else {
1520 // (limit - stride) may underflow.
1521 // Clamp the adjustment value with MININT or MAXINT:
1522 //
1523 // new_limit = limit-stride
1524 // if (stride > 0)
1525 // new_limit = (limit < new_limit) ? MININT : new_limit;
1526 // else
1527 // new_limit = (limit > new_limit) ? MAXINT : new_limit;
1528 //
1529 BoolTest::mask bt = loop_end->test_trip();
1530 assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
1531 Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
1532 set_ctrl(adj_max, C->root());
1533 Node* old_limit = NULL;
1534 Node* adj_limit = NULL;
1535 Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
1536 if (loop_head->unrolled_count() > 1 &&
1537 limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
1538 limit->in(CMoveNode::IfTrue) == adj_max &&
1539 bol->as_Bool()->_test._test == bt &&
1540 bol->in(1)->Opcode() == Op_CmpI &&
1541 bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
1542 // Loop was unrolled before.
1543 // Optimize the limit to avoid nested CMove:
1544 // use original limit as old limit.
1545 old_limit = bol->in(1)->in(1);
1546 // Adjust previous adjusted limit.
1547 adj_limit = limit->in(CMoveNode::IfFalse);
1548 adj_limit = new SubINode(adj_limit, stride);
1549 } else {
1550 old_limit = limit;
1551 adj_limit = new SubINode(limit, stride);
1552 }
1553 assert(old_limit != NULL && adj_limit != NULL, "");
1554 register_new_node( adj_limit, ctrl ); // adjust amount
1555 Node* adj_cmp = new CmpINode(old_limit, adj_limit);
1556 register_new_node( adj_cmp, ctrl );
1557 Node* adj_bool = new BoolNode(adj_cmp, bt);
1558 register_new_node( adj_bool, ctrl );
1559 new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
1560 }
1700
1701 void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
1702 if (SuperWordReductions == false) return;
1703
1704 CountedLoopNode* loop_head = loop->_head->as_CountedLoop();
1705 if (loop_head->unrolled_count() > 1) {
1706 return;
1707 }
1708
1709 Node* trip_phi = loop_head->phi();
1710 for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
1711 Node* phi = loop_head->fast_out(i);
1712 if (phi->is_Phi() && phi->outcnt() > 0 && phi != trip_phi) {
1713 // For definitions which are loop inclusive and not tripcounts.
1714 Node* def_node = phi->in(LoopNode::LoopBackControl);
1715
1716 if (def_node != NULL) {
1717 Node* n_ctrl = get_ctrl(def_node);
1718 if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
1719 // Now test it to see if it fits the standard pattern for a reduction operator.
1720 int opc = def_node->Opcode();
1721 if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
1722 if (!def_node->is_reduction()) { // Not marked yet
1723 // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
1724 bool ok = false;
1725 for (unsigned j = 1; j < def_node->req(); j++) {
1726 Node* in = def_node->in(j);
1727 if (in == phi) {
1728 ok = true;
1729 break;
1730 }
1731 }
1732
1733 // do nothing if we did not match the initial criteria
1734 if (ok == false) {
1735 continue;
1736 }
1737
1738 // The result of the reduction must not be used in the loop
1739 for (DUIterator_Fast imax, i = def_node->fast_outs(imax); i < imax && ok; i++) {
1740 Node* u = def_node->fast_out(i);
1901 // ( if (scale < 0) /* and stride > 0 */
1902 // I < (low_limit-(offset+1))/scale
1903 // else /* scale > 0 and stride < 0 */
1904 // I > (low_limit-(offset+1))/scale
1905 // )
1906
1907 *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl);
1908 }
1909 }
1910
1911
1912 //------------------------------is_scaled_iv---------------------------------
1913 // Return true if exp is a constant times an induction var
1914 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
1915 if (exp == iv) {
1916 if (p_scale != NULL) {
1917 *p_scale = 1;
1918 }
1919 return true;
1920 }
1921 int opc = exp->Opcode();
1922 if (opc == Op_MulI) {
1923 if (exp->in(1) == iv && exp->in(2)->is_Con()) {
1924 if (p_scale != NULL) {
1925 *p_scale = exp->in(2)->get_int();
1926 }
1927 return true;
1928 }
1929 if (exp->in(2) == iv && exp->in(1)->is_Con()) {
1930 if (p_scale != NULL) {
1931 *p_scale = exp->in(1)->get_int();
1932 }
1933 return true;
1934 }
1935 } else if (opc == Op_LShiftI) {
1936 if (exp->in(1) == iv && exp->in(2)->is_Con()) {
1937 if (p_scale != NULL) {
1938 *p_scale = 1 << exp->in(2)->get_int();
1939 }
1940 return true;
1941 }
1942 }
1943 return false;
1944 }
1945
//-----------------------------is_scaled_iv_plus_offset------------------------------
// Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
// On success *p_scale receives k1 and *p_offset the (possibly newly built)
// invariant offset node.  Either out-pointer may be NULL when the caller
// only wants the match result.
bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) {
  // Pure scaled iv with no offset: report a constant-zero offset node.
  if (is_scaled_iv(exp, iv, p_scale)) {
    if (p_offset != NULL) {
      Node *zero = _igvn.intcon(0);
      set_ctrl(zero, C->root());
      *p_offset = zero;
    }
    return true;
  }
  int opc = exp->Opcode();
  if (opc == Op_AddI) {
    // (k1*iv) + offset, in either operand order.
    if (is_scaled_iv(exp->in(1), iv, p_scale)) {
      if (p_offset != NULL) {
        *p_offset = exp->in(2);
      }
      return true;
    }
    if (is_scaled_iv(exp->in(2), iv, p_scale)) {
      if (p_offset != NULL) {
        *p_offset = exp->in(1);
      }
      return true;
    }
    if (exp->in(2)->is_Con()) {
      // ((k1*iv + invar) + k2): recurse on the left operand and then fold
      // the constant k2 into the recovered offset.  depth caps recursion.
      Node* offset2 = NULL;
      if (depth < 2 &&
          is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
                                   p_offset != NULL ? &offset2 : NULL, depth+1)) {
        if (p_offset != NULL) {
          // Place the combined offset at the control of the inner offset.
          Node *ctrl_off2 = get_ctrl(offset2);
          Node* offset = new AddINode(offset2, exp->in(2));
          register_new_node(offset, ctrl_off2);
          *p_offset = offset;
        }
        return true;
      }
    }
  } else if (opc == Op_SubI) {
    // (k1*iv) - offset: negate the offset (build 0 - offset).
    if (is_scaled_iv(exp->in(1), iv, p_scale)) {
      if (p_offset != NULL) {
        Node *zero = _igvn.intcon(0);
        set_ctrl(zero, C->root());
        Node *ctrl_off = get_ctrl(exp->in(2));
        Node* offset = new SubINode(zero, exp->in(2));
        register_new_node(offset, ctrl_off);
        *p_offset = offset;
      }
      return true;
    }
    // offset - (k1*iv): same as (-k1)*iv + offset, so negate the scale.
    if (is_scaled_iv(exp->in(2), iv, p_scale)) {
      if (p_offset != NULL) {
        *p_scale *= -1;
        *p_offset = exp->in(1);
      }
      return true;
    }
  }
  return false;
}
2032 // to not ever trip end tests
2033 Node *main_limit = cl->limit();
2034
2035 // Check graph shape. Cannot optimize a loop if zero-trip
2036 // Opaque1 node is optimized away and then another round
2037 // of loop opts attempted.
2038 if (!is_canonical_loop_entry(cl)) {
2039 return closed_range_checks;
2040 }
2041
2042 // Need to find the main-loop zero-trip guard
2043 Node *ctrl = cl->in(LoopNode::EntryControl);
2044 Node *iffm = ctrl->in(0);
2045 Node *opqzm = iffm->in(1)->in(1)->in(2);
2046 assert(opqzm->in(1) == main_limit, "do not understand situation");
2047
2048 // Find the pre-loop limit; we will expand its iterations to
2049 // not ever trip low tests.
2050 Node *p_f = iffm->in(0);
2051 // pre loop may have been optimized out
2052 if (p_f->Opcode() != Op_IfFalse) {
2053 return closed_range_checks;
2054 }
2055 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2056 assert(pre_end->loopnode()->is_pre_loop(), "");
2057 Node *pre_opaq1 = pre_end->limit();
2058 // Occasionally it's possible for a pre-loop Opaque1 node to be
2059 // optimized away and then another round of loop opts attempted.
2060 // We can not optimize this particular loop in that case.
2061 if (pre_opaq1->Opcode() != Op_Opaque1)
2062 return closed_range_checks;
2063 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2064 Node *pre_limit = pre_opaq->in(1);
2065
2066 // Where do we put new limit calculations
2067 Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2068
2069 // Ensure the original loop limit is available from the
2070 // pre-loop Opaque1 node.
2071 Node *orig_limit = pre_opaq->original_loop_limit();
2072 if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP)
2073 return closed_range_checks;
2074
  // Must know if it's a count-up or count-down loop
2076
2077 int stride_con = cl->stride_con();
2078 Node *zero = _igvn.intcon(0);
2079 Node *one = _igvn.intcon(1);
2080 // Use symmetrical int range [-max_jint,max_jint]
2081 Node *mini = _igvn.intcon(-max_jint);
2082 set_ctrl(zero, C->root());
2083 set_ctrl(one, C->root());
2084 set_ctrl(mini, C->root());
2085
2086 // Range checks that do not dominate the loop backedge (ie.
2087 // conditionally executed) can lengthen the pre loop limit beyond
2088 // the original loop limit. To prevent this, the pre limit is
2089 // (for stride > 0) MINed with the original loop limit (MAXed
2090 // stride < 0) when some range_check (rc) is conditionally
2091 // executed.
2092 bool conditional_rc = false;
2093
2094 // Count number of range checks and reduce by load range limits, if zero,
2095 // the loop is in canonical form to multiversion.
2096 closed_range_checks = 0;
2097
2098 // Check loop body for tests of trip-counter plus loop-invariant vs
2099 // loop-invariant.
2100 for( uint i = 0; i < loop->_body.size(); i++ ) {
2101 Node *iff = loop->_body[i];
2102 if (iff->Opcode() == Op_If ||
2103 iff->Opcode() == Op_RangeCheck) { // Test?
2104 // Test is an IfNode, has 2 projections. If BOTH are in the loop
2105 // we need loop unswitching instead of iteration splitting.
2106 closed_range_checks++;
2107 Node *exit = loop->is_loop_exit(iff);
2108 if( !exit ) continue;
2109 int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
2110
2111 // Get boolean condition to test
2112 Node *i1 = iff->in(1);
2113 if( !i1->is_Bool() ) continue;
2114 BoolNode *bol = i1->as_Bool();
2115 BoolTest b_test = bol->_test;
2116 // Flip sense of test if exit condition is flipped
2117 if( flip )
2118 b_test = b_test.negate();
2119
2120 // Get compare
2121 Node *cmp = bol->in(1);
2122
2123 // Look for trip_counter + offset vs limit
2124 Node *rc_exp = cmp->in(1);
2125 Node *limit = cmp->in(2);
2126 jint scale_con= 1; // Assume trip counter not scaled
2127
2128 Node *limit_c = get_ctrl(limit);
2129 if( loop->is_member(get_loop(limit_c) ) ) {
2158
2159 // As above for the 'limit', the 'offset' maybe pinned below the
2160 // zero trip test.
2161 if( offset_c == ctrl ) {
2162 continue; // Don't rce this check but continue looking for other candidates.
2163 }
2164 #ifdef ASSERT
2165 if (TraceRangeLimitCheck) {
2166 tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2167 bol->dump(2);
2168 }
2169 #endif
2170 // At this point we have the expression as:
2171 // scale_con * trip_counter + offset :: limit
2172 // where scale_con, offset and limit are loop invariant. Trip_counter
2173 // monotonically increases by stride_con, a constant. Both (or either)
2174 // stride_con and scale_con can be negative which will flip about the
2175 // sense of the test.
2176
2177 // Adjust pre and main loop limits to guard the correct iteration set
2178 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2179 if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2180 // The underflow and overflow limits: 0 <= scale*I+offset < limit
2181 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2182 // (0-offset)/scale could be outside of loop iterations range.
2183 conditional_rc = true;
2184 } else {
2185 if (PrintOpto) {
2186 tty->print_cr("missed RCE opportunity");
2187 }
2188 continue; // In release mode, ignore it
2189 }
2190 } else { // Otherwise work on normal compares
2191 switch( b_test._test ) {
2192 case BoolTest::gt:
2193 // Fall into GE case
2194 case BoolTest::ge:
2195 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2196 scale_con = -scale_con;
2197 offset = new SubINode( zero, offset );
2198 register_new_node( offset, pre_ctrl );
2226
2227 // Kill the eliminated test
2228 C->set_major_progress();
2229 Node *kill_con = _igvn.intcon( 1-flip );
2230 set_ctrl(kill_con, C->root());
2231 _igvn.replace_input_of(iff, 1, kill_con);
2232 // Find surviving projection
2233 assert(iff->is_If(), "");
2234 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2235 // Find loads off the surviving projection; remove their control edge
2236 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2237 Node* cd = dp->fast_out(i); // Control-dependent node
2238 if (cd->is_Load() && cd->depends_only_on_test()) { // Loads can now float around in the loop
2239 // Allow the load to float around in the loop, or before it
2240 // but NOT before the pre-loop.
2241 _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
2242 --i;
2243 --imax;
2244 }
2245 }
2246 if (limit->Opcode() == Op_LoadRange) {
2247 closed_range_checks--;
2248 }
2249
2250 } // End of is IF
2251
2252 }
2253
2254 // Update loop limits
2255 if (conditional_rc) {
2256 pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2257 : (Node*)new MaxINode(pre_limit, orig_limit);
2258 register_new_node(pre_limit, pre_ctrl);
2259 }
2260 _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2261
  // Note: we are making the main loop limit no longer precise;
2263 // need to round up based on stride.
2264 cl->set_nonexact_trip_count();
2265 Node *main_cle = cl->loopexit();
2266 Node *main_bol = main_cle->in(1);
2284
2285 return closed_range_checks;
2286 }
2287
2288 //------------------------------has_range_checks-------------------------------
2289 // Check to see if RCE cleaned the current loop of range-checks.
2290 void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
2291 assert(RangeCheckElimination, "");
2292
2293 // skip if not a counted loop
2294 if (!loop->is_counted()) return;
2295
2296 CountedLoopNode *cl = loop->_head->as_CountedLoop();
2297
2298 // skip this loop if it is already checked
2299 if (cl->has_been_range_checked()) return;
2300
2301 // Now check for existance of range checks
2302 for (uint i = 0; i < loop->_body.size(); i++) {
2303 Node *iff = loop->_body[i];
2304 int iff_opc = iff->Opcode();
2305 if (iff_opc == Op_If || iff_opc == Op_RangeCheck) {
2306 cl->mark_has_range_checks();
2307 break;
2308 }
2309 }
2310 cl->set_has_been_range_checked();
2311 }
2312
2313 //-------------------------multi_version_post_loops----------------------------
2314 // Check the range checks that remain, if simple, use the bounds to guard
2315 // which version to a post loop we execute, one with range checks or one without
2316 bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
2317 bool multi_version_succeeded = false;
2318 assert(RangeCheckElimination, "");
2319 CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
2320 assert(legacy_cl->is_post_loop(), "");
2321
  // Check for existence of range checks using the unique instance to make a guard with
2323 Unique_Node_List worklist;
2324 for (uint i = 0; i < legacy_loop->_body.size(); i++) {
2325 Node *iff = legacy_loop->_body[i];
2326 int iff_opc = iff->Opcode();
2327 if (iff_opc == Op_If || iff_opc == Op_RangeCheck) {
2328 worklist.push(iff);
2329 }
2330 }
2331
2332 // Find RCE'd post loop so that we can stage its guard.
2333 if (!is_canonical_loop_entry(legacy_cl)) return multi_version_succeeded;
2334 Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
2335 Node* iffm = ctrl->in(0);
2336
2337 // Now we test that both the post loops are connected
2338 Node* post_loop_region = iffm->in(0);
2339 if (post_loop_region == NULL) return multi_version_succeeded;
2340 if (!post_loop_region->is_Region()) return multi_version_succeeded;
2341 Node* covering_region = post_loop_region->in(RegionNode::Control+1);
2342 if (covering_region == NULL) return multi_version_succeeded;
2343 if (!covering_region->is_Region()) return multi_version_succeeded;
2344 Node* p_f = covering_region->in(RegionNode::Control);
2345 if (p_f == NULL) return multi_version_succeeded;
2346 if (!p_f->is_IfFalse()) return multi_version_succeeded;
2347 if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded;
2365 #endif
2366
2367 // Now fetch the limit we want to compare against
2368 Node *limit = rce_cl->limit();
2369 bool first_time = true;
2370
2371 // If we got this far, we identified the post loop which has been RCE'd and
2372 // we have a work list. Now we will try to transform the if guard to cause
2373 // the loop pair to be multi version executed with the determination left to runtime
2374 // or the optimizer if full information is known about the given arrays at compile time.
2375 Node *last_min = NULL;
2376 multi_version_succeeded = true;
2377 while (worklist.size()) {
2378 Node* rc_iffm = worklist.pop();
2379 if (rc_iffm->is_If()) {
2380 Node *rc_bolzm = rc_iffm->in(1);
2381 if (rc_bolzm->is_Bool()) {
2382 Node *rc_cmpzm = rc_bolzm->in(1);
2383 if (rc_cmpzm->is_Cmp()) {
2384 Node *rc_left = rc_cmpzm->in(2);
2385 if (rc_left->Opcode() != Op_LoadRange) {
2386 multi_version_succeeded = false;
2387 break;
2388 }
2389 if (first_time) {
2390 last_min = rc_left;
2391 first_time = false;
2392 } else {
2393 Node *cur_min = new MinINode(last_min, rc_left);
2394 last_min = cur_min;
2395 _igvn.register_new_node_with_optimizer(last_min);
2396 }
2397 }
2398 }
2399 }
2400 }
2401
2402 // All we have to do is update the limit of the rce loop
2403 // with the min of our expression and the current limit.
2404 // We will use this expression to replace the current limit.
2405 if (last_min && multi_version_succeeded) {
2444 }
2445 }
2446 }
2447 }
2448
2449 //------------------------------DCE_loop_body----------------------------------
2450 // Remove simplistic dead code from loop body
2451 void IdealLoopTree::DCE_loop_body() {
2452 for( uint i = 0; i < _body.size(); i++ )
2453 if( _body.at(i)->outcnt() == 0 )
2454 _body.map( i--, _body.pop() );
2455 }
2456
2457
2458 //------------------------------adjust_loop_exit_prob--------------------------
2459 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
2460 // Replace with a 1-in-10 exit guess.
2461 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
2462 Node *test = tail();
2463 while( test != _head ) {
2464 uint top = test->Opcode();
2465 if( top == Op_IfTrue || top == Op_IfFalse ) {
2466 int test_con = ((ProjNode*)test)->_con;
2467 assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity");
2468 IfNode *iff = test->in(0)->as_If();
2469 if( iff->outcnt() == 2 ) { // Ignore dead tests
2470 Node *bol = iff->in(1);
2471 if( bol && bol->req() > 1 && bol->in(1) &&
2472 ((bol->in(1)->Opcode() == Op_StorePConditional ) ||
2473 (bol->in(1)->Opcode() == Op_StoreIConditional ) ||
2474 (bol->in(1)->Opcode() == Op_StoreLConditional ) ||
2475 (bol->in(1)->Opcode() == Op_CompareAndExchangeB ) ||
2476 (bol->in(1)->Opcode() == Op_CompareAndExchangeS ) ||
2477 (bol->in(1)->Opcode() == Op_CompareAndExchangeI ) ||
2478 (bol->in(1)->Opcode() == Op_CompareAndExchangeL ) ||
2479 (bol->in(1)->Opcode() == Op_CompareAndExchangeP ) ||
2480 (bol->in(1)->Opcode() == Op_CompareAndExchangeN ) ||
2481 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapB ) ||
2482 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapS ) ||
2483 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapI ) ||
2484 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapL ) ||
2485 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapP ) ||
2486 (bol->in(1)->Opcode() == Op_WeakCompareAndSwapN ) ||
2487 (bol->in(1)->Opcode() == Op_CompareAndSwapB ) ||
2488 (bol->in(1)->Opcode() == Op_CompareAndSwapS ) ||
2489 (bol->in(1)->Opcode() == Op_CompareAndSwapI ) ||
2490 (bol->in(1)->Opcode() == Op_CompareAndSwapL ) ||
2491 (bol->in(1)->Opcode() == Op_CompareAndSwapP ) ||
2492 (bol->in(1)->Opcode() == Op_CompareAndSwapN )))
2493 return; // Allocation loops RARELY take backedge
2494 // Find the OTHER exit path from the IF
2495 Node* ex = iff->proj_out(1-test_con);
2496 float p = iff->_prob;
2497 if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) {
2498 if( top == Op_IfTrue ) {
2499 if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) {
2500 iff->_prob = PROB_STATIC_FREQUENT;
2501 }
2502 } else {
2503 if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) {
2504 iff->_prob = PROB_STATIC_INFREQUENT;
2505 }
2506 }
2507 }
2508 }
2509 }
2510 test = phase->idom(test);
2511 }
2512 }
2513
2514 #ifdef ASSERT
2515 static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
2516 Node *ctrl = cl->in(LoopNode::EntryControl);
2517 assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
2518 Node *iffm = ctrl->in(0);
2519 assert(iffm->Opcode() == Op_If, "");
2520 Node *p_f = iffm->in(0);
2521 assert(p_f->Opcode() == Op_IfFalse, "");
2522 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2523 assert(pre_end->loopnode()->is_pre_loop(), "");
2524 return pre_end->loopnode();
2525 }
2526 #endif
2527
2528 // Remove the main and post loops and make the pre loop execute all
2529 // iterations. Useful when the pre loop is found empty.
2530 void IdealLoopTree::remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase) {
2531 CountedLoopEndNode* pre_end = cl->loopexit();
2532 Node* pre_cmp = pre_end->cmp_node();
2533 if (pre_cmp->in(2)->Opcode() != Op_Opaque1) {
2534 // Only safe to remove the main loop if the compiler optimized it
2535 // out based on an unknown number of iterations
2536 return;
2537 }
2538
2539 // Can we find the main loop?
2540 if (_next == NULL) {
2541 return;
2542 }
2543
2544 Node* next_head = _next->_head;
2545 if (!next_head->is_CountedLoop()) {
2546 return;
2547 }
2548
2549 CountedLoopNode* main_head = next_head->as_CountedLoop();
2550 if (!main_head->is_main_loop()) {
2551 return;
2552 }
2553
2554 assert(locate_pre_from_main(main_head) == cl, "bad main loop");
2555 Node* main_iff = main_head->in(LoopNode::EntryControl)->in(0);
2556
2557 // Remove the Opaque1Node of the pre loop and make it execute all iterations
2558 phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
2559 // Remove the Opaque1Node of the main loop so it can be optimized out
2560 Node* main_cmp = main_iff->in(1)->in(1);
2561 assert(main_cmp->in(2)->Opcode() == Op_Opaque1, "main loop has no opaque node?");
2562 phase->_igvn.replace_input_of(main_cmp, 2, main_cmp->in(2)->in(1));
2563 }
2564
2565 //------------------------------policy_do_remove_empty_loop--------------------
2566 // Micro-benchmark spamming. Policy is to always remove empty loops.
2567 // The 'DO' part is to replace the trip counter with the value it will
2568 // have on the last iteration. This will break the loop.
2569 bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
2570 // Minimum size must be empty loop
2571 if (_body.size() > EMPTY_LOOP_SIZE)
2572 return false;
2573
2574 if (!_head->is_CountedLoop())
2575 return false; // Dead loop
2576 CountedLoopNode *cl = _head->as_CountedLoop();
2577 if (!cl->is_valid_counted_loop())
2578 return false; // Malformed loop
2579 if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
2580 return false; // Infinite loop
2581
2582 if (cl->is_pre_loop()) {
2583 // If the loop we are removing is a pre-loop then the main and
2584 // post loop can be removed as well
2585 remove_main_post_loops(cl, phase);
2586 }
2587
2588 #ifdef ASSERT
2589 // Ensure only one phi which is the iv.
2590 Node* iv = NULL;
2591 for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) {
2592 Node* n = cl->fast_out(i);
2593 if (n->Opcode() == Op_Phi) {
2594 assert(iv == NULL, "Too many phis" );
2595 iv = n;
2596 }
2597 }
2598 assert(iv == cl->phi(), "Wrong phi" );
2599 #endif
2600
2601 // main and post loops have explicitly created zero trip guard
2602 bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
2603 if (needs_guard) {
2604 // Skip guard if values not overlap.
2605 const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
2606 const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
2607 int stride_con = cl->stride_con();
2608 if (stride_con > 0) {
2609 needs_guard = (init_t->_hi >= limit_t->_lo);
2610 } else {
2611 needs_guard = (init_t->_lo <= limit_t->_hi);
2612 }
2613 }
2614 if (needs_guard) {
2615 // Check for an obvious zero trip guard.
2616 Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl));
2617 if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
2618 bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
2619 // The test should look like just the backedge of a CountedLoop
2620 Node* iff = inctrl->in(0);
2621 if (iff->is_If()) {
2622 Node* bol = iff->in(1);
2623 if (bol->is_Bool()) {
2624 BoolTest test = bol->as_Bool()->_test;
2625 if (maybe_swapped) {
2626 test._test = test.commute();
2627 test._test = test.negate();
2628 }
2629 if (test._test == cl->loopexit()->test_trip()) {
2630 Node* cmp = bol->in(1);
2631 int init_idx = maybe_swapped ? 2 : 1;
2632 int limit_idx = maybe_swapped ? 1 : 2;
2633 if (cmp->is_Cmp() && cmp->in(init_idx) == cl->init_trip() && cmp->in(limit_idx) == cl->limit()) {
2634 needs_guard = false;
2635 }
2636 }
2637 }
2638 }
2911 bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
2912 Node*& shift, Node*& con) {
2913 const char* msg = NULL;
2914 Node* msg_node = NULL;
2915
2916 store_value = NULL;
2917 con = NULL;
2918 shift = NULL;
2919
2920 // Process the loop looking for stores. If there are multiple
2921 // stores or extra control flow give at this point.
2922 CountedLoopNode* head = lpt->_head->as_CountedLoop();
2923 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
2924 Node* n = lpt->_body.at(i);
2925 if (n->outcnt() == 0) continue; // Ignore dead
2926 if (n->is_Store()) {
2927 if (store != NULL) {
2928 msg = "multiple stores";
2929 break;
2930 }
2931 int opc = n->Opcode();
2932 if (opc == Op_StoreP || opc == Op_StoreN || opc == Op_StoreNKlass || opc == Op_StoreCM) {
2933 msg = "oop fills not handled";
2934 break;
2935 }
2936 Node* value = n->in(MemNode::ValueIn);
2937 if (!lpt->is_invariant(value)) {
2938 msg = "variant store value";
2939 } else if (!_igvn.type(n->in(MemNode::Address))->isa_aryptr()) {
2940 msg = "not array address";
2941 }
2942 store = n;
2943 store_value = value;
2944 } else if (n->is_If() && n != head->loopexit()) {
2945 msg = "extra control flow";
2946 msg_node = n;
2947 }
2948 }
2949
2950 if (store == NULL) {
2951 // No store in loop
2952 return false;
2986 #ifndef PRODUCT
2987 if (TraceOptimizeFill) {
2988 tty->print_cr("not fill intrinsic candidate: %s", msg);
2989 if (msg_node != NULL) msg_node->dump();
2990 }
2991 #endif
2992 return false;
2993 }
2994
2995 // Make sure the address expression can be handled. It should be
2996 // head->phi * elsize + con. head->phi might have a ConvI2L(CastII()).
2997 Node* elements[4];
2998 Node* cast = NULL;
2999 Node* conv = NULL;
3000 bool found_index = false;
3001 int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
3002 for (int e = 0; e < count; e++) {
3003 Node* n = elements[e];
3004 if (n->is_Con() && con == NULL) {
3005 con = n;
3006 } else if (n->Opcode() == Op_LShiftX && shift == NULL) {
3007 Node* value = n->in(1);
3008 #ifdef _LP64
3009 if (value->Opcode() == Op_ConvI2L) {
3010 conv = value;
3011 value = value->in(1);
3012 }
3013 if (value->Opcode() == Op_CastII &&
3014 value->as_CastII()->has_range_check()) {
3015 // Skip range check dependent CastII nodes
3016 cast = value;
3017 value = value->in(1);
3018 }
3019 #endif
3020 if (value != head->phi()) {
3021 msg = "unhandled shift in address";
3022 } else {
3023 if (type2aelembytes(store->as_Mem()->memory_type(), true) != (1 << n->in(2)->get_int())) {
3024 msg = "scale doesn't match";
3025 } else {
3026 found_index = true;
3027 shift = n;
3028 }
3029 }
3030 } else if (n->Opcode() == Op_ConvI2L && conv == NULL) {
3031 conv = n;
3032 n = n->in(1);
3033 if (n->Opcode() == Op_CastII &&
3034 n->as_CastII()->has_range_check()) {
3035 // Skip range check dependent CastII nodes
3036 cast = n;
3037 n = n->in(1);
3038 }
3039 if (n == head->phi()) {
3040 found_index = true;
3041 } else {
3042 msg = "unhandled input to ConvI2L";
3043 }
3044 } else if (n == head->phi()) {
3045 // no shift, check below for allowed cases
3046 found_index = true;
3047 } else {
3048 msg = "unhandled node in address";
3049 msg_node = n;
3050 }
3051 }
3052
3053 if (count == -1) {
|
107 cl->set_exact_trip_count((uint)trip_count);
108 }
109 }
110 }
111
112 //------------------------------compute_profile_trip_cnt----------------------------
113 // Compute loop trip count from profile data as
114 // (backedge_count + loop_exit_count) / loop_exit_count
115 void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) {
116 if (!_head->is_CountedLoop()) {
117 return;
118 }
119 CountedLoopNode* head = _head->as_CountedLoop();
120 if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
121 return; // Already computed
122 }
123 float trip_cnt = (float)max_jint; // default is big
124
125 Node* back = head->in(LoopNode::LoopBackControl);
126 while (back != head) {
127 if ((back->Opcode() == Opcodes::Op_IfTrue || back->Opcode() == Opcodes::Op_IfFalse) &&
128 back->in(0) &&
129 back->in(0)->is_If() &&
130 back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
131 back->in(0)->as_If()->_prob != PROB_UNKNOWN) {
132 break;
133 }
134 back = phase->idom(back);
135 }
136 if (back != head) {
137 assert((back->Opcode() == Opcodes::Op_IfTrue || back->Opcode() == Opcodes::Op_IfFalse) &&
138 back->in(0), "if-projection exists");
139 IfNode* back_if = back->in(0)->as_If();
140 float loop_back_cnt = back_if->_fcnt * back_if->_prob;
141
142 // Now compute a loop exit count
143 float loop_exit_cnt = 0.0f;
144 for( uint i = 0; i < _body.size(); i++ ) {
145 Node *n = _body[i];
146 if( n->is_If() ) {
147 IfNode *iff = n->as_If();
148 if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) {
149 Node *exit = is_loop_exit(iff);
150 if( exit ) {
151 float exit_prob = iff->_prob;
152 if (exit->Opcode() == Opcodes::Op_IfFalse) exit_prob = 1.0 - exit_prob;
153 if (exit_prob > PROB_MIN) {
154 float exit_cnt = iff->_fcnt * exit_prob;
155 loop_exit_cnt += exit_cnt;
156 }
157 }
158 }
159 }
160 }
161 if (loop_exit_cnt > 0.0f) {
162 trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
163 } else {
164 // No exit count so use
165 trip_cnt = loop_back_cnt;
166 }
167 }
168 #ifndef PRODUCT
169 if (TraceProfileTripCount) {
170 tty->print_cr("compute_profile_trip_cnt lp: %d cnt: %f\n", head->_idx, trip_cnt);
171 }
172 #endif
173 head->set_profile_trip_cnt(trip_cnt);
174 }
175
176 //---------------------is_invariant_addition-----------------------------
177 // Return nonzero index of invariant operand for an Add or Sub
178 // of (nonconstant) invariant and variant values. Helper for reassociate_invariants.
179 int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) {
180 Opcodes op = n->Opcode();
181 if (op == Opcodes::Op_AddI || op == Opcodes::Op_SubI) {
182 bool in1_invar = this->is_invariant(n->in(1));
183 bool in2_invar = this->is_invariant(n->in(2));
184 if (in1_invar && !in2_invar) return 1;
185 if (!in1_invar && in2_invar) return 2;
186 }
187 return 0;
188 }
189
190 //---------------------reassociate_add_sub-----------------------------
191 // Reassociate invariant add and subtract expressions:
192 //
193 // inv1 + (x + inv2) => ( inv1 + inv2) + x
194 // (x + inv2) + inv1 => ( inv1 + inv2) + x
195 // inv1 + (x - inv2) => ( inv1 - inv2) + x
196 // inv1 - (inv2 - x) => ( inv1 - inv2) + x
197 // (x + inv2) - inv1 => (-inv1 + inv2) + x
198 // (x - inv2) + inv1 => ( inv1 - inv2) + x
199 // (x - inv2) - inv1 => (-inv1 - inv2) + x
200 // inv1 + (inv2 - x) => ( inv1 + inv2) - x
201 // inv1 - (x - inv2) => ( inv1 + inv2) - x
273 // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
274 // make some loop-invariant test (usually a null-check) happen before the loop.
275 bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
276 Node *test = ((IdealLoopTree*)this)->tail();
277 int body_size = ((IdealLoopTree*)this)->_body.size();
278 // Peeling does loop cloning which can result in O(N^2) node construction
279 if( body_size > 255 /* Prevent overflow for large body_size */
280 || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) {
281 return false; // too large to safely clone
282 }
283
284 // check for vectorized loops, any peeling done was already applied
285 if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) return false;
286
287 while( test != _head ) { // Scan till run off top of loop
288 if( test->is_If() ) { // Test?
289 Node *ctrl = phase->get_ctrl(test->in(1));
290 if (ctrl->is_top())
291 return false; // Found dead test on live IF? No peeling!
292 // Standard IF only has one input value to check for loop invariance
293 assert(test->Opcode() == Opcodes::Op_If || test->Opcode() == Opcodes::Op_CountedLoopEnd || test->Opcode() == Opcodes::Op_RangeCheck, "Check this code when new subtype is added");
294 // Condition is not a member of this loop?
295 if( !is_member(phase->get_loop(ctrl)) &&
296 is_loop_exit(test) )
297 return true; // Found reason to peel!
298 }
299 // Walk up dominators to loop _head looking for test which is
300 // executed on every path thru loop.
301 test = phase->idom(test);
302 }
303 return false;
304 }
305
306 //------------------------------peeled_dom_test_elim---------------------------
307 // If we got the effect of peeling, either by actually peeling or by making
308 // a pre-loop which must execute at least once, we can remove all
309 // loop-invariant dominated tests in the main body.
void PhaseIdealLoop::peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ) {
  // Iterate to a fixpoint: each dominated_by() call rewires the graph and may
  // expose further tests that are now dominated by their peeled copies.
  bool progress = true;
  while( progress ) {
    progress = false;           // Reset for next iteration
    Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();
    Node *test = prev->in(0);
    while( test != loop->_head ) { // Scan till run off top of loop

      Opcodes p_op = prev->Opcode();
      if( (p_op == Opcodes::Op_IfFalse || p_op == Opcodes::Op_IfTrue) &&
          test->is_If() &&        // Test?
          !test->in(1)->is_Con() && // And not already obvious?
          // Condition is not a member of this loop?
          !loop->is_member(get_loop(get_ctrl(test->in(1))))){
        // Walk loop body looking for instances of this test
        for( uint i = 0; i < loop->_body.size(); i++ ) {
          Node *n = loop->_body.at(i);
          if( n->is_If() && n->in(1) == test->in(1) /*&& n != loop->tail()->in(0)*/ ) {
            // IfNode was dominated by version in peeled loop body
            progress = true;
            // 'prev' is the projection of 'test' taken on every loop path, so
            // its peeled clone (old_new[prev->_idx]) dominates n; fold n.
            dominated_by( old_new[prev->_idx], n );
          }
        }
      }
      // Step up the dominator chain toward the loop head.
      prev = test;
      test = idom(test);
    } // End of scan tests in loop

  } // End of while( progress )
}
602 if (trip_count <= 3)
603 return true;
604
605 // Take into account that after unroll conjoined heads and tails will fold,
606 // otherwise policy_unroll() may allow more unrolling than max unrolling.
607 uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count;
608 uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE;
609 if (body_size != tst_body_size) // Check for int overflow
610 return false;
611 if (new_body_size > unroll_limit ||
612 // Unrolling can result in a large amount of node construction
613 new_body_size >= phase->C->max_node_limit() - phase->C->live_nodes()) {
614 return false;
615 }
616
617 // Do not unroll a loop with String intrinsics code.
618 // String intrinsics are large and have loops.
619 for (uint k = 0; k < _body.size(); k++) {
620 Node* n = _body.at(k);
621 switch (n->Opcode()) {
622 case Opcodes::Op_StrComp:
623 case Opcodes::Op_StrEquals:
624 case Opcodes::Op_StrIndexOf:
625 case Opcodes::Op_StrIndexOfChar:
626 case Opcodes::Op_EncodeISOArray:
627 case Opcodes::Op_AryEq:
628 case Opcodes::Op_HasNegatives: {
629 return false;
630 }
631 #if INCLUDE_RTM_OPT
632 case Opcodes::Op_FastLock:
633 case Opcodes::Op_FastUnlock: {
634 // Don't unroll RTM locking code because it is large.
635 if (UseRTMLocking) {
636 return false;
637 }
638 }
639 #endif
640 } // switch
641 }
642
643 return true; // Do maximally unroll
644 }
645
646
647 //------------------------------policy_unroll----------------------------------
648 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
649 // the loop is a CountedLoop and the body is small enough.
650 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
651
652 CountedLoopNode *cl = _head->as_CountedLoop();
653 assert(cl->is_normal_loop() || cl->is_main_loop(), "");
720 }
721 }
722 }
723 }
724
725 // After unroll limit will be adjusted: new_limit = limit-stride.
726 // Bailout if adjustment overflow.
727 const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
728 if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) ||
729 stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo))
730 return false; // overflow
731
732 // Adjust body_size to determine if we unroll or not
733 uint body_size = _body.size();
734 // Key test to unroll loop in CRC32 java code
735 int xors_in_loop = 0;
736 // Also count ModL, DivL and MulL which expand mightly
737 for (uint k = 0; k < _body.size(); k++) {
738 Node* n = _body.at(k);
739 switch (n->Opcode()) {
740 case Opcodes::Op_XorI: xors_in_loop++; break; // CRC32 java code
741 case Opcodes::Op_ModL: body_size += 30; break;
742 case Opcodes::Op_DivL: body_size += 30; break;
743 case Opcodes::Op_MulL: body_size += 10; break;
744 case Opcodes::Op_StrComp:
745 case Opcodes::Op_StrEquals:
746 case Opcodes::Op_StrIndexOf:
747 case Opcodes::Op_StrIndexOfChar:
748 case Opcodes::Op_EncodeISOArray:
749 case Opcodes::Op_AryEq:
750 case Opcodes::Op_HasNegatives: {
751 // Do not unroll a loop with String intrinsics code.
752 // String intrinsics are large and have loops.
753 return false;
754 }
755 #if INCLUDE_RTM_OPT
756 case Opcodes::Op_FastLock:
757 case Opcodes::Op_FastUnlock: {
758 // Don't unroll RTM locking code because it is large.
759 if (UseRTMLocking) {
760 return false;
761 }
762 }
763 #endif
764 } // switch
765 }
766
767 if (UseSuperWord) {
768 if (!cl->is_reduction_loop()) {
769 phase->mark_reductions(this);
770 }
771
772 // Only attempt slp analysis when user controls do not prohibit it
773 if (LoopMaxUnroll > _local_loop_unroll_factor) {
774 // Once policy_slp_analysis succeeds, mark the loop with the
775 // maximal unroll factor so that we minimize analysis passes
776 if (future_unroll_ct >= _local_loop_unroll_factor) {
777 policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
844 //------------------------------policy_range_check-----------------------------
845 // Return TRUE or FALSE if the loop should be range-check-eliminated.
846 // Actually we do iteration-splitting, a more powerful form of RCE.
847 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
848 if (!RangeCheckElimination) return false;
849
850 CountedLoopNode *cl = _head->as_CountedLoop();
851 // If we unrolled with no intention of doing RCE and we later
852 // changed our minds, we got no pre-loop. Either we need to
853 // make a new pre-loop, or we gotta disallow RCE.
854 if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
855 Node *trip_counter = cl->phi();
856
857 // check for vectorized loops, some opts are no longer needed
858 if (cl->do_unroll_only()) return false;
859
860 // Check loop body for tests of trip-counter plus loop-invariant vs
861 // loop-invariant.
862 for (uint i = 0; i < _body.size(); i++) {
863 Node *iff = _body[i];
864 if (iff->Opcode() == Opcodes::Op_If ||
865 iff->Opcode() == Opcodes::Op_RangeCheck) { // Test?
866
867 // Comparing trip+off vs limit
868 Node *bol = iff->in(1);
869 if (bol->req() != 2) continue; // dead constant test
870 if (!bol->is_Bool()) {
871 assert(bol->Opcode() == Opcodes::Op_Conv2B, "predicate check only");
872 continue;
873 }
874 if (bol->as_Bool()->_test._test == BoolTest::ne)
875 continue; // not RC
876
877 Node *cmp = bol->in(1);
878 Node *rc_exp = cmp->in(1);
879 Node *limit = cmp->in(2);
880
881 Node *limit_c = phase->get_ctrl(limit);
882 if( limit_c == phase->C->top() )
883 return false; // Found dead test on live IF? No RCE!
884 if( is_member(phase->get_loop(limit_c) ) ) {
885 // Compare might have operands swapped; commute them
886 rc_exp = cmp->in(2);
887 limit = cmp->in(1);
888 limit_c = phase->get_ctrl(limit);
889 if( is_member(phase->get_loop(limit_c) ) )
890 continue; // Both inputs are loop varying; cannot RCE
891 }
1030 // Add the post loop
1031 CountedLoopNode *post_head = NULL;
1032 Node *main_exit = insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1033
1034 //------------------------------
1035 // Step B: Create Pre-Loop.
1036
1037 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
1038 // loop pre-header illegally has 2 control users (old & new loops).
1039 clone_loop( loop, old_new, dd_main_head );
1040 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
1041 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
1042 pre_head->set_pre_loop(main_head);
1043 Node *pre_incr = old_new[incr->_idx];
1044
1045 // Reduce the pre-loop trip count.
1046 pre_end->_prob = PROB_FAIR;
1047
1048 // Find the pre-loop normal exit.
1049 Node* pre_exit = pre_end->proj_out(false);
1050 assert( pre_exit->Opcode() == Opcodes::Op_IfFalse, "" );
1051 IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
1052 _igvn.register_new_node_with_optimizer( new_pre_exit );
1053 set_idom(new_pre_exit, pre_end, dd_main_head);
1054 set_loop(new_pre_exit, loop->_parent);
1055
1056 // Step B2: Build a zero-trip guard for the main-loop. After leaving the
1057 // pre-loop, the main-loop may not execute at all. Later in life this
1058 // zero-trip guard will become the minimum-trip guard when we unroll
1059 // the main-loop.
1060 Node *min_opaq = new Opaque1Node(C, limit);
1061 Node *min_cmp = new CmpINode( pre_incr, min_opaq );
1062 Node *min_bol = new BoolNode( min_cmp, b_test );
1063 register_new_node( min_opaq, new_pre_exit );
1064 register_new_node( min_cmp , new_pre_exit );
1065 register_new_node( min_bol , new_pre_exit );
1066
1067 // Build the IfNode (assume the main-loop is executed always).
1068 IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
1069 _igvn.register_new_node_with_optimizer( min_iff );
1070 set_idom(min_iff, new_pre_exit, dd_main_head);
1285 // so guess that unit vector trips is a reasonable value.
1286 post_head->set_profile_trip_cnt(4.0);
1287 post_head->set_is_rce_post_loop();
1288
1289 // Now force out all loop-invariant dominating tests. The optimizer
1290 // finds some, but we _know_ they are all useless.
1291 peeled_dom_test_elim(loop, old_new);
1292 loop->record_for_igvn();
1293 }
1294
1295
1296 //------------------------------insert_post_loop-------------------------------
1297 // Insert post loops. Add a post loop to the given loop passed.
1298 Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
1299 CountedLoopNode *main_head, CountedLoopEndNode *main_end,
1300 Node *incr, Node *limit, CountedLoopNode *&post_head) {
1301
1302 //------------------------------
1303 // Step A: Create a new post-Loop.
1304 Node* main_exit = main_end->proj_out(false);
1305 assert(main_exit->Opcode() == Opcodes::Op_IfFalse, "");
1306 int dd_main_exit = dom_depth(main_exit);
1307
1308 // Step A1: Clone the loop body of main. The clone becomes the vector post-loop.
1309 // The main loop pre-header illegally has 2 control users (old & new loops).
1310 clone_loop(loop, old_new, dd_main_exit);
1311 assert(old_new[main_end->_idx]->Opcode() == Opcodes::Op_CountedLoopEnd, "");
1312 post_head = old_new[main_head->_idx]->as_CountedLoop();
1313 post_head->set_normal_loop();
1314 post_head->set_post_loop(main_head);
1315
1316 // Reduce the post-loop trip count.
1317 CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
1318 post_end->_prob = PROB_FAIR;
1319
1320 // Build the main-loop normal exit.
1321 IfFalseNode *new_main_exit = new IfFalseNode(main_end);
1322 _igvn.register_new_node_with_optimizer(new_main_exit);
1323 set_idom(new_main_exit, main_end, dd_main_exit);
1324 set_loop(new_main_exit, loop->_parent);
1325
1326 // Step A2: Build a zero-trip guard for the post-loop. After leaving the
1327 // main-loop, the post-loop may not execute at all. We 'opaque' the incr
1328 // (the previous loop trip-counter exit value) because we will be changing
1329 // the exit value (via additional unrolling) so we cannot constant-fold away the zero
1330 // trip guard until all unrolling is done.
1331 Node *zer_opaq = new Opaque1Node(C, incr);
1517 // No underflow.
1518 new_limit = new SubINode(limit, stride);
1519 } else {
1520 // (limit - stride) may underflow.
1521 // Clamp the adjustment value with MININT or MAXINT:
1522 //
1523 // new_limit = limit-stride
1524 // if (stride > 0)
1525 // new_limit = (limit < new_limit) ? MININT : new_limit;
1526 // else
1527 // new_limit = (limit > new_limit) ? MAXINT : new_limit;
1528 //
1529 BoolTest::mask bt = loop_end->test_trip();
1530 assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
1531 Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
1532 set_ctrl(adj_max, C->root());
1533 Node* old_limit = NULL;
1534 Node* adj_limit = NULL;
1535 Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
1536 if (loop_head->unrolled_count() > 1 &&
1537 limit->is_CMove() && limit->Opcode() == Opcodes::Op_CMoveI &&
1538 limit->in(CMoveNode::IfTrue) == adj_max &&
1539 bol->as_Bool()->_test._test == bt &&
1540 bol->in(1)->Opcode() == Opcodes::Op_CmpI &&
1541 bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
1542 // Loop was unrolled before.
1543 // Optimize the limit to avoid nested CMove:
1544 // use original limit as old limit.
1545 old_limit = bol->in(1)->in(1);
1546 // Adjust previous adjusted limit.
1547 adj_limit = limit->in(CMoveNode::IfFalse);
1548 adj_limit = new SubINode(adj_limit, stride);
1549 } else {
1550 old_limit = limit;
1551 adj_limit = new SubINode(limit, stride);
1552 }
1553 assert(old_limit != NULL && adj_limit != NULL, "");
1554 register_new_node( adj_limit, ctrl ); // adjust amount
1555 Node* adj_cmp = new CmpINode(old_limit, adj_limit);
1556 register_new_node( adj_cmp, ctrl );
1557 Node* adj_bool = new BoolNode(adj_cmp, bt);
1558 register_new_node( adj_bool, ctrl );
1559 new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
1560 }
1700
1701 void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
1702 if (SuperWordReductions == false) return;
1703
1704 CountedLoopNode* loop_head = loop->_head->as_CountedLoop();
1705 if (loop_head->unrolled_count() > 1) {
1706 return;
1707 }
1708
1709 Node* trip_phi = loop_head->phi();
1710 for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
1711 Node* phi = loop_head->fast_out(i);
1712 if (phi->is_Phi() && phi->outcnt() > 0 && phi != trip_phi) {
1713 // For definitions which are loop inclusive and not tripcounts.
1714 Node* def_node = phi->in(LoopNode::LoopBackControl);
1715
1716 if (def_node != NULL) {
1717 Node* n_ctrl = get_ctrl(def_node);
1718 if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
1719 // Now test it to see if it fits the standard pattern for a reduction operator.
1720 Opcodes opc = def_node->Opcode();
1721 if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
1722 if (!def_node->is_reduction()) { // Not marked yet
1723 // To be a reduction, the arithmetic node must have the phi as input and provide a def to it
1724 bool ok = false;
1725 for (unsigned j = 1; j < def_node->req(); j++) {
1726 Node* in = def_node->in(j);
1727 if (in == phi) {
1728 ok = true;
1729 break;
1730 }
1731 }
1732
1733 // do nothing if we did not match the initial criteria
1734 if (ok == false) {
1735 continue;
1736 }
1737
1738 // The result of the reduction must not be used in the loop
1739 for (DUIterator_Fast imax, i = def_node->fast_outs(imax); i < imax && ok; i++) {
1740 Node* u = def_node->fast_out(i);
1901 // ( if (scale < 0) /* and stride > 0 */
1902 // I < (low_limit-(offset+1))/scale
1903 // else /* scale > 0 and stride < 0 */
1904 // I > (low_limit-(offset+1))/scale
1905 // )
1906
1907 *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl);
1908 }
1909 }
1910
1911
1912 //------------------------------is_scaled_iv---------------------------------
1913 // Return true if exp is a constant times an induction var
1914 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
1915 if (exp == iv) {
1916 if (p_scale != NULL) {
1917 *p_scale = 1;
1918 }
1919 return true;
1920 }
1921 Opcodes opc = exp->Opcode();
1922 if (opc == Opcodes::Op_MulI) {
1923 if (exp->in(1) == iv && exp->in(2)->is_Con()) {
1924 if (p_scale != NULL) {
1925 *p_scale = exp->in(2)->get_int();
1926 }
1927 return true;
1928 }
1929 if (exp->in(2) == iv && exp->in(1)->is_Con()) {
1930 if (p_scale != NULL) {
1931 *p_scale = exp->in(1)->get_int();
1932 }
1933 return true;
1934 }
1935 } else if (opc == Opcodes::Op_LShiftI) {
1936 if (exp->in(1) == iv && exp->in(2)->is_Con()) {
1937 if (p_scale != NULL) {
1938 *p_scale = 1 << exp->in(2)->get_int();
1939 }
1940 return true;
1941 }
1942 }
1943 return false;
1944 }
1945
1946 //-----------------------------is_scaled_iv_plus_offset------------------------------
1947 // Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
1948 bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) {
1949 if (is_scaled_iv(exp, iv, p_scale)) {
1950 if (p_offset != NULL) {
1951 Node *zero = _igvn.intcon(0);
1952 set_ctrl(zero, C->root());
1953 *p_offset = zero;
1954 }
1955 return true;
1956 }
1957 Opcodes opc = exp->Opcode();
1958 if (opc == Opcodes::Op_AddI) {
1959 if (is_scaled_iv(exp->in(1), iv, p_scale)) {
1960 if (p_offset != NULL) {
1961 *p_offset = exp->in(2);
1962 }
1963 return true;
1964 }
1965 if (is_scaled_iv(exp->in(2), iv, p_scale)) {
1966 if (p_offset != NULL) {
1967 *p_offset = exp->in(1);
1968 }
1969 return true;
1970 }
1971 if (exp->in(2)->is_Con()) {
1972 Node* offset2 = NULL;
1973 if (depth < 2 &&
1974 is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
1975 p_offset != NULL ? &offset2 : NULL, depth+1)) {
1976 if (p_offset != NULL) {
1977 Node *ctrl_off2 = get_ctrl(offset2);
1978 Node* offset = new AddINode(offset2, exp->in(2));
1979 register_new_node(offset, ctrl_off2);
1980 *p_offset = offset;
1981 }
1982 return true;
1983 }
1984 }
1985 } else if (opc == Opcodes::Op_SubI) {
1986 if (is_scaled_iv(exp->in(1), iv, p_scale)) {
1987 if (p_offset != NULL) {
1988 Node *zero = _igvn.intcon(0);
1989 set_ctrl(zero, C->root());
1990 Node *ctrl_off = get_ctrl(exp->in(2));
1991 Node* offset = new SubINode(zero, exp->in(2));
1992 register_new_node(offset, ctrl_off);
1993 *p_offset = offset;
1994 }
1995 return true;
1996 }
1997 if (is_scaled_iv(exp->in(2), iv, p_scale)) {
1998 if (p_offset != NULL) {
1999 *p_scale *= -1;
2000 *p_offset = exp->in(1);
2001 }
2002 return true;
2003 }
2004 }
2005 return false;
2032 // to not ever trip end tests
2033 Node *main_limit = cl->limit();
2034
2035 // Check graph shape. Cannot optimize a loop if zero-trip
2036 // Opaque1 node is optimized away and then another round
2037 // of loop opts attempted.
2038 if (!is_canonical_loop_entry(cl)) {
2039 return closed_range_checks;
2040 }
2041
2042 // Need to find the main-loop zero-trip guard
2043 Node *ctrl = cl->in(LoopNode::EntryControl);
2044 Node *iffm = ctrl->in(0);
2045 Node *opqzm = iffm->in(1)->in(1)->in(2);
2046 assert(opqzm->in(1) == main_limit, "do not understand situation");
2047
2048 // Find the pre-loop limit; we will expand its iterations to
2049 // not ever trip low tests.
2050 Node *p_f = iffm->in(0);
2051 // pre loop may have been optimized out
2052 if (p_f->Opcode() != Opcodes::Op_IfFalse) {
2053 return closed_range_checks;
2054 }
2055 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2056 assert(pre_end->loopnode()->is_pre_loop(), "");
2057 Node *pre_opaq1 = pre_end->limit();
2058 // Occasionally it's possible for a pre-loop Opaque1 node to be
2059 // optimized away and then another round of loop opts attempted.
2060 // We can not optimize this particular loop in that case.
2061 if (pre_opaq1->Opcode() != Opcodes::Op_Opaque1)
2062 return closed_range_checks;
2063 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2064 Node *pre_limit = pre_opaq->in(1);
2065
2066 // Where do we put new limit calculations
2067 Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2068
2069 // Ensure the original loop limit is available from the
2070 // pre-loop Opaque1 node.
2071 Node *orig_limit = pre_opaq->original_loop_limit();
2072 if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP)
2073 return closed_range_checks;
2074
2075 // Must know if its a count-up or count-down loop
2076
2077 int stride_con = cl->stride_con();
2078 Node *zero = _igvn.intcon(0);
2079 Node *one = _igvn.intcon(1);
2080 // Use symmetrical int range [-max_jint,max_jint]
2081 Node *mini = _igvn.intcon(-max_jint);
2082 set_ctrl(zero, C->root());
2083 set_ctrl(one, C->root());
2084 set_ctrl(mini, C->root());
2085
2086 // Range checks that do not dominate the loop backedge (ie.
2087 // conditionally executed) can lengthen the pre loop limit beyond
2088 // the original loop limit. To prevent this, the pre limit is
2089 // (for stride > 0) MINed with the original loop limit (MAXed
2090 // stride < 0) when some range_check (rc) is conditionally
2091 // executed.
2092 bool conditional_rc = false;
2093
2094 // Count number of range checks and reduce by load range limits, if zero,
2095 // the loop is in canonical form to multiversion.
2096 closed_range_checks = 0;
2097
2098 // Check loop body for tests of trip-counter plus loop-invariant vs
2099 // loop-invariant.
2100 for( uint i = 0; i < loop->_body.size(); i++ ) {
2101 Node *iff = loop->_body[i];
2102 if (iff->Opcode() == Opcodes::Op_If ||
2103 iff->Opcode() == Opcodes::Op_RangeCheck) { // Test?
2104 // Test is an IfNode, has 2 projections. If BOTH are in the loop
2105 // we need loop unswitching instead of iteration splitting.
2106 closed_range_checks++;
2107 Node *exit = loop->is_loop_exit(iff);
2108 if( !exit ) continue;
2109 int flip = (exit->Opcode() == Opcodes::Op_IfTrue) ? 1 : 0;
2110
2111 // Get boolean condition to test
2112 Node *i1 = iff->in(1);
2113 if( !i1->is_Bool() ) continue;
2114 BoolNode *bol = i1->as_Bool();
2115 BoolTest b_test = bol->_test;
2116 // Flip sense of test if exit condition is flipped
2117 if( flip )
2118 b_test = b_test.negate();
2119
2120 // Get compare
2121 Node *cmp = bol->in(1);
2122
2123 // Look for trip_counter + offset vs limit
2124 Node *rc_exp = cmp->in(1);
2125 Node *limit = cmp->in(2);
2126 jint scale_con= 1; // Assume trip counter not scaled
2127
2128 Node *limit_c = get_ctrl(limit);
2129 if( loop->is_member(get_loop(limit_c) ) ) {
2158
2159 // As above for the 'limit', the 'offset' maybe pinned below the
2160 // zero trip test.
2161 if( offset_c == ctrl ) {
2162 continue; // Don't rce this check but continue looking for other candidates.
2163 }
2164 #ifdef ASSERT
2165 if (TraceRangeLimitCheck) {
2166 tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2167 bol->dump(2);
2168 }
2169 #endif
2170 // At this point we have the expression as:
2171 // scale_con * trip_counter + offset :: limit
2172 // where scale_con, offset and limit are loop invariant. Trip_counter
2173 // monotonically increases by stride_con, a constant. Both (or either)
2174 // stride_con and scale_con can be negative which will flip about the
2175 // sense of the test.
2176
2177 // Adjust pre and main loop limits to guard the correct iteration set
2178 if( cmp->Opcode() == Opcodes::Op_CmpU ) {// Unsigned compare is really 2 tests
2179 if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2180 // The underflow and overflow limits: 0 <= scale*I+offset < limit
2181 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2182 // (0-offset)/scale could be outside of loop iterations range.
2183 conditional_rc = true;
2184 } else {
2185 if (PrintOpto) {
2186 tty->print_cr("missed RCE opportunity");
2187 }
2188 continue; // In release mode, ignore it
2189 }
2190 } else { // Otherwise work on normal compares
2191 switch( b_test._test ) {
2192 case BoolTest::gt:
2193 // Fall into GE case
2194 case BoolTest::ge:
2195 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2196 scale_con = -scale_con;
2197 offset = new SubINode( zero, offset );
2198 register_new_node( offset, pre_ctrl );
2226
2227 // Kill the eliminated test
2228 C->set_major_progress();
2229 Node *kill_con = _igvn.intcon( 1-flip );
2230 set_ctrl(kill_con, C->root());
2231 _igvn.replace_input_of(iff, 1, kill_con);
2232 // Find surviving projection
2233 assert(iff->is_If(), "");
2234 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2235 // Find loads off the surviving projection; remove their control edge
2236 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2237 Node* cd = dp->fast_out(i); // Control-dependent node
2238 if (cd->is_Load() && cd->depends_only_on_test()) { // Loads can now float around in the loop
2239 // Allow the load to float around in the loop, or before it
2240 // but NOT before the pre-loop.
2241 _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
2242 --i;
2243 --imax;
2244 }
2245 }
2246 if (limit->Opcode() == Opcodes::Op_LoadRange) {
2247 closed_range_checks--;
2248 }
2249
2250 } // End of is IF
2251
2252 }
2253
2254 // Update loop limits
2255 if (conditional_rc) {
2256 pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2257 : (Node*)new MaxINode(pre_limit, orig_limit);
2258 register_new_node(pre_limit, pre_ctrl);
2259 }
2260 _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2261
2262 // Note:: we are making the main loop limit no longer precise;
2263 // need to round up based on stride.
2264 cl->set_nonexact_trip_count();
2265 Node *main_cle = cl->loopexit();
2266 Node *main_bol = main_cle->in(1);
2284
2285 return closed_range_checks;
2286 }
2287
2288 //------------------------------has_range_checks-------------------------------
2289 // Check to see if RCE cleaned the current loop of range-checks.
2290 void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
2291 assert(RangeCheckElimination, "");
2292
2293 // skip if not a counted loop
2294 if (!loop->is_counted()) return;
2295
2296 CountedLoopNode *cl = loop->_head->as_CountedLoop();
2297
2298 // skip this loop if it is already checked
2299 if (cl->has_been_range_checked()) return;
2300
2301 // Now check for existance of range checks
2302 for (uint i = 0; i < loop->_body.size(); i++) {
2303 Node *iff = loop->_body[i];
2304 Opcodes iff_opc = iff->Opcode();
2305 if (iff_opc == Opcodes::Op_If || iff_opc == Opcodes::Op_RangeCheck) {
2306 cl->mark_has_range_checks();
2307 break;
2308 }
2309 }
2310 cl->set_has_been_range_checked();
2311 }
2312
2313 //-------------------------multi_version_post_loops----------------------------
2314 // Check the range checks that remain, if simple, use the bounds to guard
2315 // which version to a post loop we execute, one with range checks or one without
2316 bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
2317 bool multi_version_succeeded = false;
2318 assert(RangeCheckElimination, "");
2319 CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
2320 assert(legacy_cl->is_post_loop(), "");
2321
2322 // Check for existance of range checks using the unique instance to make a guard with
2323 Unique_Node_List worklist;
2324 for (uint i = 0; i < legacy_loop->_body.size(); i++) {
2325 Node *iff = legacy_loop->_body[i];
2326 Opcodes iff_opc = iff->Opcode();
2327 if (iff_opc == Opcodes::Op_If || iff_opc == Opcodes::Op_RangeCheck) {
2328 worklist.push(iff);
2329 }
2330 }
2331
2332 // Find RCE'd post loop so that we can stage its guard.
2333 if (!is_canonical_loop_entry(legacy_cl)) return multi_version_succeeded;
2334 Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
2335 Node* iffm = ctrl->in(0);
2336
2337 // Now we test that both the post loops are connected
2338 Node* post_loop_region = iffm->in(0);
2339 if (post_loop_region == NULL) return multi_version_succeeded;
2340 if (!post_loop_region->is_Region()) return multi_version_succeeded;
2341 Node* covering_region = post_loop_region->in(RegionNode::Control+1);
2342 if (covering_region == NULL) return multi_version_succeeded;
2343 if (!covering_region->is_Region()) return multi_version_succeeded;
2344 Node* p_f = covering_region->in(RegionNode::Control);
2345 if (p_f == NULL) return multi_version_succeeded;
2346 if (!p_f->is_IfFalse()) return multi_version_succeeded;
2347 if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded;
2365 #endif
2366
2367 // Now fetch the limit we want to compare against
2368 Node *limit = rce_cl->limit();
2369 bool first_time = true;
2370
2371 // If we got this far, we identified the post loop which has been RCE'd and
2372 // we have a work list. Now we will try to transform the if guard to cause
2373 // the loop pair to be multi version executed with the determination left to runtime
2374 // or the optimizer if full information is known about the given arrays at compile time.
2375 Node *last_min = NULL;
2376 multi_version_succeeded = true;
2377 while (worklist.size()) {
2378 Node* rc_iffm = worklist.pop();
2379 if (rc_iffm->is_If()) {
2380 Node *rc_bolzm = rc_iffm->in(1);
2381 if (rc_bolzm->is_Bool()) {
2382 Node *rc_cmpzm = rc_bolzm->in(1);
2383 if (rc_cmpzm->is_Cmp()) {
2384 Node *rc_left = rc_cmpzm->in(2);
2385 if (rc_left->Opcode() != Opcodes::Op_LoadRange) {
2386 multi_version_succeeded = false;
2387 break;
2388 }
2389 if (first_time) {
2390 last_min = rc_left;
2391 first_time = false;
2392 } else {
2393 Node *cur_min = new MinINode(last_min, rc_left);
2394 last_min = cur_min;
2395 _igvn.register_new_node_with_optimizer(last_min);
2396 }
2397 }
2398 }
2399 }
2400 }
2401
2402 // All we have to do is update the limit of the rce loop
2403 // with the min of our expression and the current limit.
2404 // We will use this expression to replace the current limit.
2405 if (last_min && multi_version_succeeded) {
2444 }
2445 }
2446 }
2447 }
2448
//------------------------------DCE_loop_body----------------------------------
// Remove simplistic dead code from loop body
void IdealLoopTree::DCE_loop_body() {
  // Compact the body list in place: when a node has no uses (outcnt == 0),
  // pop the last list element into the dead node's slot and re-examine that
  // slot (i--), since the moved-in node may itself be dead.
  // NOTE(review): relies on Node_List pop/map semantics for the case where
  // the dead node is itself the last element — confirm against node.hpp.
  for( uint i = 0; i < _body.size(); i++ )
    if( _body.at(i)->outcnt() == 0 )
      _body.map( i--, _body.pop() );
}
2456
2457
2458 //------------------------------adjust_loop_exit_prob--------------------------
2459 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
2460 // Replace with a 1-in-10 exit guess.
2461 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
2462 Node *test = tail();
2463 while( test != _head ) {
2464 Opcodes top = test->Opcode();
2465 if( top == Opcodes::Op_IfTrue || top == Opcodes::Op_IfFalse ) {
2466 int test_con = ((ProjNode*)test)->_con;
2467 assert(top == (test_con? Opcodes::Op_IfTrue: Opcodes::Op_IfFalse), "sanity");
2468 IfNode *iff = test->in(0)->as_If();
2469 if( iff->outcnt() == 2 ) { // Ignore dead tests
2470 Node *bol = iff->in(1);
2471 if( bol && bol->req() > 1 && bol->in(1) &&
2472 ((bol->in(1)->Opcode() == Opcodes::Op_StorePConditional ) ||
2473 (bol->in(1)->Opcode() == Opcodes::Op_StoreIConditional ) ||
2474 (bol->in(1)->Opcode() == Opcodes::Op_StoreLConditional ) ||
2475 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeB ) ||
2476 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeS ) ||
2477 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeI ) ||
2478 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeL ) ||
2479 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeP ) ||
2480 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndExchangeN ) ||
2481 (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapB ) ||
2482 (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapS ) ||
2483 (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapI ) ||
2484 (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapL ) ||
2485 (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapP ) ||
2486 (bol->in(1)->Opcode() == Opcodes::Op_WeakCompareAndSwapN ) ||
2487 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapB ) ||
2488 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapS ) ||
2489 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapI ) ||
2490 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapL ) ||
2491 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapP ) ||
2492 (bol->in(1)->Opcode() == Opcodes::Op_CompareAndSwapN )))
2493 return; // Allocation loops RARELY take backedge
2494 // Find the OTHER exit path from the IF
2495 Node* ex = iff->proj_out(1-test_con);
2496 float p = iff->_prob;
2497 if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) {
2498 if( top == Opcodes::Op_IfTrue ) {
2499 if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) {
2500 iff->_prob = PROB_STATIC_FREQUENT;
2501 }
2502 } else {
2503 if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) {
2504 iff->_prob = PROB_STATIC_INFREQUENT;
2505 }
2506 }
2507 }
2508 }
2509 }
2510 test = phase->idom(test);
2511 }
2512 }
2513
#ifdef ASSERT
// Debug-only helper: starting from a main loop, walk up through its
// zero-trip guard to the pre-loop that feeds it, asserting the expected
// graph shape at every step.
static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
  Node *entry = cl->in(LoopNode::EntryControl);
  assert(entry->Opcode() == Opcodes::Op_IfTrue || entry->Opcode() == Opcodes::Op_IfFalse, "");
  Node *zer_iff = entry->in(0);
  assert(zer_iff->Opcode() == Opcodes::Op_If, "");
  Node *pre_false = zer_iff->in(0);
  assert(pre_false->Opcode() == Opcodes::Op_IfFalse, "");
  CountedLoopEndNode *pre_end = pre_false->in(0)->as_CountedLoopEnd();
  assert(pre_end->loopnode()->is_pre_loop(), "");
  return pre_end->loopnode();
}
#endif
2527
2528 // Remove the main and post loops and make the pre loop execute all
2529 // iterations. Useful when the pre loop is found empty.
2530 void IdealLoopTree::remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase) {
2531 CountedLoopEndNode* pre_end = cl->loopexit();
2532 Node* pre_cmp = pre_end->cmp_node();
2533 if (pre_cmp->in(2)->Opcode() != Opcodes::Op_Opaque1) {
2534 // Only safe to remove the main loop if the compiler optimized it
2535 // out based on an unknown number of iterations
2536 return;
2537 }
2538
2539 // Can we find the main loop?
2540 if (_next == NULL) {
2541 return;
2542 }
2543
2544 Node* next_head = _next->_head;
2545 if (!next_head->is_CountedLoop()) {
2546 return;
2547 }
2548
2549 CountedLoopNode* main_head = next_head->as_CountedLoop();
2550 if (!main_head->is_main_loop()) {
2551 return;
2552 }
2553
2554 assert(locate_pre_from_main(main_head) == cl, "bad main loop");
2555 Node* main_iff = main_head->in(LoopNode::EntryControl)->in(0);
2556
2557 // Remove the Opaque1Node of the pre loop and make it execute all iterations
2558 phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
2559 // Remove the Opaque1Node of the main loop so it can be optimized out
2560 Node* main_cmp = main_iff->in(1)->in(1);
2561 assert(main_cmp->in(2)->Opcode() == Opcodes::Op_Opaque1, "main loop has no opaque node?");
2562 phase->_igvn.replace_input_of(main_cmp, 2, main_cmp->in(2)->in(1));
2563 }
2564
2565 //------------------------------policy_do_remove_empty_loop--------------------
2566 // Micro-benchmark spamming. Policy is to always remove empty loops.
2567 // The 'DO' part is to replace the trip counter with the value it will
2568 // have on the last iteration. This will break the loop.
2569 bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
2570 // Minimum size must be empty loop
2571 if (_body.size() > EMPTY_LOOP_SIZE)
2572 return false;
2573
2574 if (!_head->is_CountedLoop())
2575 return false; // Dead loop
2576 CountedLoopNode *cl = _head->as_CountedLoop();
2577 if (!cl->is_valid_counted_loop())
2578 return false; // Malformed loop
2579 if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
2580 return false; // Infinite loop
2581
2582 if (cl->is_pre_loop()) {
2583 // If the loop we are removing is a pre-loop then the main and
2584 // post loop can be removed as well
2585 remove_main_post_loops(cl, phase);
2586 }
2587
2588 #ifdef ASSERT
2589 // Ensure only one phi which is the iv.
2590 Node* iv = NULL;
2591 for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) {
2592 Node* n = cl->fast_out(i);
2593 if (n->Opcode() == Opcodes::Op_Phi) {
2594 assert(iv == NULL, "Too many phis" );
2595 iv = n;
2596 }
2597 }
2598 assert(iv == cl->phi(), "Wrong phi" );
2599 #endif
2600
2601 // main and post loops have explicitly created zero trip guard
2602 bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
2603 if (needs_guard) {
2604 // Skip guard if values not overlap.
2605 const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
2606 const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
2607 int stride_con = cl->stride_con();
2608 if (stride_con > 0) {
2609 needs_guard = (init_t->_hi >= limit_t->_lo);
2610 } else {
2611 needs_guard = (init_t->_lo <= limit_t->_hi);
2612 }
2613 }
2614 if (needs_guard) {
2615 // Check for an obvious zero trip guard.
2616 Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl));
2617 if (inctrl->Opcode() == Opcodes::Op_IfTrue || inctrl->Opcode() == Opcodes::Op_IfFalse) {
2618 bool maybe_swapped = (inctrl->Opcode() == Opcodes::Op_IfFalse);
2619 // The test should look like just the backedge of a CountedLoop
2620 Node* iff = inctrl->in(0);
2621 if (iff->is_If()) {
2622 Node* bol = iff->in(1);
2623 if (bol->is_Bool()) {
2624 BoolTest test = bol->as_Bool()->_test;
2625 if (maybe_swapped) {
2626 test._test = test.commute();
2627 test._test = test.negate();
2628 }
2629 if (test._test == cl->loopexit()->test_trip()) {
2630 Node* cmp = bol->in(1);
2631 int init_idx = maybe_swapped ? 2 : 1;
2632 int limit_idx = maybe_swapped ? 1 : 2;
2633 if (cmp->is_Cmp() && cmp->in(init_idx) == cl->init_trip() && cmp->in(limit_idx) == cl->limit()) {
2634 needs_guard = false;
2635 }
2636 }
2637 }
2638 }
2911 bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
2912 Node*& shift, Node*& con) {
2913 const char* msg = NULL;
2914 Node* msg_node = NULL;
2915
2916 store_value = NULL;
2917 con = NULL;
2918 shift = NULL;
2919
2920 // Process the loop looking for stores. If there are multiple
2921 // stores or extra control flow give at this point.
2922 CountedLoopNode* head = lpt->_head->as_CountedLoop();
2923 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
2924 Node* n = lpt->_body.at(i);
2925 if (n->outcnt() == 0) continue; // Ignore dead
2926 if (n->is_Store()) {
2927 if (store != NULL) {
2928 msg = "multiple stores";
2929 break;
2930 }
2931 Opcodes opc = n->Opcode();
2932 if (opc == Opcodes::Op_StoreP || opc == Opcodes::Op_StoreN || opc == Opcodes::Op_StoreNKlass || opc == Opcodes::Op_StoreCM) {
2933 msg = "oop fills not handled";
2934 break;
2935 }
2936 Node* value = n->in(MemNode::ValueIn);
2937 if (!lpt->is_invariant(value)) {
2938 msg = "variant store value";
2939 } else if (!_igvn.type(n->in(MemNode::Address))->isa_aryptr()) {
2940 msg = "not array address";
2941 }
2942 store = n;
2943 store_value = value;
2944 } else if (n->is_If() && n != head->loopexit()) {
2945 msg = "extra control flow";
2946 msg_node = n;
2947 }
2948 }
2949
2950 if (store == NULL) {
2951 // No store in loop
2952 return false;
2986 #ifndef PRODUCT
2987 if (TraceOptimizeFill) {
2988 tty->print_cr("not fill intrinsic candidate: %s", msg);
2989 if (msg_node != NULL) msg_node->dump();
2990 }
2991 #endif
2992 return false;
2993 }
2994
2995 // Make sure the address expression can be handled. It should be
2996 // head->phi * elsize + con. head->phi might have a ConvI2L(CastII()).
2997 Node* elements[4];
2998 Node* cast = NULL;
2999 Node* conv = NULL;
3000 bool found_index = false;
3001 int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
3002 for (int e = 0; e < count; e++) {
3003 Node* n = elements[e];
3004 if (n->is_Con() && con == NULL) {
3005 con = n;
3006 } else if (n->Opcode() == Opcodes::Op_LShiftX && shift == NULL) {
3007 Node* value = n->in(1);
3008 #ifdef _LP64
3009 if (value->Opcode() == Opcodes::Op_ConvI2L) {
3010 conv = value;
3011 value = value->in(1);
3012 }
3013 if (value->Opcode() == Opcodes::Op_CastII &&
3014 value->as_CastII()->has_range_check()) {
3015 // Skip range check dependent CastII nodes
3016 cast = value;
3017 value = value->in(1);
3018 }
3019 #endif
3020 if (value != head->phi()) {
3021 msg = "unhandled shift in address";
3022 } else {
3023 if (type2aelembytes(store->as_Mem()->memory_type(), true) != (1 << n->in(2)->get_int())) {
3024 msg = "scale doesn't match";
3025 } else {
3026 found_index = true;
3027 shift = n;
3028 }
3029 }
3030 } else if (n->Opcode() == Opcodes::Op_ConvI2L && conv == NULL) {
3031 conv = n;
3032 n = n->in(1);
3033 if (n->Opcode() == Opcodes::Op_CastII &&
3034 n->as_CastII()->has_range_check()) {
3035 // Skip range check dependent CastII nodes
3036 cast = n;
3037 n = n->in(1);
3038 }
3039 if (n == head->phi()) {
3040 found_index = true;
3041 } else {
3042 msg = "unhandled input to ConvI2L";
3043 }
3044 } else if (n == head->phi()) {
3045 // no shift, check below for allowed cases
3046 found_index = true;
3047 } else {
3048 msg = "unhandled node in address";
3049 msg_node = n;
3050 }
3051 }
3052
3053 if (count == -1) {
|