src/share/vm/opto/loopTransform.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File 8072422 Sdiff src/share/vm/opto

src/share/vm/opto/loopTransform.cpp

Print this page




1451   Node *stride = loop_head->stride();
1452 
1453   Node *opaq = NULL;
1454   if (adjust_min_trip) {       // If not maximally unrolling, need adjustment
1455     // Search for zero-trip guard.
1456 
1457     // Check the shape of the graph at the loop entry. If an inappropriate
1458     // graph shape is encountered, the compiler bails out loop unrolling;
1459     // compilation of the method will still succeed.
1460     if (!is_canonical_main_loop_entry(loop_head)) {
1461       return;
1462     }
1463     opaq = ctrl->in(0)->in(1)->in(1)->in(2);
1464     // Zero-trip test uses an 'opaque' node which is not shared.
1465     assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
1466   }
1467 
1468   C->set_major_progress();
1469 
1470   Node* new_limit = NULL;
1471   if (UnrollLimitCheck) {
1472     int stride_con = stride->get_int();
1473     int stride_p = (stride_con > 0) ? stride_con : -stride_con;
1474     uint old_trip_count = loop_head->trip_count();
1475     // Verify that unroll policy result is still valid.
1476     assert(old_trip_count > 1 &&
1477            (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
1478 
1479     // Adjust loop limit to keep valid iterations number after unroll.
1480     // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
1481     // which may overflow.
1482     if (!adjust_min_trip) {
1483       assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
1484              "odd trip count for maximally unroll");
1485       // Don't need to adjust limit for maximally unroll since trip count is even.
1486     } else if (loop_head->has_exact_trip_count() && init->is_Con()) {
1487       // Loop's limit is constant. Loop's init could be constant when pre-loop
1488       // become peeled iteration.
1489       jlong init_con = init->get_int();
1490       // We can keep old loop limit if iterations count stays the same:
1491       //   old_trip_count == new_trip_count * 2


1603       // Step 3: Find the min-trip test guaranteed before a 'main' loop.
1604       // Make it a 1-trip test (means at least 2 trips).
1605 
1606       // Guard test uses an 'opaque' node which is not shared.  Hence I
1607       // can edit it's inputs directly.  Hammer in the new limit for the
1608       // minimum-trip guard.
1609       assert(opaq->outcnt() == 1, "");
1610       _igvn.replace_input_of(opaq, 1, new_limit);
1611     }
1612 
1613     // Adjust max trip count. The trip count is intentionally rounded
1614     // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
1615     // the main, unrolled, part of the loop will never execute as it is protected
1616     // by the min-trip test.  See bug 4834191 for a case where we over-unrolled
1617     // and later determined that part of the unrolled loop was dead.
1618     loop_head->set_trip_count(old_trip_count / 2);
1619 
1620     // Double the count of original iterations in the unrolled loop body.
1621     loop_head->double_unrolled_count();
1622 
1623   } else { // LoopLimitCheck
1624 
1625     // Adjust max trip count. The trip count is intentionally rounded
1626     // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
1627     // the main, unrolled, part of the loop will never execute as it is protected
1628     // by the min-trip test.  See bug 4834191 for a case where we over-unrolled
1629     // and later determined that part of the unrolled loop was dead.
1630     loop_head->set_trip_count(loop_head->trip_count() / 2);
1631 
1632     // Double the count of original iterations in the unrolled loop body.
1633     loop_head->double_unrolled_count();
1634 
1635     // -----------
1636     // Step 2: Cut back the trip counter for an unroll amount of 2.
1637     // Loop will normally trip (limit - init)/stride_con.  Since it's a
1638     // CountedLoop this is exact (stride divides limit-init exactly).
1639     // We are going to double the loop body, so we want to knock off any
1640     // odd iteration: (trip_cnt & ~1).  Then back compute a new limit.
1641     Node *span = new SubINode( limit, init );
1642     register_new_node( span, ctrl );
1643     Node *trip = new DivINode( 0, span, stride );
1644     register_new_node( trip, ctrl );
1645     Node *mtwo = _igvn.intcon(-2);
1646     set_ctrl(mtwo, C->root());
1647     Node *rond = new AndINode( trip, mtwo );
1648     register_new_node( rond, ctrl );
1649     Node *spn2 = new MulINode( rond, stride );
1650     register_new_node( spn2, ctrl );
1651     new_limit = new AddINode( spn2, init );
1652     register_new_node( new_limit, ctrl );
1653 
1654     // Hammer in the new limit
1655     Node *ctrl2 = loop_end->in(0);
1656     Node *cmp2 = new CmpINode( loop_head->incr(), new_limit );
1657     register_new_node( cmp2, ctrl2 );
1658     Node *bol2 = new BoolNode( cmp2, loop_end->test_trip() );
1659     register_new_node( bol2, ctrl2 );
1660     _igvn.replace_input_of(loop_end, CountedLoopEndNode::TestValue, bol2);
1661 
1662     // Step 3: Find the min-trip test guaranteed before a 'main' loop.
1663     // Make it a 1-trip test (means at least 2 trips).
1664     if( adjust_min_trip ) {
1665       assert( new_limit != NULL, "" );
1666       // Guard test uses an 'opaque' node which is not shared.  Hence I
1667       // can edit it's inputs directly.  Hammer in the new limit for the
1668       // minimum-trip guard.
1669       assert( opaq->outcnt() == 1, "" );
1670       _igvn.hash_delete(opaq);
1671       opaq->set_req(1, new_limit);
1672     }
1673   } // LoopLimitCheck
1674 
1675   // ---------
1676   // Step 4: Clone the loop body.  Move it inside the loop.  This loop body
1677   // represents the odd iterations; since the loop trips an even number of
1678   // times its backedge is never taken.  Kill the backedge.
1679   uint dd = dom_depth(loop_head);
1680   clone_loop( loop, old_new, dd );
1681 
1682   // Make backedges of the clone equal to backedges of the original.
1683   // Make the fall-in from the original come from the fall-out of the clone.
1684   for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) {
1685     Node* phi = loop_head->fast_out(j);
1686     if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) {
1687       Node *newphi = old_new[phi->_idx];
1688       _igvn.hash_delete( phi );
1689       _igvn.hash_delete( newphi );
1690 
1691       phi   ->set_req(LoopNode::   EntryControl, newphi->in(LoopNode::LoopBackControl));
1692       newphi->set_req(LoopNode::LoopBackControl, phi   ->in(LoopNode::LoopBackControl));
1693       phi   ->set_req(LoopNode::LoopBackControl, C->top());
1694     }


1887     //     else /* scale < 0 and stride < 0 */
1888     //       I > (upper_limit-offset)/scale
1889     //   )
1890     //
1891     // (upper_limit-offset) may overflow or underflow.
1892     // But it is fine since main loop will either have
1893     // less iterations or will be skipped in such case.
1894     *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl);
1895 
1896     // The underflow limit: low_limit <= scale*I+offset.
1897     // For pre-loop compute
1898     //   NOT(scale*I+offset >= low_limit)
1899     //   scale*I+offset < low_limit
1900     //   ( if (scale > 0) /* and stride > 0 */
1901     //       I < (low_limit-offset)/scale
1902     //     else /* scale < 0 and stride < 0 */
1903     //       I > (low_limit-offset)/scale
1904     //   )
1905 
1906     if (low_limit->get_int() == -max_jint) {
1907       if (!RangeLimitCheck) return;
1908       // We need this guard when scale*pre_limit+offset >= limit
1909       // due to underflow. So we need execute pre-loop until
1910       // scale*I+offset >= min_int. But (min_int-offset) will
1911       // underflow when offset > 0 and X will be > original_limit
1912       // when stride > 0. To avoid it we replace positive offset with 0.
1913       //
1914       // Also (min_int+1 == -max_int) is used instead of min_int here
1915       // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1916       Node* shift = _igvn.intcon(31);
1917       set_ctrl(shift, C->root());
1918       Node* sign = new RShiftINode(offset, shift);
1919       register_new_node(sign, pre_ctrl);
1920       offset = new AndINode(offset, sign);
1921       register_new_node(offset, pre_ctrl);
1922     } else {
1923       assert(low_limit->get_int() == 0, "wrong low limit for range check");
1924       // The only problem we have here when offset == min_int
1925       // since (0-min_int) == min_int. It may be fine for stride > 0
1926       // but for stride < 0 X will be < original_limit. To avoid it
1927       // max(pre_limit, original_limit) is used in do_range_check().


1939     //   scale*I+offset >= upper_limit
1940     //   scale*I+offset+1 > upper_limit
1941     //   ( if (scale < 0) /* and stride > 0 */
1942     //       I < (upper_limit-(offset+1))/scale
1943     //     else /* scale > 0 and stride < 0 */
1944     //       I > (upper_limit-(offset+1))/scale
1945     //   )
1946     //
1947     // (upper_limit-offset-1) may underflow or overflow.
1948     // To avoid it min(pre_limit, original_limit) is used
1949     // in do_range_check() for stride > 0 and max() for < 0.
1950     Node *one  = _igvn.intcon(1);
1951     set_ctrl(one, C->root());
1952 
1953     Node *plus_one = new AddINode(offset, one);
1954     register_new_node( plus_one, pre_ctrl );
1955     // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1956     *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl);
1957 
1958     if (low_limit->get_int() == -max_jint) {
1959       if (!RangeLimitCheck) return;
1960       // We need this guard when scale*main_limit+offset >= limit
1961       // due to underflow. So we need execute main-loop while
1962       // scale*I+offset+1 > min_int. But (min_int-offset-1) will
1963       // underflow when (offset+1) > 0 and X will be < main_limit
1964       // when scale < 0 (and stride > 0). To avoid it we replace
1965       // positive (offset+1) with 0.
1966       //
1967       // Also (min_int+1 == -max_int) is used instead of min_int here
1968       // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1969       Node* shift = _igvn.intcon(31);
1970       set_ctrl(shift, C->root());
1971       Node* sign = new RShiftINode(plus_one, shift);
1972       register_new_node(sign, pre_ctrl);
1973       plus_one = new AndINode(plus_one, sign);
1974       register_new_node(plus_one, pre_ctrl);
1975     } else {
1976       assert(low_limit->get_int() == 0, "wrong low limit for range check");
1977       // The only problem we have here when offset == max_int
1978       // since (max_int+1) == min_int and (0-min_int) == min_int.
1979       // But it is fine since main loop will either have


2241 #ifdef ASSERT
2242       if (TraceRangeLimitCheck) {
2243         tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2244         bol->dump(2);
2245       }
2246 #endif
2247       // At this point we have the expression as:
2248       //   scale_con * trip_counter + offset :: limit
2249       // where scale_con, offset and limit are loop invariant.  Trip_counter
2250       // monotonically increases by stride_con, a constant.  Both (or either)
2251       // stride_con and scale_con can be negative which will flip about the
2252       // sense of the test.
2253 
2254       // Adjust pre and main loop limits to guard the correct iteration set
2255       if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2256         if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2257           // The underflow and overflow limits: 0 <= scale*I+offset < limit
2258           add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2259           if (!conditional_rc) {
2260             // (0-offset)/scale could be outside of loop iterations range.
2261             conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
2262           }
2263         } else {
2264           if (PrintOpto) {
2265             tty->print_cr("missed RCE opportunity");
2266           }
2267           continue;             // In release mode, ignore it
2268         }
2269       } else {                  // Otherwise work on normal compares
2270         switch( b_test._test ) {
2271         case BoolTest::gt:
2272           // Fall into GE case
2273         case BoolTest::ge:
2274           // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2275           scale_con = -scale_con;
2276           offset = new SubINode( zero, offset );
2277           register_new_node( offset, pre_ctrl );
2278           limit  = new SubINode( zero, limit  );
2279           register_new_node( limit, pre_ctrl );
2280           // Fall into LE case
2281         case BoolTest::le:
2282           if (b_test._test != BoolTest::gt) {
2283             // Convert X <= Y to X < Y+1
2284             limit = new AddINode( limit, one );
2285             register_new_node( limit, pre_ctrl );
2286           }
2287           // Fall into LT case
2288         case BoolTest::lt:
2289           // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
2290           // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
2291           // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2292           add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
2293           if (!conditional_rc) {
2294             // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
2295             // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
2296             // still be outside of loop range.
2297             conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
2298           }
2299           break;
2300         default:
2301           if (PrintOpto) {
2302             tty->print_cr("missed RCE opportunity");
2303           }
2304           continue;             // Unhandled case
2305         }
2306       }
2307 
2308       // Kill the eliminated test
2309       C->set_major_progress();
2310       Node *kill_con = _igvn.intcon( 1-flip );
2311       set_ctrl(kill_con, C->root());
2312       _igvn.replace_input_of(iff, 1, kill_con);
2313       // Find surviving projection
2314       assert(iff->is_If(), "");
2315       ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2316       // Find loads off the surviving projection; remove their control edge
2317       for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {


2323           --i;
2324           --imax;
2325         }
2326       }
2327 
2328     } // End of is IF
2329 
2330   }
2331 
2332   // Update loop limits
2333   if (conditional_rc) {
2334     pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2335                                  : (Node*)new MaxINode(pre_limit, orig_limit);
2336     register_new_node(pre_limit, pre_ctrl);
2337   }
2338   _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2339 
2340   // Note:: we are making the main loop limit no longer precise;
2341   // need to round up based on stride.
2342   cl->set_nonexact_trip_count();
2343   if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
2344     // "Standard" round-up logic:  ([main_limit-init+(y-1)]/y)*y+init
2345     // Hopefully, compiler will optimize for powers of 2.
2346     Node *ctrl = get_ctrl(main_limit);
2347     Node *stride = cl->stride();
2348     Node *init = cl->init_trip()->uncast();
2349     Node *span = new SubINode(main_limit,init);
2350     register_new_node(span,ctrl);
2351     Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1));
2352     Node *add = new AddINode(span,rndup);
2353     register_new_node(add,ctrl);
2354     Node *div = new DivINode(0,add,stride);
2355     register_new_node(div,ctrl);
2356     Node *mul = new MulINode(div,stride);
2357     register_new_node(mul,ctrl);
2358     Node *newlim = new AddINode(mul,init);
2359     register_new_node(newlim,ctrl);
2360     main_limit = newlim;
2361   }
2362 
2363   Node *main_cle = cl->loopexit();
2364   Node *main_bol = main_cle->in(1);
2365   // Hacking loop bounds; need private copies of exit test
2366   if( main_bol->outcnt() > 1 ) {// BoolNode shared?
2367     main_bol = main_bol->clone();// Clone a private BoolNode
2368     register_new_node( main_bol, main_cle->in(0) );
2369     _igvn.replace_input_of(main_cle, 1, main_bol);
2370   }
2371   Node *main_cmp = main_bol->in(1);
2372   if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
2373     main_cmp = main_cmp->clone();// Clone a private CmpNode
2374     register_new_node( main_cmp, main_cle->in(0) );
2375     _igvn.replace_input_of(main_bol, 1, main_cmp);
2376   }
2377   // Hack the now-private loop bounds
2378   _igvn.replace_input_of(main_cmp, 2, main_limit);
2379   // The OpaqueNode is unshared by design
2380   assert( opqzm->outcnt() == 1, "cannot hack shared node" );
2381   _igvn.replace_input_of(opqzm, 1, main_limit);
2382 }




1451   Node *stride = loop_head->stride();
1452 
1453   Node *opaq = NULL;
1454   if (adjust_min_trip) {       // If not maximally unrolling, need adjustment
1455     // Search for zero-trip guard.
1456 
1457     // Check the shape of the graph at the loop entry. If an inappropriate
1458     // graph shape is encountered, the compiler bails out loop unrolling;
1459     // compilation of the method will still succeed.
1460     if (!is_canonical_main_loop_entry(loop_head)) {
1461       return;
1462     }
1463     opaq = ctrl->in(0)->in(1)->in(1)->in(2);
1464     // Zero-trip test uses an 'opaque' node which is not shared.
1465     assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
1466   }
1467 
1468   C->set_major_progress();
1469 
1470   Node* new_limit = NULL;

1471   int stride_con = stride->get_int();
1472   int stride_p = (stride_con > 0) ? stride_con : -stride_con;
1473   uint old_trip_count = loop_head->trip_count();
1474   // Verify that unroll policy result is still valid.
1475   assert(old_trip_count > 1 &&
1476       (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
1477 
1478   // Adjust loop limit to keep valid iterations number after unroll.
1479   // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
1480   // which may overflow.
1481   if (!adjust_min_trip) {
1482     assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
1483         "odd trip count for maximally unroll");
1484     // Don't need to adjust limit for maximally unroll since trip count is even.
1485   } else if (loop_head->has_exact_trip_count() && init->is_Con()) {
1486     // Loop's limit is constant. Loop's init could be constant when pre-loop
1487     // become peeled iteration.
1488     jlong init_con = init->get_int();
1489     // We can keep old loop limit if iterations count stays the same:
1490     //   old_trip_count == new_trip_count * 2


1602     // Step 3: Find the min-trip test guaranteed before a 'main' loop.
1603     // Make it a 1-trip test (means at least 2 trips).
1604 
1605     // Guard test uses an 'opaque' node which is not shared.  Hence I
1606     // can edit it's inputs directly.  Hammer in the new limit for the
1607     // minimum-trip guard.
1608     assert(opaq->outcnt() == 1, "");
1609     _igvn.replace_input_of(opaq, 1, new_limit);
1610   }
1611 
1612   // Adjust max trip count. The trip count is intentionally rounded
1613   // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
1614   // the main, unrolled, part of the loop will never execute as it is protected
1615   // by the min-trip test.  See bug 4834191 for a case where we over-unrolled
1616   // and later determined that part of the unrolled loop was dead.
1617   loop_head->set_trip_count(old_trip_count / 2);
1618 
1619   // Double the count of original iterations in the unrolled loop body.
1620   loop_head->double_unrolled_count();
1621 




















































1622   // ---------
1623   // Step 4: Clone the loop body.  Move it inside the loop.  This loop body
1624   // represents the odd iterations; since the loop trips an even number of
1625   // times its backedge is never taken.  Kill the backedge.
1626   uint dd = dom_depth(loop_head);
1627   clone_loop( loop, old_new, dd );
1628 
1629   // Make backedges of the clone equal to backedges of the original.
1630   // Make the fall-in from the original come from the fall-out of the clone.
1631   for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) {
1632     Node* phi = loop_head->fast_out(j);
1633     if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) {
1634       Node *newphi = old_new[phi->_idx];
1635       _igvn.hash_delete( phi );
1636       _igvn.hash_delete( newphi );
1637 
1638       phi   ->set_req(LoopNode::   EntryControl, newphi->in(LoopNode::LoopBackControl));
1639       newphi->set_req(LoopNode::LoopBackControl, phi   ->in(LoopNode::LoopBackControl));
1640       phi   ->set_req(LoopNode::LoopBackControl, C->top());
1641     }


1834     //     else /* scale < 0 and stride < 0 */
1835     //       I > (upper_limit-offset)/scale
1836     //   )
1837     //
1838     // (upper_limit-offset) may overflow or underflow.
1839     // But it is fine since main loop will either have
1840     // less iterations or will be skipped in such case.
1841     *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl);
1842 
1843     // The underflow limit: low_limit <= scale*I+offset.
1844     // For pre-loop compute
1845     //   NOT(scale*I+offset >= low_limit)
1846     //   scale*I+offset < low_limit
1847     //   ( if (scale > 0) /* and stride > 0 */
1848     //       I < (low_limit-offset)/scale
1849     //     else /* scale < 0 and stride < 0 */
1850     //       I > (low_limit-offset)/scale
1851     //   )
1852 
1853     if (low_limit->get_int() == -max_jint) {

1854       // We need this guard when scale*pre_limit+offset >= limit
1855       // due to underflow. So we need execute pre-loop until
1856       // scale*I+offset >= min_int. But (min_int-offset) will
1857       // underflow when offset > 0 and X will be > original_limit
1858       // when stride > 0. To avoid it we replace positive offset with 0.
1859       //
1860       // Also (min_int+1 == -max_int) is used instead of min_int here
1861       // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1862       Node* shift = _igvn.intcon(31);
1863       set_ctrl(shift, C->root());
1864       Node* sign = new RShiftINode(offset, shift);
1865       register_new_node(sign, pre_ctrl);
1866       offset = new AndINode(offset, sign);
1867       register_new_node(offset, pre_ctrl);
1868     } else {
1869       assert(low_limit->get_int() == 0, "wrong low limit for range check");
1870       // The only problem we have here when offset == min_int
1871       // since (0-min_int) == min_int. It may be fine for stride > 0
1872       // but for stride < 0 X will be < original_limit. To avoid it
1873       // max(pre_limit, original_limit) is used in do_range_check().


1885     //   scale*I+offset >= upper_limit
1886     //   scale*I+offset+1 > upper_limit
1887     //   ( if (scale < 0) /* and stride > 0 */
1888     //       I < (upper_limit-(offset+1))/scale
1889     //     else /* scale > 0 and stride < 0 */
1890     //       I > (upper_limit-(offset+1))/scale
1891     //   )
1892     //
1893     // (upper_limit-offset-1) may underflow or overflow.
1894     // To avoid it min(pre_limit, original_limit) is used
1895     // in do_range_check() for stride > 0 and max() for < 0.
1896     Node *one  = _igvn.intcon(1);
1897     set_ctrl(one, C->root());
1898 
1899     Node *plus_one = new AddINode(offset, one);
1900     register_new_node( plus_one, pre_ctrl );
1901     // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1902     *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl);
1903 
1904     if (low_limit->get_int() == -max_jint) {

1905       // We need this guard when scale*main_limit+offset >= limit
1906       // due to underflow. So we need execute main-loop while
1907       // scale*I+offset+1 > min_int. But (min_int-offset-1) will
1908       // underflow when (offset+1) > 0 and X will be < main_limit
1909       // when scale < 0 (and stride > 0). To avoid it we replace
1910       // positive (offset+1) with 0.
1911       //
1912       // Also (min_int+1 == -max_int) is used instead of min_int here
1913       // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1914       Node* shift = _igvn.intcon(31);
1915       set_ctrl(shift, C->root());
1916       Node* sign = new RShiftINode(plus_one, shift);
1917       register_new_node(sign, pre_ctrl);
1918       plus_one = new AndINode(plus_one, sign);
1919       register_new_node(plus_one, pre_ctrl);
1920     } else {
1921       assert(low_limit->get_int() == 0, "wrong low limit for range check");
1922       // The only problem we have here when offset == max_int
1923       // since (max_int+1) == min_int and (0-min_int) == min_int.
1924       // But it is fine since main loop will either have


2186 #ifdef ASSERT
2187       if (TraceRangeLimitCheck) {
2188         tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2189         bol->dump(2);
2190       }
2191 #endif
2192       // At this point we have the expression as:
2193       //   scale_con * trip_counter + offset :: limit
2194       // where scale_con, offset and limit are loop invariant.  Trip_counter
2195       // monotonically increases by stride_con, a constant.  Both (or either)
2196       // stride_con and scale_con can be negative which will flip about the
2197       // sense of the test.
2198 
2199       // Adjust pre and main loop limits to guard the correct iteration set
2200       if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2201         if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2202           // The underflow and overflow limits: 0 <= scale*I+offset < limit
2203           add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2204           if (!conditional_rc) {
2205             // (0-offset)/scale could be outside of loop iterations range.
2206             conditional_rc = !loop->dominates_backedge(iff);
2207           }
2208         } else {
2209           if (PrintOpto) {
2210             tty->print_cr("missed RCE opportunity");
2211           }
2212           continue;             // In release mode, ignore it
2213         }
2214       } else {                  // Otherwise work on normal compares
2215         switch( b_test._test ) {
2216         case BoolTest::gt:
2217           // Fall into GE case
2218         case BoolTest::ge:
2219           // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2220           scale_con = -scale_con;
2221           offset = new SubINode( zero, offset );
2222           register_new_node( offset, pre_ctrl );
2223           limit  = new SubINode( zero, limit  );
2224           register_new_node( limit, pre_ctrl );
2225           // Fall into LE case
2226         case BoolTest::le:
2227           if (b_test._test != BoolTest::gt) {
2228             // Convert X <= Y to X < Y+1
2229             limit = new AddINode( limit, one );
2230             register_new_node( limit, pre_ctrl );
2231           }
2232           // Fall into LT case
2233         case BoolTest::lt:
2234           // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
2235           // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
2236           // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2237           add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
2238           if (!conditional_rc) {
2239             // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
2240             // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
2241             // still be outside of loop range.
2242             conditional_rc = !loop->dominates_backedge(iff);
2243           }
2244           break;
2245         default:
2246           if (PrintOpto) {
2247             tty->print_cr("missed RCE opportunity");
2248           }
2249           continue;             // Unhandled case
2250         }
2251       }
2252 
2253       // Kill the eliminated test
2254       C->set_major_progress();
2255       Node *kill_con = _igvn.intcon( 1-flip );
2256       set_ctrl(kill_con, C->root());
2257       _igvn.replace_input_of(iff, 1, kill_con);
2258       // Find surviving projection
2259       assert(iff->is_If(), "");
2260       ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2261       // Find loads off the surviving projection; remove their control edge
2262       for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {


2268           --i;
2269           --imax;
2270         }
2271       }
2272 
2273     } // End of is IF
2274 
2275   }
2276 
2277   // Update loop limits
2278   if (conditional_rc) {
2279     pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2280                                  : (Node*)new MaxINode(pre_limit, orig_limit);
2281     register_new_node(pre_limit, pre_ctrl);
2282   }
2283   _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2284 
2285   // Note:: we are making the main loop limit no longer precise;
2286   // need to round up based on stride.
2287   cl->set_nonexact_trip_count();




















2288   Node *main_cle = cl->loopexit();
2289   Node *main_bol = main_cle->in(1);
2290   // Hacking loop bounds; need private copies of exit test
2291   if( main_bol->outcnt() > 1 ) {// BoolNode shared?
2292     main_bol = main_bol->clone();// Clone a private BoolNode
2293     register_new_node( main_bol, main_cle->in(0) );
2294     _igvn.replace_input_of(main_cle, 1, main_bol);
2295   }
2296   Node *main_cmp = main_bol->in(1);
2297   if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
2298     main_cmp = main_cmp->clone();// Clone a private CmpNode
2299     register_new_node( main_cmp, main_cle->in(0) );
2300     _igvn.replace_input_of(main_bol, 1, main_cmp);
2301   }
2302   // Hack the now-private loop bounds
2303   _igvn.replace_input_of(main_cmp, 2, main_limit);
2304   // The OpaqueNode is unshared by design
2305   assert( opqzm->outcnt() == 1, "cannot hack shared node" );
2306   _igvn.replace_input_of(opqzm, 1, main_limit);
2307 }


src/share/vm/opto/loopTransform.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File