1451 Node *stride = loop_head->stride();
1452
1453 Node *opaq = NULL;
1454 if (adjust_min_trip) { // If not maximally unrolling, need adjustment
1455 // Search for zero-trip guard.
1456
1457 // Check the shape of the graph at the loop entry. If an inappropriate
1458 // graph shape is encountered, the compiler bails out loop unrolling;
1459 // compilation of the method will still succeed.
1460 if (!is_canonical_main_loop_entry(loop_head)) {
1461 return;
1462 }
1463 opaq = ctrl->in(0)->in(1)->in(1)->in(2);
1464 // Zero-trip test uses an 'opaque' node which is not shared.
1465 assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
1466 }
1467
1468 C->set_major_progress();
1469
1470 Node* new_limit = NULL;
1471 if (UnrollLimitCheck) {
1472 int stride_con = stride->get_int();
1473 int stride_p = (stride_con > 0) ? stride_con : -stride_con;
1474 uint old_trip_count = loop_head->trip_count();
1475 // Verify that unroll policy result is still valid.
1476 assert(old_trip_count > 1 &&
1477 (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
1478
1479 // Adjust loop limit to keep valid iterations number after unroll.
1480 // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
1481 // which may overflow.
1482 if (!adjust_min_trip) {
1483 assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
1484 "odd trip count for maximally unroll");
1485 // Don't need to adjust limit for maximally unroll since trip count is even.
1486 } else if (loop_head->has_exact_trip_count() && init->is_Con()) {
1487 // Loop's limit is constant. Loop's init could be constant when pre-loop
1488 // become peeled iteration.
1489 jlong init_con = init->get_int();
1490 // We can keep old loop limit if iterations count stays the same:
1491 // old_trip_count == new_trip_count * 2
1603 // Step 3: Find the min-trip test guaranteed before a 'main' loop.
1604 // Make it a 1-trip test (means at least 2 trips).
1605
1606 // Guard test uses an 'opaque' node which is not shared. Hence I
1607 // can edit it's inputs directly. Hammer in the new limit for the
1608 // minimum-trip guard.
1609 assert(opaq->outcnt() == 1, "");
1610 _igvn.replace_input_of(opaq, 1, new_limit);
1611 }
1612
1613 // Adjust max trip count. The trip count is intentionally rounded
1614 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
1615 // the main, unrolled, part of the loop will never execute as it is protected
1616 // by the min-trip test. See bug 4834191 for a case where we over-unrolled
1617 // and later determined that part of the unrolled loop was dead.
1618 loop_head->set_trip_count(old_trip_count / 2);
1619
1620 // Double the count of original iterations in the unrolled loop body.
1621 loop_head->double_unrolled_count();
1622
1623 } else { // LoopLimitCheck
1624
1625 // Adjust max trip count. The trip count is intentionally rounded
1626 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
1627 // the main, unrolled, part of the loop will never execute as it is protected
1628 // by the min-trip test. See bug 4834191 for a case where we over-unrolled
1629 // and later determined that part of the unrolled loop was dead.
1630 loop_head->set_trip_count(loop_head->trip_count() / 2);
1631
1632 // Double the count of original iterations in the unrolled loop body.
1633 loop_head->double_unrolled_count();
1634
1635 // -----------
1636 // Step 2: Cut back the trip counter for an unroll amount of 2.
1637 // Loop will normally trip (limit - init)/stride_con. Since it's a
1638 // CountedLoop this is exact (stride divides limit-init exactly).
1639 // We are going to double the loop body, so we want to knock off any
1640 // odd iteration: (trip_cnt & ~1). Then back compute a new limit.
1641 Node *span = new SubINode( limit, init );
1642 register_new_node( span, ctrl );
1643 Node *trip = new DivINode( 0, span, stride );
1644 register_new_node( trip, ctrl );
1645 Node *mtwo = _igvn.intcon(-2);
1646 set_ctrl(mtwo, C->root());
1647 Node *rond = new AndINode( trip, mtwo );
1648 register_new_node( rond, ctrl );
1649 Node *spn2 = new MulINode( rond, stride );
1650 register_new_node( spn2, ctrl );
1651 new_limit = new AddINode( spn2, init );
1652 register_new_node( new_limit, ctrl );
1653
1654 // Hammer in the new limit
1655 Node *ctrl2 = loop_end->in(0);
1656 Node *cmp2 = new CmpINode( loop_head->incr(), new_limit );
1657 register_new_node( cmp2, ctrl2 );
1658 Node *bol2 = new BoolNode( cmp2, loop_end->test_trip() );
1659 register_new_node( bol2, ctrl2 );
1660 _igvn.replace_input_of(loop_end, CountedLoopEndNode::TestValue, bol2);
1661
1662 // Step 3: Find the min-trip test guaranteed before a 'main' loop.
1663 // Make it a 1-trip test (means at least 2 trips).
1664 if( adjust_min_trip ) {
1665 assert( new_limit != NULL, "" );
1666 // Guard test uses an 'opaque' node which is not shared. Hence I
1667 // can edit it's inputs directly. Hammer in the new limit for the
1668 // minimum-trip guard.
1669 assert( opaq->outcnt() == 1, "" );
1670 _igvn.hash_delete(opaq);
1671 opaq->set_req(1, new_limit);
1672 }
1673 } // LoopLimitCheck
1674
1675 // ---------
1676 // Step 4: Clone the loop body. Move it inside the loop. This loop body
1677 // represents the odd iterations; since the loop trips an even number of
1678 // times its backedge is never taken. Kill the backedge.
1679 uint dd = dom_depth(loop_head);
1680 clone_loop( loop, old_new, dd );
1681
1682 // Make backedges of the clone equal to backedges of the original.
1683 // Make the fall-in from the original come from the fall-out of the clone.
1684 for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) {
1685 Node* phi = loop_head->fast_out(j);
1686 if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) {
1687 Node *newphi = old_new[phi->_idx];
1688 _igvn.hash_delete( phi );
1689 _igvn.hash_delete( newphi );
1690
1691 phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl));
1692 newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl));
1693 phi ->set_req(LoopNode::LoopBackControl, C->top());
1694 }
1887 // else /* scale < 0 and stride < 0 */
1888 // I > (upper_limit-offset)/scale
1889 // )
1890 //
1891 // (upper_limit-offset) may overflow or underflow.
1892 // But it is fine since main loop will either have
1893 // less iterations or will be skipped in such case.
1894 *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl);
1895
1896 // The underflow limit: low_limit <= scale*I+offset.
1897 // For pre-loop compute
1898 // NOT(scale*I+offset >= low_limit)
1899 // scale*I+offset < low_limit
1900 // ( if (scale > 0) /* and stride > 0 */
1901 // I < (low_limit-offset)/scale
1902 // else /* scale < 0 and stride < 0 */
1903 // I > (low_limit-offset)/scale
1904 // )
1905
1906 if (low_limit->get_int() == -max_jint) {
1907 if (!RangeLimitCheck) return;
1908 // We need this guard when scale*pre_limit+offset >= limit
1909 // due to underflow. So we need execute pre-loop until
1910 // scale*I+offset >= min_int. But (min_int-offset) will
1911 // underflow when offset > 0 and X will be > original_limit
1912 // when stride > 0. To avoid it we replace positive offset with 0.
1913 //
1914 // Also (min_int+1 == -max_int) is used instead of min_int here
1915 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1916 Node* shift = _igvn.intcon(31);
1917 set_ctrl(shift, C->root());
1918 Node* sign = new RShiftINode(offset, shift);
1919 register_new_node(sign, pre_ctrl);
1920 offset = new AndINode(offset, sign);
1921 register_new_node(offset, pre_ctrl);
1922 } else {
1923 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1924 // The only problem we have here when offset == min_int
1925 // since (0-min_int) == min_int. It may be fine for stride > 0
1926 // but for stride < 0 X will be < original_limit. To avoid it
1927 // max(pre_limit, original_limit) is used in do_range_check().
1939 // scale*I+offset >= upper_limit
1940 // scale*I+offset+1 > upper_limit
1941 // ( if (scale < 0) /* and stride > 0 */
1942 // I < (upper_limit-(offset+1))/scale
1943 // else /* scale > 0 and stride < 0 */
1944 // I > (upper_limit-(offset+1))/scale
1945 // )
1946 //
1947 // (upper_limit-offset-1) may underflow or overflow.
1948 // To avoid it min(pre_limit, original_limit) is used
1949 // in do_range_check() for stride > 0 and max() for < 0.
1950 Node *one = _igvn.intcon(1);
1951 set_ctrl(one, C->root());
1952
1953 Node *plus_one = new AddINode(offset, one);
1954 register_new_node( plus_one, pre_ctrl );
1955 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1956 *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl);
1957
1958 if (low_limit->get_int() == -max_jint) {
1959 if (!RangeLimitCheck) return;
1960 // We need this guard when scale*main_limit+offset >= limit
1961 // due to underflow. So we need execute main-loop while
1962 // scale*I+offset+1 > min_int. But (min_int-offset-1) will
1963 // underflow when (offset+1) > 0 and X will be < main_limit
1964 // when scale < 0 (and stride > 0). To avoid it we replace
1965 // positive (offset+1) with 0.
1966 //
1967 // Also (min_int+1 == -max_int) is used instead of min_int here
1968 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1969 Node* shift = _igvn.intcon(31);
1970 set_ctrl(shift, C->root());
1971 Node* sign = new RShiftINode(plus_one, shift);
1972 register_new_node(sign, pre_ctrl);
1973 plus_one = new AndINode(plus_one, sign);
1974 register_new_node(plus_one, pre_ctrl);
1975 } else {
1976 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1977 // The only problem we have here when offset == max_int
1978 // since (max_int+1) == min_int and (0-min_int) == min_int.
1979 // But it is fine since main loop will either have
2241 #ifdef ASSERT
2242 if (TraceRangeLimitCheck) {
2243 tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2244 bol->dump(2);
2245 }
2246 #endif
2247 // At this point we have the expression as:
2248 // scale_con * trip_counter + offset :: limit
2249 // where scale_con, offset and limit are loop invariant. Trip_counter
2250 // monotonically increases by stride_con, a constant. Both (or either)
2251 // stride_con and scale_con can be negative which will flip about the
2252 // sense of the test.
2253
2254 // Adjust pre and main loop limits to guard the correct iteration set
2255 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2256 if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2257 // The underflow and overflow limits: 0 <= scale*I+offset < limit
2258 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2259 if (!conditional_rc) {
2260 // (0-offset)/scale could be outside of loop iterations range.
2261 conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
2262 }
2263 } else {
2264 if (PrintOpto) {
2265 tty->print_cr("missed RCE opportunity");
2266 }
2267 continue; // In release mode, ignore it
2268 }
2269 } else { // Otherwise work on normal compares
2270 switch( b_test._test ) {
2271 case BoolTest::gt:
2272 // Fall into GE case
2273 case BoolTest::ge:
2274 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2275 scale_con = -scale_con;
2276 offset = new SubINode( zero, offset );
2277 register_new_node( offset, pre_ctrl );
2278 limit = new SubINode( zero, limit );
2279 register_new_node( limit, pre_ctrl );
2280 // Fall into LE case
2281 case BoolTest::le:
2282 if (b_test._test != BoolTest::gt) {
2283 // Convert X <= Y to X < Y+1
2284 limit = new AddINode( limit, one );
2285 register_new_node( limit, pre_ctrl );
2286 }
2287 // Fall into LT case
2288 case BoolTest::lt:
2289 // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
2290 // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
2291 // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2292 add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
2293 if (!conditional_rc) {
2294 // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
2295 // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
2296 // still be outside of loop range.
2297 conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
2298 }
2299 break;
2300 default:
2301 if (PrintOpto) {
2302 tty->print_cr("missed RCE opportunity");
2303 }
2304 continue; // Unhandled case
2305 }
2306 }
2307
2308 // Kill the eliminated test
2309 C->set_major_progress();
2310 Node *kill_con = _igvn.intcon( 1-flip );
2311 set_ctrl(kill_con, C->root());
2312 _igvn.replace_input_of(iff, 1, kill_con);
2313 // Find surviving projection
2314 assert(iff->is_If(), "");
2315 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2316 // Find loads off the surviving projection; remove their control edge
2317 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2323 --i;
2324 --imax;
2325 }
2326 }
2327
2328 } // End of is IF
2329
2330 }
2331
2332 // Update loop limits
2333 if (conditional_rc) {
2334 pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2335 : (Node*)new MaxINode(pre_limit, orig_limit);
2336 register_new_node(pre_limit, pre_ctrl);
2337 }
2338 _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2339
2340 // Note:: we are making the main loop limit no longer precise;
2341 // need to round up based on stride.
2342 cl->set_nonexact_trip_count();
2343 if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
2344 // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init
2345 // Hopefully, compiler will optimize for powers of 2.
2346 Node *ctrl = get_ctrl(main_limit);
2347 Node *stride = cl->stride();
2348 Node *init = cl->init_trip()->uncast();
2349 Node *span = new SubINode(main_limit,init);
2350 register_new_node(span,ctrl);
2351 Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1));
2352 Node *add = new AddINode(span,rndup);
2353 register_new_node(add,ctrl);
2354 Node *div = new DivINode(0,add,stride);
2355 register_new_node(div,ctrl);
2356 Node *mul = new MulINode(div,stride);
2357 register_new_node(mul,ctrl);
2358 Node *newlim = new AddINode(mul,init);
2359 register_new_node(newlim,ctrl);
2360 main_limit = newlim;
2361 }
2362
2363 Node *main_cle = cl->loopexit();
2364 Node *main_bol = main_cle->in(1);
2365 // Hacking loop bounds; need private copies of exit test
2366 if( main_bol->outcnt() > 1 ) {// BoolNode shared?
2367 main_bol = main_bol->clone();// Clone a private BoolNode
2368 register_new_node( main_bol, main_cle->in(0) );
2369 _igvn.replace_input_of(main_cle, 1, main_bol);
2370 }
2371 Node *main_cmp = main_bol->in(1);
2372 if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
2373 main_cmp = main_cmp->clone();// Clone a private CmpNode
2374 register_new_node( main_cmp, main_cle->in(0) );
2375 _igvn.replace_input_of(main_bol, 1, main_cmp);
2376 }
2377 // Hack the now-private loop bounds
2378 _igvn.replace_input_of(main_cmp, 2, main_limit);
2379 // The OpaqueNode is unshared by design
2380 assert( opqzm->outcnt() == 1, "cannot hack shared node" );
2381 _igvn.replace_input_of(opqzm, 1, main_limit);
2382 }
|
1451 Node *stride = loop_head->stride();
1452
1453 Node *opaq = NULL;
1454 if (adjust_min_trip) { // If not maximally unrolling, need adjustment
1455 // Search for zero-trip guard.
1456
1457 // Check the shape of the graph at the loop entry. If an inappropriate
1458 // graph shape is encountered, the compiler bails out loop unrolling;
1459 // compilation of the method will still succeed.
1460 if (!is_canonical_main_loop_entry(loop_head)) {
1461 return;
1462 }
1463 opaq = ctrl->in(0)->in(1)->in(1)->in(2);
1464 // Zero-trip test uses an 'opaque' node which is not shared.
1465 assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
1466 }
1467
1468 C->set_major_progress();
1469
1470 Node* new_limit = NULL;
1471 int stride_con = stride->get_int();
1472 int stride_p = (stride_con > 0) ? stride_con : -stride_con;
1473 uint old_trip_count = loop_head->trip_count();
1474 // Verify that unroll policy result is still valid.
1475 assert(old_trip_count > 1 &&
1476 (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
1477
1478 // Adjust loop limit to keep valid iterations number after unroll.
1479 // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
1480 // which may overflow.
1481 if (!adjust_min_trip) {
1482 assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
1483 "odd trip count for maximally unroll");
1484 // Don't need to adjust limit for maximally unroll since trip count is even.
1485 } else if (loop_head->has_exact_trip_count() && init->is_Con()) {
1486 // Loop's limit is constant. Loop's init could be constant when pre-loop
1487 // become peeled iteration.
1488 jlong init_con = init->get_int();
1489 // We can keep old loop limit if iterations count stays the same:
1490 // old_trip_count == new_trip_count * 2
1602 // Step 3: Find the min-trip test guaranteed before a 'main' loop.
1603 // Make it a 1-trip test (means at least 2 trips).
1604
1605 // Guard test uses an 'opaque' node which is not shared. Hence I
1606 // can edit it's inputs directly. Hammer in the new limit for the
1607 // minimum-trip guard.
1608 assert(opaq->outcnt() == 1, "");
1609 _igvn.replace_input_of(opaq, 1, new_limit);
1610 }
1611
1612 // Adjust max trip count. The trip count is intentionally rounded
1613 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
1614 // the main, unrolled, part of the loop will never execute as it is protected
1615 // by the min-trip test. See bug 4834191 for a case where we over-unrolled
1616 // and later determined that part of the unrolled loop was dead.
1617 loop_head->set_trip_count(old_trip_count / 2);
1618
1619 // Double the count of original iterations in the unrolled loop body.
1620 loop_head->double_unrolled_count();
1621
1622 // ---------
1623 // Step 4: Clone the loop body. Move it inside the loop. This loop body
1624 // represents the odd iterations; since the loop trips an even number of
1625 // times its backedge is never taken. Kill the backedge.
1626 uint dd = dom_depth(loop_head);
1627 clone_loop( loop, old_new, dd );
1628
1629 // Make backedges of the clone equal to backedges of the original.
1630 // Make the fall-in from the original come from the fall-out of the clone.
1631 for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) {
1632 Node* phi = loop_head->fast_out(j);
1633 if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) {
1634 Node *newphi = old_new[phi->_idx];
1635 _igvn.hash_delete( phi );
1636 _igvn.hash_delete( newphi );
1637
1638 phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl));
1639 newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl));
1640 phi ->set_req(LoopNode::LoopBackControl, C->top());
1641 }
1834 // else /* scale < 0 and stride < 0 */
1835 // I > (upper_limit-offset)/scale
1836 // )
1837 //
1838 // (upper_limit-offset) may overflow or underflow.
1839 // But it is fine since main loop will either have
1840 // less iterations or will be skipped in such case.
1841 *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl);
1842
1843 // The underflow limit: low_limit <= scale*I+offset.
1844 // For pre-loop compute
1845 // NOT(scale*I+offset >= low_limit)
1846 // scale*I+offset < low_limit
1847 // ( if (scale > 0) /* and stride > 0 */
1848 // I < (low_limit-offset)/scale
1849 // else /* scale < 0 and stride < 0 */
1850 // I > (low_limit-offset)/scale
1851 // )
1852
1853 if (low_limit->get_int() == -max_jint) {
1854 // We need this guard when scale*pre_limit+offset >= limit
1855 // due to underflow. So we need execute pre-loop until
1856 // scale*I+offset >= min_int. But (min_int-offset) will
1857 // underflow when offset > 0 and X will be > original_limit
1858 // when stride > 0. To avoid it we replace positive offset with 0.
1859 //
1860 // Also (min_int+1 == -max_int) is used instead of min_int here
1861 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1862 Node* shift = _igvn.intcon(31);
1863 set_ctrl(shift, C->root());
1864 Node* sign = new RShiftINode(offset, shift);
1865 register_new_node(sign, pre_ctrl);
1866 offset = new AndINode(offset, sign);
1867 register_new_node(offset, pre_ctrl);
1868 } else {
1869 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1870 // The only problem we have here when offset == min_int
1871 // since (0-min_int) == min_int. It may be fine for stride > 0
1872 // but for stride < 0 X will be < original_limit. To avoid it
1873 // max(pre_limit, original_limit) is used in do_range_check().
1885 // scale*I+offset >= upper_limit
1886 // scale*I+offset+1 > upper_limit
1887 // ( if (scale < 0) /* and stride > 0 */
1888 // I < (upper_limit-(offset+1))/scale
1889 // else /* scale > 0 and stride < 0 */
1890 // I > (upper_limit-(offset+1))/scale
1891 // )
1892 //
1893 // (upper_limit-offset-1) may underflow or overflow.
1894 // To avoid it min(pre_limit, original_limit) is used
1895 // in do_range_check() for stride > 0 and max() for < 0.
1896 Node *one = _igvn.intcon(1);
1897 set_ctrl(one, C->root());
1898
1899 Node *plus_one = new AddINode(offset, one);
1900 register_new_node( plus_one, pre_ctrl );
1901 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1902 *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl);
1903
1904 if (low_limit->get_int() == -max_jint) {
1905 // We need this guard when scale*main_limit+offset >= limit
1906 // due to underflow. So we need execute main-loop while
1907 // scale*I+offset+1 > min_int. But (min_int-offset-1) will
1908 // underflow when (offset+1) > 0 and X will be < main_limit
1909 // when scale < 0 (and stride > 0). To avoid it we replace
1910 // positive (offset+1) with 0.
1911 //
1912 // Also (min_int+1 == -max_int) is used instead of min_int here
1913 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1914 Node* shift = _igvn.intcon(31);
1915 set_ctrl(shift, C->root());
1916 Node* sign = new RShiftINode(plus_one, shift);
1917 register_new_node(sign, pre_ctrl);
1918 plus_one = new AndINode(plus_one, sign);
1919 register_new_node(plus_one, pre_ctrl);
1920 } else {
1921 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1922 // The only problem we have here when offset == max_int
1923 // since (max_int+1) == min_int and (0-min_int) == min_int.
1924 // But it is fine since main loop will either have
2186 #ifdef ASSERT
2187 if (TraceRangeLimitCheck) {
2188 tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2189 bol->dump(2);
2190 }
2191 #endif
2192 // At this point we have the expression as:
2193 // scale_con * trip_counter + offset :: limit
2194 // where scale_con, offset and limit are loop invariant. Trip_counter
2195 // monotonically increases by stride_con, a constant. Both (or either)
2196 // stride_con and scale_con can be negative which will flip about the
2197 // sense of the test.
2198
2199 // Adjust pre and main loop limits to guard the correct iteration set
2200 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
2201 if( b_test._test == BoolTest::lt ) { // Range checks always use lt
2202 // The underflow and overflow limits: 0 <= scale*I+offset < limit
2203 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
2204 if (!conditional_rc) {
2205 // (0-offset)/scale could be outside of loop iterations range.
2206 conditional_rc = !loop->dominates_backedge(iff);
2207 }
2208 } else {
2209 if (PrintOpto) {
2210 tty->print_cr("missed RCE opportunity");
2211 }
2212 continue; // In release mode, ignore it
2213 }
2214 } else { // Otherwise work on normal compares
2215 switch( b_test._test ) {
2216 case BoolTest::gt:
2217 // Fall into GE case
2218 case BoolTest::ge:
2219 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
2220 scale_con = -scale_con;
2221 offset = new SubINode( zero, offset );
2222 register_new_node( offset, pre_ctrl );
2223 limit = new SubINode( zero, limit );
2224 register_new_node( limit, pre_ctrl );
2225 // Fall into LE case
2226 case BoolTest::le:
2227 if (b_test._test != BoolTest::gt) {
2228 // Convert X <= Y to X < Y+1
2229 limit = new AddINode( limit, one );
2230 register_new_node( limit, pre_ctrl );
2231 }
2232 // Fall into LT case
2233 case BoolTest::lt:
2234 // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
2235 // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
2236 // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2237 add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
2238 if (!conditional_rc) {
2239 // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
2240 // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
2241 // still be outside of loop range.
2242 conditional_rc = !loop->dominates_backedge(iff);
2243 }
2244 break;
2245 default:
2246 if (PrintOpto) {
2247 tty->print_cr("missed RCE opportunity");
2248 }
2249 continue; // Unhandled case
2250 }
2251 }
2252
2253 // Kill the eliminated test
2254 C->set_major_progress();
2255 Node *kill_con = _igvn.intcon( 1-flip );
2256 set_ctrl(kill_con, C->root());
2257 _igvn.replace_input_of(iff, 1, kill_con);
2258 // Find surviving projection
2259 assert(iff->is_If(), "");
2260 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
2261 // Find loads off the surviving projection; remove their control edge
2262 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2268 --i;
2269 --imax;
2270 }
2271 }
2272
2273 } // End of is IF
2274
2275 }
2276
2277 // Update loop limits
2278 if (conditional_rc) {
2279 pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
2280 : (Node*)new MaxINode(pre_limit, orig_limit);
2281 register_new_node(pre_limit, pre_ctrl);
2282 }
2283 _igvn.replace_input_of(pre_opaq, 1, pre_limit);
2284
2285 // Note:: we are making the main loop limit no longer precise;
2286 // need to round up based on stride.
2287 cl->set_nonexact_trip_count();
2288 Node *main_cle = cl->loopexit();
2289 Node *main_bol = main_cle->in(1);
2290 // Hacking loop bounds; need private copies of exit test
2291 if( main_bol->outcnt() > 1 ) {// BoolNode shared?
2292 main_bol = main_bol->clone();// Clone a private BoolNode
2293 register_new_node( main_bol, main_cle->in(0) );
2294 _igvn.replace_input_of(main_cle, 1, main_bol);
2295 }
2296 Node *main_cmp = main_bol->in(1);
2297 if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
2298 main_cmp = main_cmp->clone();// Clone a private CmpNode
2299 register_new_node( main_cmp, main_cle->in(0) );
2300 _igvn.replace_input_of(main_bol, 1, main_cmp);
2301 }
2302 // Hack the now-private loop bounds
2303 _igvn.replace_input_of(main_cmp, 2, main_limit);
2304 // The OpaqueNode is unshared by design
2305 assert( opqzm->outcnt() == 1, "cannot hack shared node" );
2306 _igvn.replace_input_of(opqzm, 1, main_limit);
2307 }
|