1520 }
1521
1522 // Now its tripping an even number of times remaining. Double loop body.
1523 // Do not adjust pre-guards; they are not needed and do not exist.
1524 if (cl->trip_count() > 0) {
1525 assert((cl->trip_count() & 1) == 0, "missed peeling");
1526 do_unroll(loop, old_new, false);
1527 }
1528 }
1529
1530 //------------------------------dominates_backedge---------------------------------
1531 // Returns true if ctrl is executed on every complete iteration
1532 bool IdealLoopTree::dominates_backedge(Node* ctrl) {
1533 assert(ctrl->is_CFG(), "must be control");
1534 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
1535 return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
1536 }
1537
1538 //------------------------------adjust_limit-----------------------------------
1539 // Helper function for add_constraint().
1540 Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl) {
1541 // Compute "I :: (limit-offset)/scale"
1542 Node *con = new (C) SubINode(rc_limit, offset);
1543 register_new_node(con, pre_ctrl);
1544 Node *X = new (C) DivINode(0, con, scale);
1545 register_new_node(X, pre_ctrl);
1546
1547 // Adjust loop limit
1548 loop_limit = (stride_con > 0)
1549 ? (Node*)(new (C) MinINode(loop_limit, X))
1550 : (Node*)(new (C) MaxINode(loop_limit, X));
1551 register_new_node(loop_limit, pre_ctrl);
1552 return loop_limit;
1553 }
1554
1555 //------------------------------add_constraint---------------------------------
1556 // Constrain the main loop iterations so the conditions:
1557 // low_limit <= scale_con * I + offset < upper_limit
1558 // always holds true. That is, either increase the number of iterations in
1559 // the pre-loop or the post-loop until the condition holds true in the main
1560 // loop. Stride, scale, offset and limit are all loop invariant. Further,
1561 // stride and scale are constants (offset and limit often are).
1562 void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
1563 // For positive stride, the pre-loop limit always uses a MAX function
1564 // and the main loop a MIN function. For negative stride these are
1565 // reversed.
1566
1567 // Also for positive stride*scale the affine function is increasing, so the
1568 // pre-loop must check for underflow and the post-loop for overflow.
1569 // Negative stride*scale reverses this; pre-loop checks for overflow and
1570 // post-loop for underflow.
1571
1572 Node *scale = _igvn.intcon(scale_con);
1573 set_ctrl(scale, C->root());
1574
1575 if ((stride_con^scale_con) >= 0) { // Use XOR to avoid overflow
1576 // The overflow limit: scale*I+offset < upper_limit
1577 // For main-loop compute
1578 // ( if (scale > 0) /* and stride > 0 */
1579 // I < (upper_limit-offset)/scale
1580 // else /* scale < 0 and stride < 0 */
1581 // I > (upper_limit-offset)/scale
1582 // )
1583 //
1584 // (upper_limit-offset) may overflow or underflow.
1585 // But it is fine since main loop will either have
1586 // less iterations or will be skipped in such case.
1587 *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl);
1588
1589 // The underflow limit: low_limit <= scale*I+offset.
1590 // For pre-loop compute
1591 // NOT(scale*I+offset >= low_limit)
1592 // scale*I+offset < low_limit
1593 // ( if (scale > 0) /* and stride > 0 */
1594 // I < (low_limit-offset)/scale
1595 // else /* scale < 0 and stride < 0 */
1596 // I > (low_limit-offset)/scale
1597 // )
1598
1599 if (low_limit->get_int() == -max_jint) {
1600 if (!RangeLimitCheck) return;
1601 // We need this guard when scale*pre_limit+offset >= limit
1602 // due to underflow. So we need execute pre-loop until
1603 // scale*I+offset >= min_int. But (min_int-offset) will
1604 // underflow when offset > 0 and X will be > original_limit
1605 // when stride > 0. To avoid it we replace positive offset with 0.
1606 //
1607 // Also (min_int+1 == -max_int) is used instead of min_int here
1608 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1609 Node* shift = _igvn.intcon(31);
1610 set_ctrl(shift, C->root());
1611 Node* sign = new (C) RShiftINode(offset, shift);
1612 register_new_node(sign, pre_ctrl);
1613 offset = new (C) AndINode(offset, sign);
1614 register_new_node(offset, pre_ctrl);
1615 } else {
1616 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1617 // The only problem we have here when offset == min_int
1618 // since (0-min_int) == min_int. It may be fine for stride > 0
1619 // but for stride < 0 X will be < original_limit. To avoid it
1620 // max(pre_limit, original_limit) is used in do_range_check().
1621 }
1622 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1623 *pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl);
1624
1625 } else { // stride_con*scale_con < 0
1626 // For negative stride*scale pre-loop checks for overflow and
1627 // post-loop for underflow.
1628 //
1629 // The overflow limit: scale*I+offset < upper_limit
1630 // For pre-loop compute
1631 // NOT(scale*I+offset < upper_limit)
1632 // scale*I+offset >= upper_limit
1633 // scale*I+offset+1 > upper_limit
1634 // ( if (scale < 0) /* and stride > 0 */
1635 // I < (upper_limit-(offset+1))/scale
1636 // else /* scale > 0 and stride < 0 */
1637 // I > (upper_limit-(offset+1))/scale
1638 // )
1639 //
1640 // (upper_limit-offset-1) may underflow or overflow.
1641 // To avoid it min(pre_limit, original_limit) is used
1642 // in do_range_check() for stride > 0 and max() for < 0.
1643 Node *one = _igvn.intcon(1);
1644 set_ctrl(one, C->root());
1645
1646 Node *plus_one = new (C) AddINode(offset, one);
1647 register_new_node( plus_one, pre_ctrl );
1648 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1649 *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl);
1650
1651 if (low_limit->get_int() == -max_jint) {
1652 if (!RangeLimitCheck) return;
1653 // We need this guard when scale*main_limit+offset >= limit
1654 // due to underflow. So we need execute main-loop while
1655 // scale*I+offset+1 > min_int. But (min_int-offset-1) will
1656 // underflow when (offset+1) > 0 and X will be < main_limit
1657 // when scale < 0 (and stride > 0). To avoid it we replace
1658 // positive (offset+1) with 0.
1659 //
1660 // Also (min_int+1 == -max_int) is used instead of min_int here
1661 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1662 Node* shift = _igvn.intcon(31);
1663 set_ctrl(shift, C->root());
1664 Node* sign = new (C) RShiftINode(plus_one, shift);
1665 register_new_node(sign, pre_ctrl);
1666 plus_one = new (C) AndINode(plus_one, sign);
1667 register_new_node(plus_one, pre_ctrl);
1668 } else {
1669 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1670 // The only problem we have here when offset == max_int
1671 // since (max_int+1) == min_int and (0-min_int) == min_int.
1672 // But it is fine since main loop will either have
1673 // less iterations or will be skipped in such case.
1674 }
1675 // The underflow limit: low_limit <= scale*I+offset.
1676 // For main-loop compute
1677 // scale*I+offset+1 > low_limit
1678 // ( if (scale < 0) /* and stride > 0 */
1679 // I < (low_limit-(offset+1))/scale
1680 // else /* scale > 0 and stride < 0 */
1681 // I > (low_limit-(offset+1))/scale
1682 // )
1683
1684 *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl);
1685 }
1686 }
1687
1688
1689 //------------------------------is_scaled_iv---------------------------------
1690 // Return true if exp is a constant times an induction var
1691 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
1692 if (exp == iv) {
1693 if (p_scale != NULL) {
1694 *p_scale = 1;
1695 }
1696 return true;
1697 }
1698 int opc = exp->Opcode();
1699 if (opc == Op_MulI) {
1700 if (exp->in(1) == iv && exp->in(2)->is_Con()) {
1701 if (p_scale != NULL) {
1702 *p_scale = exp->in(2)->get_int();
1703 }
1704 return true;
|
1520 }
1521
1522 // Now its tripping an even number of times remaining. Double loop body.
1523 // Do not adjust pre-guards; they are not needed and do not exist.
1524 if (cl->trip_count() > 0) {
1525 assert((cl->trip_count() & 1) == 0, "missed peeling");
1526 do_unroll(loop, old_new, false);
1527 }
1528 }
1529
1530 //------------------------------dominates_backedge---------------------------------
1531 // Returns true if ctrl is executed on every complete iteration
1532 bool IdealLoopTree::dominates_backedge(Node* ctrl) {
1533 assert(ctrl->is_CFG(), "must be control");
1534 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
1535 return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
1536 }
1537
1538 //------------------------------adjust_limit-----------------------------------
1539 // Helper function for add_constraint().
1540 Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl, bool round_up) {
1541 // Compute "I :: (limit-offset)/scale"
1542 Node *con = new (C) SubINode(rc_limit, offset);
1543 register_new_node(con, pre_ctrl);
1544 Node *X = new (C) DivINode(0, con, scale);
1545 register_new_node(X, pre_ctrl);
1546
1547 // When the absolute value of scale is greater than one, the integer
1548 // division may round limit down so add one to the limit.
1549 if (round_up) {
1550 X = new (C) AddINode(X, _igvn.intcon(1));
1551 register_new_node(X, pre_ctrl);
1552 }
1553
1554 // Adjust loop limit
1555 loop_limit = (stride_con > 0)
1556 ? (Node*)(new (C) MinINode(loop_limit, X))
1557 : (Node*)(new (C) MaxINode(loop_limit, X));
1558 register_new_node(loop_limit, pre_ctrl);
1559 return loop_limit;
1560 }
1561
1562 //------------------------------add_constraint---------------------------------
1563 // Constrain the main loop iterations so the conditions:
1564 // low_limit <= scale_con * I + offset < upper_limit
1565 // always holds true. That is, either increase the number of iterations in
1566 // the pre-loop or the post-loop until the condition holds true in the main
1567 // loop. Stride, scale, offset and limit are all loop invariant. Further,
1568 // stride and scale are constants (offset and limit often are).
1569 void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
1570 // For positive stride, the pre-loop limit always uses a MAX function
1571 // and the main loop a MIN function. For negative stride these are
1572 // reversed.
1573
1574 // Also for positive stride*scale the affine function is increasing, so the
1575 // pre-loop must check for underflow and the post-loop for overflow.
1576 // Negative stride*scale reverses this; pre-loop checks for overflow and
1577 // post-loop for underflow.
1578
1579 Node *scale = _igvn.intcon(scale_con);
1580 set_ctrl(scale, C->root());
1581
1582 if ((stride_con^scale_con) >= 0) { // Use XOR to avoid overflow
1583 // The overflow limit: scale*I+offset < upper_limit
1584 // For main-loop compute
1585 // ( if (scale > 0) /* and stride > 0 */
1586 // I < (upper_limit-offset)/scale
1587 // else /* scale < 0 and stride < 0 */
1588 // I > (upper_limit-offset)/scale
1589 // )
1590 //
1591 // (upper_limit-offset) may overflow or underflow.
1592 // But it is fine since main loop will either have
1593 // less iterations or will be skipped in such case.
1594 *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl, false);
1595
1596 // The underflow limit: low_limit <= scale*I+offset.
1597 // For pre-loop compute
1598 // NOT(scale*I+offset >= low_limit)
1599 // scale*I+offset < low_limit
1600 // ( if (scale > 0) /* and stride > 0 */
1601 // I < (low_limit-offset)/scale
1602 // else /* scale < 0 and stride < 0 */
1603 // I > (low_limit-offset)/scale
1604 // )
1605
1606 if (low_limit->get_int() == -max_jint) {
1607 if (!RangeLimitCheck) return;
1608 // We need this guard when scale*pre_limit+offset >= limit
1609 // due to underflow. So we need execute pre-loop until
1610 // scale*I+offset >= min_int. But (min_int-offset) will
1611 // underflow when offset > 0 and X will be > original_limit
1612 // when stride > 0. To avoid it we replace positive offset with 0.
1613 //
1614 // Also (min_int+1 == -max_int) is used instead of min_int here
1615 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1616 Node* shift = _igvn.intcon(31);
1617 set_ctrl(shift, C->root());
1618 Node* sign = new (C) RShiftINode(offset, shift);
1619 register_new_node(sign, pre_ctrl);
1620 offset = new (C) AndINode(offset, sign);
1621 register_new_node(offset, pre_ctrl);
1622 } else {
1623 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1624 // The only problem we have here when offset == min_int
1625 // since (0-min_int) == min_int. It may be fine for stride > 0
1626 // but for stride < 0 X will be < original_limit. To avoid it
1627 // max(pre_limit, original_limit) is used in do_range_check().
1628 }
1629 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1630 *pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl,
1631 scale_con > 1 && stride_con > 0);
1632
1633 } else { // stride_con*scale_con < 0
1634 // For negative stride*scale pre-loop checks for overflow and
1635 // post-loop for underflow.
1636 //
1637 // The overflow limit: scale*I+offset < upper_limit
1638 // For pre-loop compute
1639 // NOT(scale*I+offset < upper_limit)
1640 // scale*I+offset >= upper_limit
1641 // scale*I+offset+1 > upper_limit
1642 // ( if (scale < 0) /* and stride > 0 */
1643 // I < (upper_limit-(offset+1))/scale
1644 // else /* scale > 0 and stride < 0 */
1645 // I > (upper_limit-(offset+1))/scale
1646 // )
1647 //
1648 // (upper_limit-offset-1) may underflow or overflow.
1649 // To avoid it min(pre_limit, original_limit) is used
1650 // in do_range_check() for stride > 0 and max() for < 0.
1651 Node *one = _igvn.intcon(1);
1652 set_ctrl(one, C->root());
1653
1654 Node *plus_one = new (C) AddINode(offset, one);
1655 register_new_node( plus_one, pre_ctrl );
1656 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
1657 *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl,
1658 scale_con < -1 && stride_con > 0);
1659
1660 if (low_limit->get_int() == -max_jint) {
1661 if (!RangeLimitCheck) return;
1662 // We need this guard when scale*main_limit+offset >= limit
1663 // due to underflow. So we need execute main-loop while
1664 // scale*I+offset+1 > min_int. But (min_int-offset-1) will
1665 // underflow when (offset+1) > 0 and X will be < main_limit
1666 // when scale < 0 (and stride > 0). To avoid it we replace
1667 // positive (offset+1) with 0.
1668 //
1669 // Also (min_int+1 == -max_int) is used instead of min_int here
1670 // to avoid problem with scale == -1 (min_int/(-1) == min_int).
1671 Node* shift = _igvn.intcon(31);
1672 set_ctrl(shift, C->root());
1673 Node* sign = new (C) RShiftINode(plus_one, shift);
1674 register_new_node(sign, pre_ctrl);
1675 plus_one = new (C) AndINode(plus_one, sign);
1676 register_new_node(plus_one, pre_ctrl);
1677 } else {
1678 assert(low_limit->get_int() == 0, "wrong low limit for range check");
1679 // The only problem we have here when offset == max_int
1680 // since (max_int+1) == min_int and (0-min_int) == min_int.
1681 // But it is fine since main loop will either have
1682 // less iterations or will be skipped in such case.
1683 }
1684 // The underflow limit: low_limit <= scale*I+offset.
1685 // For main-loop compute
1686 // scale*I+offset+1 > low_limit
1687 // ( if (scale < 0) /* and stride > 0 */
1688 // I < (low_limit-(offset+1))/scale
1689 // else /* scale > 0 and stride < 0 */
1690 // I > (low_limit-(offset+1))/scale
1691 // )
1692
1693 *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl,
1694 false);
1695 }
1696 }
1697
1698
1699 //------------------------------is_scaled_iv---------------------------------
1700 // Return true if exp is a constant times an induction var
1701 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
1702 if (exp == iv) {
1703 if (p_scale != NULL) {
1704 *p_scale = 1;
1705 }
1706 return true;
1707 }
1708 int opc = exp->Opcode();
1709 if (opc == Op_MulI) {
1710 if (exp->in(1) == iv && exp->in(2)->is_Con()) {
1711 if (p_scale != NULL) {
1712 *p_scale = exp->in(2)->get_int();
1713 }
1714 return true;
|