1592 // "to" address is assumed to be heapword aligned. 1593 // 1594 // Arguments for generated stub: 1595 // to: O0 1596 // value: O1 1597 // count: O2 treated as signed 1598 // 1599 address generate_fill(BasicType t, bool aligned, const char* name) { 1600 __ align(CodeEntryAlignment); 1601 StubCodeMark mark(this, "StubRoutines", name); 1602 address start = __ pc(); 1603 1604 const Register to = O0; // source array address 1605 const Register value = O1; // fill value 1606 const Register count = O2; // elements count 1607 // O3 is used as a temp register 1608 1609 assert_clean_int(count, O3); // Make sure 'count' is clean int. 1610 1611 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; 1612 Label L_fill_2_bytes, L_fill_4_bytes, L_fill_32_bytes; 1613 1614 int shift = -1; 1615 switch (t) { 1616 case T_BYTE: 1617 shift = 2; 1618 break; 1619 case T_SHORT: 1620 shift = 1; 1621 break; 1622 case T_INT: 1623 shift = 0; 1624 break; 1625 default: ShouldNotReachHere(); 1626 } 1627 1628 BLOCK_COMMENT("Entry:"); 1629 1630 if (t == T_BYTE) { 1631 // Zero extend value 1632 __ and3(value, 0xff, value); 1633 __ sllx(value, 8, O3); 1634 __ or3(value, O3, value); 1635 } 1636 if (t == T_SHORT) { 1637 // Zero extend value 1638 __ sethi(0xffff0000, O3); 1639 __ andn(value, O3, value); 1640 } 1641 if (t == T_BYTE || t == T_SHORT) { 1642 __ sllx(value, 16, O3); 1643 __ or3(value, O3, value); 1644 } 1645 1646 __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element 1647 __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_4_bytes); // use unsigned cmp 1648 __ delayed()->andcc(count, 1<<shift, G0); 1649 1650 if (!aligned && (t == T_BYTE || t == T_SHORT)) { 1651 // align source address at 4 bytes address boundary 1652 if (t == T_BYTE) { 1653 // One byte misalignment happens only for byte arrays 1654 __ andcc(to, 1, G0); 1655 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1); 1656 __ delayed()->nop(); 1657 __ stb(value, to, 0); 1658 __ inc(to, 1); 1659 __ dec(count, 1); 1660 __ BIND(L_skip_align1); 1661 } 1662 // Two bytes misalignment happens only for byte and short (char) arrays 1663 __ andcc(to, 2, G0); 1664 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2); 1665 __ delayed()->nop(); 1666 __ sth(value, to, 0); 1667 __ inc(to, 2); 1668 __ dec(count, 1 << (shift - 1)); 1681 __ BIND(L_fill_32_bytes); 1682 #ifdef _LP64 1683 } 1684 #endif 1685 1686 if (t == T_INT) { 1687 // Zero extend value 1688 __ srl(value, 0, value); 1689 } 1690 if (t == T_BYTE || t == T_SHORT || t == T_INT) { 1691 __ sllx(value, 32, O3); 1692 __ or3(value, O3, value); 1693 } 1694 1695 Label L_check_fill_8_bytes; 1696 // Fill 32-byte chunks 1697 __ subcc(count, 8 << shift, count); 1698 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes); 1699 __ delayed()->nop(); 1700 1701 Label L_fill_32_bytes_loop; 1702 __ align(16); 1703 __ BIND(L_fill_32_bytes_loop); 1704 1705 __ stx(value, to, 0); 1706 __ stx(value, to, 8); 1707 __ stx(value, to, 16); 1708 __ stx(value, to, 24); 1709 1710 __ subcc(count, 8 << shift, count); 1711 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop); 1712 __ delayed()->add(to, 32, to); 1713 1714 __ BIND(L_check_fill_8_bytes); 1715 __ addcc(count, 8 << shift, count); 1716 __ brx(Assembler::zero, false, Assembler::pn, L_exit); 1717 __ delayed()->subcc(count, 1 << (shift + 1), count); 1718 __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes); 1719 __ delayed()->andcc(count, 1<<shift, G0); 1720 1721 // 1722 // length is too short, just fill 8 bytes at a time 1723 // 1724 Label L_fill_8_bytes_loop; 1725 __ BIND(L_fill_8_bytes_loop); 1726 __ stx(value, to, 0); 1727 __ subcc(count, 1 << (shift + 1), count); 1728 __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop); 1729 __ delayed()->add(to, 8, to); 1730 1731 // fill trailing 4 bytes 1732 __ andcc(count, 1<<shift, G0); // in delay slot of branches 1733 __ BIND(L_fill_4_bytes); 1734 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes); 1735 if (t == T_BYTE || t == T_SHORT) { 1736 __ delayed()->andcc(count, 1<<(shift-1), G0); 1737 } else { 1738 __ delayed()->nop(); 1739 } 1740 __ stw(value, to, 0); 1741 if (t == T_BYTE || t == T_SHORT) { 1742 __ inc(to, 4); 1743 // fill trailing 2 bytes 1744 __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches 1745 __ BIND(L_fill_2_bytes); 1746 __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte); 1747 __ delayed()->andcc(count, 1, count); 1748 __ sth(value, to, 0); 1749 if (t == T_BYTE) { 1750 __ inc(to, 2); 1751 // fill trailing byte 1752 __ andcc(count, 1, count); // in delay slot of branches 1753 __ BIND(L_fill_byte); 1754 __ brx(Assembler::zero, false, Assembler::pt, L_exit); 1755 __ delayed()->nop(); 1756 __ stb(value, to, 0); 1757 } else { 1758 __ BIND(L_fill_byte); 1759 } 1760 } else { 1761 __ BIND(L_fill_2_bytes); 1762 } 1763 __ BIND(L_exit); 1764 __ retl(); 1765 __ delayed()->mov(G0, O0); // return 0 1766 return start; 1767 } 1768 1769 // 1770 // Generate stub for conjoint short copy. If "aligned" is true, the 1771 // "from" and "to" addresses are assumed to be heapword aligned. 1772 // 1773 // Arguments for generated stub: 1774 // from: O0 1775 // to: O1 1776 // count: O2 treated as signed 1777 // 1778 address generate_conjoint_short_copy(bool aligned, const char * name) { 1779 // Do reverse copy. 1780 1781 __ align(CodeEntryAlignment); 1782 StubCodeMark mark(this, "StubRoutines", name); 1783 address start = __ pc(); 1784 address nooverlap_target = aligned ? 1785 StubRoutines::arrayof_jshort_disjoint_arraycopy() : | 1592 // "to" address is assumed to be heapword aligned. 1593 // 1594 // Arguments for generated stub: 1595 // to: O0 1596 // value: O1 1597 // count: O2 treated as signed 1598 // 1599 address generate_fill(BasicType t, bool aligned, const char* name) { 1600 __ align(CodeEntryAlignment); 1601 StubCodeMark mark(this, "StubRoutines", name); 1602 address start = __ pc(); 1603 1604 const Register to = O0; // source array address 1605 const Register value = O1; // fill value 1606 const Register count = O2; // elements count 1607 // O3 is used as a temp register 1608 1609 assert_clean_int(count, O3); // Make sure 'count' is clean int. 1610 1611 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; 1612 Label L_fill_2_bytes, L_fill_elements, L_fill_32_bytes; 1613 1614 int shift = -1; 1615 switch (t) { 1616 case T_BYTE: 1617 shift = 2; 1618 break; 1619 case T_SHORT: 1620 shift = 1; 1621 break; 1622 case T_INT: 1623 shift = 0; 1624 break; 1625 default: ShouldNotReachHere(); 1626 } 1627 1628 BLOCK_COMMENT("Entry:"); 1629 1630 if (t == T_BYTE) { 1631 // Zero extend value 1632 __ and3(value, 0xff, value); 1633 __ sllx(value, 8, O3); 1634 __ or3(value, O3, value); 1635 } 1636 if (t == T_SHORT) { 1637 // Zero extend value 1638 __ sllx(value, 48, value); 1639 __ srlx(value, 48, value); 1640 } 1641 if (t == T_BYTE || t == T_SHORT) { 1642 __ sllx(value, 16, O3); 1643 __ or3(value, O3, value); 1644 } 1645 1646 __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element 1647 __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp 1648 __ delayed()->andcc(count, 1, G0); 1649 1650 if (!aligned && (t == T_BYTE || t == T_SHORT)) { 1651 // align source address at 4 bytes address boundary 1652 if (t == T_BYTE) { 1653 // One byte misalignment happens only for byte arrays 1654 __ andcc(to, 1, G0); 1655 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1); 1656 __ delayed()->nop(); 1657 __ stb(value, to, 0); 1658 __ inc(to, 1); 1659 __ dec(count, 1); 1660 __ BIND(L_skip_align1); 1661 } 1662 // Two bytes misalignment happens only for byte and short (char) arrays 1663 __ andcc(to, 2, G0); 1664 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2); 1665 __ delayed()->nop(); 1666 __ sth(value, to, 0); 1667 __ inc(to, 2); 1668 __ dec(count, 1 << (shift - 1)); 1681 __ BIND(L_fill_32_bytes); 1682 #ifdef _LP64 1683 } 1684 #endif 1685 1686 if (t == T_INT) { 1687 // Zero extend value 1688 __ srl(value, 0, value); 1689 } 1690 if (t == T_BYTE || t == T_SHORT || t == T_INT) { 1691 __ sllx(value, 32, O3); 1692 __ or3(value, O3, value); 1693 } 1694 1695 Label L_check_fill_8_bytes; 1696 // Fill 32-byte chunks 1697 __ subcc(count, 8 << shift, count); 1698 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes); 1699 __ delayed()->nop(); 1700 1701 Label L_fill_32_bytes_loop, L_fill_4_bytes; 1702 __ align(16); 1703 __ BIND(L_fill_32_bytes_loop); 1704 1705 __ stx(value, to, 0); 1706 __ stx(value, to, 8); 1707 __ stx(value, to, 16); 1708 __ stx(value, to, 24); 1709 1710 __ subcc(count, 8 << shift, count); 1711 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop); 1712 __ delayed()->add(to, 32, to); 1713 1714 __ BIND(L_check_fill_8_bytes); 1715 __ addcc(count, 8 << shift, count); 1716 __ brx(Assembler::zero, false, Assembler::pn, L_exit); 1717 __ delayed()->subcc(count, 1 << (shift + 1), count); 1718 __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes); 1719 __ delayed()->andcc(count, 1<<shift, G0); 1720 1721 // 1722 // length is too short, just fill 8 bytes at a time 1723 // 1724 Label L_fill_8_bytes_loop; 1725 __ BIND(L_fill_8_bytes_loop); 1726 __ stx(value, to, 0); 1727 __ subcc(count, 1 << (shift + 1), count); 1728 __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop); 1729 __ delayed()->add(to, 8, to); 1730 1731 // fill trailing 4 bytes 1732 __ andcc(count, 1<<shift, G0); // in delay slot of branches 1733 if (t == T_INT) { 1734 __ BIND(L_fill_elements); 1735 } 1736 __ BIND(L_fill_4_bytes); 1737 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes); 1738 if (t == T_BYTE || t == T_SHORT) { 1739 __ delayed()->andcc(count, 1<<(shift-1), G0); 1740 } else { 1741 __ delayed()->nop(); 1742 } 1743 __ stw(value, to, 0); 1744 if (t == T_BYTE || t == T_SHORT) { 1745 __ inc(to, 4); 1746 // fill trailing 2 bytes 1747 __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches 1748 __ BIND(L_fill_2_bytes); 1749 __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte); 1750 __ delayed()->andcc(count, 1, count); 1751 __ sth(value, to, 0); 1752 if (t == T_BYTE) { 1753 __ inc(to, 2); 1754 // fill trailing byte 1755 __ andcc(count, 1, count); // in delay slot of branches 1756 __ BIND(L_fill_byte); 1757 __ brx(Assembler::zero, false, Assembler::pt, L_exit); 1758 __ delayed()->nop(); 1759 __ stb(value, to, 0); 1760 } else { 1761 __ BIND(L_fill_byte); 1762 } 1763 } else { 1764 __ BIND(L_fill_2_bytes); 1765 } 1766 __ BIND(L_exit); 1767 __ retl(); 1768 __ delayed()->nop(); 1769 1770 // Handle copies less than 8 bytes. Int is handled elsewhere. 1771 if (t == T_BYTE) { 1772 __ BIND(L_fill_elements); 1773 Label L_fill_2, L_fill_4; 1774 // in delay slot __ andcc(count, 1, G0); 1775 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2); 1776 __ delayed()->andcc(count, 2, G0); 1777 __ stb(value, to, 0); 1778 __ inc(to, 1); 1779 __ BIND(L_fill_2); 1780 __ brx(Assembler::zero, false, Assembler::pt, L_fill_4); 1781 __ delayed()->andcc(count, 4, G0); 1782 __ stb(value, to, 0); 1783 __ stb(value, to, 1); 1784 __ inc(to, 2); 1785 __ BIND(L_fill_4); 1786 __ brx(Assembler::zero, false, Assembler::pt, L_exit); 1787 __ delayed()->nop(); 1788 __ stb(value, to, 0); 1789 __ stb(value, to, 1); 1790 __ stb(value, to, 2); 1791 __ retl(); 1792 __ delayed()->stb(value, to, 3); 1793 } 1794 1795 if (t == T_SHORT) { 1796 Label L_fill_2; 1797 __ BIND(L_fill_elements); 1798 // in delay slot __ andcc(count, 1, G0); 1799 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2); 1800 __ delayed()->andcc(count, 2, G0); 1801 __ sth(value, to, 0); 1802 __ inc(to, 2); 1803 __ BIND(L_fill_2); 1804 __ brx(Assembler::zero, false, Assembler::pt, L_exit); 1805 __ delayed()->nop(); 1806 __ sth(value, to, 0); 1807 __ retl(); 1808 __ delayed()->sth(value, to, 2); 1809 } 1810 return start; 1811 } 1812 1813 // 1814 // Generate stub for conjoint short copy. If "aligned" is true, the 1815 // "from" and "to" addresses are assumed to be heapword aligned. 1816 // 1817 // Arguments for generated stub: 1818 // from: O0 1819 // to: O1 1820 // count: O2 treated as signed 1821 // 1822 address generate_conjoint_short_copy(bool aligned, const char * name) { 1823 // Do reverse copy. 1824 1825 __ align(CodeEntryAlignment); 1826 StubCodeMark mark(this, "StubRoutines", name); 1827 address start = __ pc(); 1828 address nooverlap_target = aligned ? 1829 StubRoutines::arrayof_jshort_disjoint_arraycopy() : |