src/cpu/sparc/vm/stubGenerator_sparc.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 6982370 Sdiff src/cpu/sparc/vm

src/cpu/sparc/vm/stubGenerator_sparc.cpp

Print this page




1592   //  "to" address is assumed to be heapword aligned.
1593   //
1594   // Arguments for generated stub:
1595   //      to:    O0
1596   //      value: O1
1597   //      count: O2 treated as signed
1598   //
1599   address generate_fill(BasicType t, bool aligned, const char* name) {
         // Emits a stub that stores the fill value to memory starting at the
         // address in O0, for element type t (T_BYTE, T_SHORT or T_INT).
         // Returns the pc captured right after the entry alignment.
1600     __ align(CodeEntryAlignment);
1601     StubCodeMark mark(this, "StubRoutines", name);
1602     address start = __ pc();
1603 
1604     const Register to        = O0;   // destination array address (target of the stores below)
1605     const Register value     = O1;   // fill value
1606     const Register count     = O2;   // elements count
1607     // O3 is used as a temp register
1608 
1609     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1610 
1611     Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
1612     Label L_fill_2_bytes, L_fill_4_bytes, L_fill_32_bytes;
1613 
         // shift is log2 of the number of elements of type t that fit in 4 bytes:
         // 1<<shift elements span 4 bytes, 2<<shift span 8, 8<<shift span 32,
         // and (for byte/short) 1<<(shift-1) span 2 bytes.
1614     int shift = -1;
1615     switch (t) {
1616        case T_BYTE:
1617         shift = 2;
1618         break;
1619        case T_SHORT:
1620         shift = 1;
1621         break;
1622       case T_INT:
1623          shift = 0;
1624         break;
1625       default: ShouldNotReachHere();
1626     }
1627 
1628     BLOCK_COMMENT("Entry:");
1629 
1630     if (t == T_BYTE) {
1631       // Zero extend value, then copy the byte into bits 8..15 (16-bit pattern)
1632       __ and3(value, 0xff, value);
1633       __ sllx(value, 8, O3);
1634       __ or3(value, O3, value);
1635     }
1636     if (t == T_SHORT) {
1637       // Zero extend value: clear the bits of value selected by the mask in O3
           // NOTE(review): presumably O3 ends up as 0xffff0000, which would leave
           // bits 32..63 of value untouched -- confirm sethi operand semantics.
1638       __ sethi(0xffff0000, O3);
1639       __ andn(value, O3, value);
1640     }
1641     if (t == T_BYTE || t == T_SHORT) {
           // Copy the 16-bit pattern into bits 16..31 (32-bit pattern)
1642       __ sllx(value, 16, O3);
1643       __ or3(value, O3, value);
1644     }
1645 
1646     __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
1647     __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_4_bytes); // use unsigned cmp
1648     __ delayed()->andcc(count, 1<<shift, G0); // delay slot: sets the condition codes tested at L_fill_4_bytes
1649 
1650     if (!aligned && (t == T_BYTE || t == T_SHORT)) {
1651       // align destination address (to) at 4 bytes address boundary
1652       if (t == T_BYTE) {
1653         // One byte misalignment happens only for byte arrays
1654         __ andcc(to, 1, G0);
1655         __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
1656         __ delayed()->nop();
1657         __ stb(value, to, 0);
1658         __ inc(to, 1);
1659         __ dec(count, 1);
1660         __ BIND(L_skip_align1);
1661       }
1662       // Two bytes misalignment happens only for byte and short (char) arrays
1663       __ andcc(to, 2, G0);
1664       __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
1665       __ delayed()->nop();
1666       __ sth(value, to, 0);
1667       __ inc(to, 2);
1668       __ dec(count, 1 << (shift - 1)); // number of elements in the 2 bytes just stored

         // NOTE(review): original lines 1669-1680 are not shown in this view.
         // Presumably L_skip_align2 is bound there and the brace closed under
         // _LP64 below is opened there -- confirm against the full file.

1681     __ BIND(L_fill_32_bytes);
1682 #ifdef _LP64
1683     }
1684 #endif
1685 
1686     if (t == T_INT) {
1687       // Zero extend value
1688       __ srl(value, 0, value);
1689     }
1690     if (t == T_BYTE || t == T_SHORT || t == T_INT) {
           // Copy the 32-bit pattern into bits 32..63 so each stx writes 8 fill bytes
1691       __ sllx(value, 32, O3);
1692       __ or3(value, O3, value);
1693     }
1694 
1695     Label L_check_fill_8_bytes;
1696     // Fill 32-byte chunks
         // count is decremented by one 32-byte chunk up front; a negative result
         // means fewer than 32 bytes remain.
1697     __ subcc(count, 8 << shift, count);
1698     __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
1699     __ delayed()->nop();
1700 
1701     Label L_fill_32_bytes_loop;
1702     __ align(16);
1703     __ BIND(L_fill_32_bytes_loop);
1704 
1705     __ stx(value, to, 0);
1706     __ stx(value, to, 8);
1707     __ stx(value, to, 16);
1708     __ stx(value, to, 24);
1709 
1710     __ subcc(count, 8 << shift, count);
1711     __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
1712     __ delayed()->add(to, 32, to); // delay slot: advance the destination past the chunk just stored
1713 
1714     __ BIND(L_check_fill_8_bytes);
         // Add back the 32-byte chunk subtracted above; zero means nothing is left.
1715     __ addcc(count, 8 << shift, count);
1716     __ brx(Assembler::zero, false, Assembler::pn, L_exit);
1717     __ delayed()->subcc(count, 1 << (shift + 1), count); // delay slot: pre-subtract one 8-byte chunk
1718     __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
1719     __ delayed()->andcc(count, 1<<shift, G0); // delay slot: 4-byte test consumed at L_fill_4_bytes
1720 
1721     //
1722     // length is too short, just fill 8 bytes at a time
1723     //
         // count stays biased by one 8-byte chunk here. The bias is a multiple of
         // 1<<(shift+1), so the low-bit tests in the tail code are unaffected.
1724     Label L_fill_8_bytes_loop;
1725     __ BIND(L_fill_8_bytes_loop);
1726     __ stx(value, to, 0);
1727     __ subcc(count, 1 << (shift + 1), count);
1728     __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
1729     __ delayed()->add(to, 8, to);
1730 
1731     // fill trailing 4 bytes
1732     __ andcc(count, 1<<shift, G0);  // in delay slot of branches



1733     __ BIND(L_fill_4_bytes);
         // Condition codes here come from the andcc above or from the delay slot
         // of a branch to this label: zero means no 4-byte piece remains.
1734     __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
1735     if (t == T_BYTE || t == T_SHORT) {
1736       __ delayed()->andcc(count, 1<<(shift-1), G0); // delay slot: 2-byte test for L_fill_2_bytes
1737     } else {
1738       __ delayed()->nop();
1739     }
1740     __ stw(value, to, 0);
1741     if (t == T_BYTE || t == T_SHORT) {
1742       __ inc(to, 4);
1743       // fill trailing 2 bytes
1744       __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
1745       __ BIND(L_fill_2_bytes);
1746       __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
1747       __ delayed()->andcc(count, 1, count); // delay slot: 1-byte test for L_fill_byte
1748       __ sth(value, to, 0);
1749       if (t == T_BYTE) {
1750         __ inc(to, 2);
1751         // fill trailing byte
1752         __ andcc(count, 1, count);  // in delay slot of branches
1753         __ BIND(L_fill_byte);
1754         __ brx(Assembler::zero, false, Assembler::pt, L_exit);
1755         __ delayed()->nop();
1756         __ stb(value, to, 0);
1757       } else {
             // Shorts have no single-byte tail; the label is bound only so the
             // branch above has a target.
1758         __ BIND(L_fill_byte);
1759       }
1760     } else {
           // Ints have no 2-byte tail; the label is bound only so the branch
           // above has a target.
1761       __ BIND(L_fill_2_bytes);
1762     }
1763     __ BIND(L_exit);
1764     __ retl();
1765     __ delayed()->mov(G0, O0); // return 0 (overwrites O0, which held the destination address)









































1766     return start;
1767   }
1768 
1769   //
1770   //  Generate stub for conjoint short copy.  If "aligned" is true, the
1771   //  "from" and "to" addresses are assumed to be heapword aligned.
1772   //
1773   // Arguments for generated stub:
1774   //      from:  O0
1775   //      to:    O1
1776   //      count: O2 treated as signed
1777   //
1778   address generate_conjoint_short_copy(bool aligned, const char * name) {
1779     // Do reverse copy.
1780 
1781     __ align(CodeEntryAlignment);
1782     StubCodeMark mark(this, "StubRoutines", name);
1783     address start = __ pc();
1784     address nooverlap_target = aligned ?
1785         StubRoutines::arrayof_jshort_disjoint_arraycopy() :




1592   //  "to" address is assumed to be heapword aligned.
1593   //
1594   // Arguments for generated stub:
1595   //      to:    O0
1596   //      value: O1
1597   //      count: O2 treated as signed
1598   //
1599   address generate_fill(BasicType t, bool aligned, const char* name) {
         // Emits a stub that stores the fill value to memory starting at the
         // address in O0, for element type t (T_BYTE, T_SHORT or T_INT).
         // Returns the pc captured right after the entry alignment.
1600     __ align(CodeEntryAlignment);
1601     StubCodeMark mark(this, "StubRoutines", name);
1602     address start = __ pc();
1603 
1604     const Register to        = O0;   // destination array address (target of the stores below)
1605     const Register value     = O1;   // fill value
1606     const Register count     = O2;   // elements count
1607     // O3 is used as a temp register
1608 
1609     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1610 
1611     Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
1612     Label L_fill_2_bytes, L_fill_elements, L_fill_32_bytes;
1613 
         // shift is log2 of the number of elements of type t that fit in 4 bytes:
         // 1<<shift elements span 4 bytes, 2<<shift span 8, 8<<shift span 32,
         // and (for byte/short) 1<<(shift-1) span 2 bytes.
1614     int shift = -1;
1615     switch (t) {
1616        case T_BYTE:
1617         shift = 2;
1618         break;
1619        case T_SHORT:
1620         shift = 1;
1621         break;
1622       case T_INT:
1623          shift = 0;
1624         break;
1625       default: ShouldNotReachHere();
1626     }
1627 
1628     BLOCK_COMMENT("Entry:");
1629 
1630     if (t == T_BYTE) {
1631       // Zero extend value, then copy the byte into bits 8..15 (16-bit pattern)
1632       __ and3(value, 0xff, value);
1633       __ sllx(value, 8, O3);
1634       __ or3(value, O3, value);
1635     }
1636     if (t == T_SHORT) {
1637       // Zero extend value: shifting left then logically right by 48 clears bits 16..63
1638       __ sllx(value, 48, value);
1639       __ srlx(value, 48, value);
1640     }
1641     if (t == T_BYTE || t == T_SHORT) {
           // Copy the 16-bit pattern into bits 16..31 (32-bit pattern)
1642       __ sllx(value, 16, O3);
1643       __ or3(value, O3, value);
1644     }
1645 
1646     __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
1647     __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp
1648     __ delayed()->andcc(count, 1, G0); // delay slot: tests bit 0 of count for the code at L_fill_elements
1649 
1650     if (!aligned && (t == T_BYTE || t == T_SHORT)) {
1651       // align destination address (to) at 4 bytes address boundary
1652       if (t == T_BYTE) {
1653         // One byte misalignment happens only for byte arrays
1654         __ andcc(to, 1, G0);
1655         __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
1656         __ delayed()->nop();
1657         __ stb(value, to, 0);
1658         __ inc(to, 1);
1659         __ dec(count, 1);
1660         __ BIND(L_skip_align1);
1661       }
1662       // Two bytes misalignment happens only for byte and short (char) arrays
1663       __ andcc(to, 2, G0);
1664       __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
1665       __ delayed()->nop();
1666       __ sth(value, to, 0);
1667       __ inc(to, 2);
1668       __ dec(count, 1 << (shift - 1)); // number of elements in the 2 bytes just stored

         // NOTE(review): original lines 1669-1680 are not shown in this view.
         // Presumably L_skip_align2 is bound there and the brace closed under
         // _LP64 below is opened there -- confirm against the full file.

1681     __ BIND(L_fill_32_bytes);
1682 #ifdef _LP64
1683     }
1684 #endif
1685 
1686     if (t == T_INT) {
1687       // Zero extend value
1688       __ srl(value, 0, value);
1689     }
1690     if (t == T_BYTE || t == T_SHORT || t == T_INT) {
           // Copy the 32-bit pattern into bits 32..63 so each stx writes 8 fill bytes
1691       __ sllx(value, 32, O3);
1692       __ or3(value, O3, value);
1693     }
1694 
1695     Label L_check_fill_8_bytes;
1696     // Fill 32-byte chunks
         // count is decremented by one 32-byte chunk up front; a negative result
         // means fewer than 32 bytes remain.
1697     __ subcc(count, 8 << shift, count);
1698     __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
1699     __ delayed()->nop();
1700 
1701     Label L_fill_32_bytes_loop, L_fill_4_bytes;
1702     __ align(16);
1703     __ BIND(L_fill_32_bytes_loop);
1704 
1705     __ stx(value, to, 0);
1706     __ stx(value, to, 8);
1707     __ stx(value, to, 16);
1708     __ stx(value, to, 24);
1709 
1710     __ subcc(count, 8 << shift, count);
1711     __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
1712     __ delayed()->add(to, 32, to); // delay slot: advance the destination past the chunk just stored
1713 
1714     __ BIND(L_check_fill_8_bytes);
         // Add back the 32-byte chunk subtracted above; zero means nothing is left.
1715     __ addcc(count, 8 << shift, count);
1716     __ brx(Assembler::zero, false, Assembler::pn, L_exit);
1717     __ delayed()->subcc(count, 1 << (shift + 1), count); // delay slot: pre-subtract one 8-byte chunk
1718     __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
1719     __ delayed()->andcc(count, 1<<shift, G0); // delay slot: 4-byte test consumed at L_fill_4_bytes
1720 
1721     //
1722     // length is too short, just fill 8 bytes at a time
1723     //
         // count stays biased by one 8-byte chunk here. The bias is a multiple of
         // 1<<(shift+1), so the low-bit tests in the tail code are unaffected.
1724     Label L_fill_8_bytes_loop;
1725     __ BIND(L_fill_8_bytes_loop);
1726     __ stx(value, to, 0);
1727     __ subcc(count, 1 << (shift + 1), count);
1728     __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
1729     __ delayed()->add(to, 8, to);
1730 
1731     // fill trailing 4 bytes
1732     __ andcc(count, 1<<shift, G0);  // in delay slot of branches
1733     if (t == T_INT) {
           // For ints 1<<shift is 1, so the bit-0 test issued in the delay slot of
           // the short-array branch is the same test this tail expects; short int
           // arrays therefore enter the common tail here.
1734       __ BIND(L_fill_elements);
1735     }
1736     __ BIND(L_fill_4_bytes);
         // Condition codes here come from the andcc above or from the delay slot
         // of a branch to this label: zero means no 4-byte piece remains.
1737     __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
1738     if (t == T_BYTE || t == T_SHORT) {
1739       __ delayed()->andcc(count, 1<<(shift-1), G0); // delay slot: 2-byte test for L_fill_2_bytes
1740     } else {
1741       __ delayed()->nop();
1742     }
1743     __ stw(value, to, 0);
1744     if (t == T_BYTE || t == T_SHORT) {
1745       __ inc(to, 4);
1746       // fill trailing 2 bytes
1747       __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
1748       __ BIND(L_fill_2_bytes);
1749       __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
1750       __ delayed()->andcc(count, 1, count); // delay slot: 1-byte test for L_fill_byte
1751       __ sth(value, to, 0);
1752       if (t == T_BYTE) {
1753         __ inc(to, 2);
1754         // fill trailing byte
1755         __ andcc(count, 1, count);  // in delay slot of branches
1756         __ BIND(L_fill_byte);
1757         __ brx(Assembler::zero, false, Assembler::pt, L_exit);
1758         __ delayed()->nop();
1759         __ stb(value, to, 0);
1760       } else {
             // Shorts have no single-byte tail; the label is bound only so the
             // branch above has a target.
1761         __ BIND(L_fill_byte);
1762       }
1763     } else {
           // Ints have no 2-byte tail; the label is bound only so the branch
           // above has a target.
1764       __ BIND(L_fill_2_bytes);
1765     }
1766     __ BIND(L_exit);
1767     __ retl();
1768     __ delayed()->nop();
1769 
1770     // Handle fills of less than 8 bytes.  Int is handled by the common tail
         // above, where L_fill_elements is bound for T_INT.
1771     if (t == T_BYTE) {
           // Reached only when count < 8 (unsigned), so bits 0..2 of count select
           // byte stores of 1, 2 and 4 bytes. Only stb is used here; this path is
           // entered before the alignment code above has run.
1772       __ BIND(L_fill_elements);
1773       Label L_fill_2, L_fill_4;
1774       // in delay slot __ andcc(count, 1, G0);
1775       __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
1776       __ delayed()->andcc(count, 2, G0); // delay slot: test bit 1 for L_fill_2
1777       __ stb(value, to, 0);
1778       __ inc(to, 1);
1779       __ BIND(L_fill_2);
1780       __ brx(Assembler::zero, false, Assembler::pt, L_fill_4);
1781       __ delayed()->andcc(count, 4, G0); // delay slot: test bit 2 for L_fill_4
1782       __ stb(value, to, 0);
1783       __ stb(value, to, 1);
1784       __ inc(to, 2);
1785       __ BIND(L_fill_4);
1786       __ brx(Assembler::zero, false, Assembler::pt, L_exit);
1787       __ delayed()->nop();
1788       __ stb(value, to, 0);
1789       __ stb(value, to, 1);
1790       __ stb(value, to, 2);
1791       __ retl();
1792       __ delayed()->stb(value, to, 3); // fourth byte is stored in the delay slot of the return
1793     }
1794     
1795     if (t == T_SHORT) {
           // Reached only when count < 4 (unsigned), so bits 0..1 of count select
           // halfword stores of 1 and 2 elements.
1796       Label L_fill_2;
1797       __ BIND(L_fill_elements);
1798       // in delay slot __ andcc(count, 1, G0);
1799       __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
1800       __ delayed()->andcc(count, 2, G0); // delay slot: test bit 1 for L_fill_2
1801       __ sth(value, to, 0);
1802       __ inc(to, 2);
1803       __ BIND(L_fill_2);
1804       __ brx(Assembler::zero, false, Assembler::pt, L_exit);
1805       __ delayed()->nop();
1806       __ sth(value, to, 0);
1807       __ retl();
1808       __ delayed()->sth(value, to, 2); // second halfword is stored in the delay slot of the return
1809     }
1810     return start;
1811   }
1812 
1813   //
1814   //  Generate stub for conjoint short copy.  If "aligned" is true, the
1815   //  "from" and "to" addresses are assumed to be heapword aligned.
1816   //
1817   // Arguments for generated stub:
1818   //      from:  O0
1819   //      to:    O1
1820   //      count: O2 treated as signed
1821   //
1822   address generate_conjoint_short_copy(bool aligned, const char * name) {
1823     // Do reverse copy.
1824 
1825     __ align(CodeEntryAlignment);
1826     StubCodeMark mark(this, "StubRoutines", name);
1827     address start = __ pc();
1828     address nooverlap_target = aligned ?
1829         StubRoutines::arrayof_jshort_disjoint_arraycopy() :


src/cpu/sparc/vm/stubGenerator_sparc.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File