1760 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1761 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1762 void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1763
1764 // Bitwise Logical AND of Packed Floating-Point Values
1765 void andpd(XMMRegister dst, XMMRegister src);
1766 void andps(XMMRegister dst, XMMRegister src);
1767 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1768 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1769 void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1770 void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1771
1772 // Bitwise Logical XOR of Packed Floating-Point Values
1773 void xorpd(XMMRegister dst, XMMRegister src);
1774 void xorps(XMMRegister dst, XMMRegister src);
1775 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1776 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1777 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1778 void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1779
1780 // Add packed integers
1781 void paddb(XMMRegister dst, XMMRegister src);
1782 void paddw(XMMRegister dst, XMMRegister src);
1783 void paddd(XMMRegister dst, XMMRegister src);
1784 void paddq(XMMRegister dst, XMMRegister src);
1785 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1786 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1787 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1788 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1789 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1790 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1791 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1792 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1793
1794 // Subtract packed integers
1795 void psubb(XMMRegister dst, XMMRegister src);
1796 void psubw(XMMRegister dst, XMMRegister src);
1797 void psubd(XMMRegister dst, XMMRegister src);
1798 void psubq(XMMRegister dst, XMMRegister src);
1799 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1852 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1853
1854 // And packed integers
1855 void pand(XMMRegister dst, XMMRegister src);
1856 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1857 void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1858
1859 // Or packed integers
1860 void por(XMMRegister dst, XMMRegister src);
1861 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1862 void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1863
1864 // Xor packed integers
1865 void pxor(XMMRegister dst, XMMRegister src);
1866 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1867 void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1868
1869 // Copy the low 128 bits into the high 128 bits of YMM registers.
1870 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1871 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1872
1873 // Load/store the high 128 bits of YMM registers without destroying the other half.
1874 void vinsertf128h(XMMRegister dst, Address src);
1875 void vinserti128h(XMMRegister dst, Address src);
1876 void vextractf128h(Address dst, XMMRegister src);
1877 void vextracti128h(Address dst, XMMRegister src);
1878
1879 // Duplicate 4-byte integer data from src into 8 locations in dst
1880 void vpbroadcastd(XMMRegister dst, XMMRegister src);
1881
1882 // Carry-Less Multiplication Quadword
1883 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
1884 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
1885
1886 // AVX instruction which is used to clear upper 128 bits of YMM registers and
1887 // to avoid the transition penalty between AVX and SSE states. There is no
1888 // penalty if legacy SSE instructions are encoded using VEX prefix because
1889 // they always clear upper 128 bits. It should be used before calling
1890 // runtime code and native libraries.
1891 void vzeroupper();
|
1760 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1761 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1762 void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1763
1764 // Bitwise Logical AND of Packed Floating-Point Values
1765 void andpd(XMMRegister dst, XMMRegister src);
1766 void andps(XMMRegister dst, XMMRegister src);
1767 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1768 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1769 void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1770 void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1771
1772 // Bitwise Logical XOR of Packed Floating-Point Values
1773 void xorpd(XMMRegister dst, XMMRegister src);
1774 void xorps(XMMRegister dst, XMMRegister src);
1775 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1776 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1777 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1778 void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1779
1780 // Add horizontal packed integers
1781 void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1782 void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1783 void phaddw(XMMRegister dst, XMMRegister src);
1784 void phaddd(XMMRegister dst, XMMRegister src);
1785
1786 // Add packed integers
1787 void paddb(XMMRegister dst, XMMRegister src);
1788 void paddw(XMMRegister dst, XMMRegister src);
1789 void paddd(XMMRegister dst, XMMRegister src);
1790 void paddq(XMMRegister dst, XMMRegister src);
1791 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1792 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1793 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1794 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1795 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1796 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1797 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1798 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1799
1800 // Subtract packed integers
1801 void psubb(XMMRegister dst, XMMRegister src);
1802 void psubw(XMMRegister dst, XMMRegister src);
1803 void psubd(XMMRegister dst, XMMRegister src);
1804 void psubq(XMMRegister dst, XMMRegister src);
1805 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1858 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1859
1860 // And packed integers
1861 void pand(XMMRegister dst, XMMRegister src);
1862 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1863 void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1864
1865 // Or packed integers
1866 void por(XMMRegister dst, XMMRegister src);
1867 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1868 void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1869
1870 // Xor packed integers
1871 void pxor(XMMRegister dst, XMMRegister src);
1872 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1873 void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1874
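1875 // Copy the low 128 bits into the high 128 bits of YMM registers (vinsert*128h); NOTE(review): vextractf128h presumably copies the high 128 bits out instead - confirm.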
1876 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1877 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1878 void vextractf128h(XMMRegister dst, XMMRegister src);
1879
1880 // Load/store the high 128 bits of YMM registers without destroying the other half.
1881 void vinsertf128h(XMMRegister dst, Address src);
1882 void vinserti128h(XMMRegister dst, Address src);
1883 void vextractf128h(Address dst, XMMRegister src);
1884 void vextracti128h(Address dst, XMMRegister src);
1885
1886 // Duplicate 4-byte integer data from src into 8 locations in dst
1887 void vpbroadcastd(XMMRegister dst, XMMRegister src);
1888
1889 // Carry-Less Multiplication Quadword
1890 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
1891 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
1892
1893 // AVX instruction which is used to clear upper 128 bits of YMM registers and
1894 // to avoid the transition penalty between AVX and SSE states. There is no
1895 // penalty if legacy SSE instructions are encoded using VEX prefix because
1896 // they always clear upper 128 bits. It should be used before calling
1897 // runtime code and native libraries.
1898 void vzeroupper();
|