< prev index next >

src/cpu/x86/vm/assembler_x86.hpp

Print this page




1636   void orq(Register dst, Address src);
1637   void orq(Register dst, Register src);
1638 
1639   // Pack with unsigned saturation
1640   void packuswb(XMMRegister dst, XMMRegister src);
1641   void packuswb(XMMRegister dst, Address src);
1642   void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1643 
1644   // Permutation of 64bit words
1645   void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1646 
1647   void pause();
1648 
1649   // SSE4.2 string instructions
1650   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1651   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1652 
1653   // SSE 4.1 extract
1654   void pextrd(Register dst, XMMRegister src, int imm8);
1655   void pextrq(Register dst, XMMRegister src, int imm8);


1656 
1657   // SSE 4.1 insert
1658   void pinsrd(XMMRegister dst, Register src, int imm8);
1659   void pinsrq(XMMRegister dst, Register src, int imm8);


1660 
1661   // SSE4.1 packed move
1662   void pmovzxbw(XMMRegister dst, XMMRegister src);
1663   void pmovzxbw(XMMRegister dst, Address src);
1664 
1665 #ifndef _LP64 // no 32bit push/pop on amd64
1666   void popl(Address dst);
1667 #endif
1668 
1669 #ifdef _LP64
1670   void popq(Address dst);
1671 #endif
1672 
1673   void popcntl(Register dst, Address src);
1674   void popcntl(Register dst, Register src);
1675 
1676 #ifdef _LP64
1677   void popcntq(Register dst, Address src);
1678   void popcntq(Register dst, Register src);
1679 #endif


1889   //====================VECTOR ARITHMETIC=====================================
1890 
1891   // Add Packed Floating-Point Values
1892   void addpd(XMMRegister dst, XMMRegister src);
1893   void addps(XMMRegister dst, XMMRegister src);
1894   void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1895   void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1896   void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1897   void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1898 
1899   // Subtract Packed Floating-Point Values
1900   void subpd(XMMRegister dst, XMMRegister src);
1901   void subps(XMMRegister dst, XMMRegister src);
1902   void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1903   void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1904   void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1905   void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1906 
1907   // Multiply Packed Floating-Point Values
1908   void mulpd(XMMRegister dst, XMMRegister src);

1909   void mulps(XMMRegister dst, XMMRegister src);
1910   void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1911   void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1912   void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1913   void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1914 
1915   // Divide Packed Floating-Point Values
1916   void divpd(XMMRegister dst, XMMRegister src);
1917   void divps(XMMRegister dst, XMMRegister src);
1918   void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1919   void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1920   void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1921   void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1922 
1923   // Bitwise Logical AND of Packed Floating-Point Values
1924   void andpd(XMMRegister dst, XMMRegister src);
1925   void andps(XMMRegister dst, XMMRegister src);
1926   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1927   void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1928   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1929   void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1930 



1931   // Bitwise Logical XOR of Packed Floating-Point Values
1932   void xorpd(XMMRegister dst, XMMRegister src);
1933   void xorps(XMMRegister dst, XMMRegister src);
1934   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1935   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1936   void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1937   void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1938 
1939   // Add horizontal packed integers
1940   void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1941   void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1942   void phaddw(XMMRegister dst, XMMRegister src);
1943   void phaddd(XMMRegister dst, XMMRegister src);
1944 
1945   // Add packed integers
1946   void paddb(XMMRegister dst, XMMRegister src);
1947   void paddw(XMMRegister dst, XMMRegister src);
1948   void paddd(XMMRegister dst, XMMRegister src);
1949   void paddq(XMMRegister dst, XMMRegister src);
1950   void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);


2003   void psrlq(XMMRegister dst, XMMRegister shift);
2004   void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2005   void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2006   void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2007   void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2008   void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2009   void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2010 
2011   // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2012   void psraw(XMMRegister dst, int shift);
2013   void psrad(XMMRegister dst, int shift);
2014   void psraw(XMMRegister dst, XMMRegister shift);
2015   void psrad(XMMRegister dst, XMMRegister shift);
2016   void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2017   void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2018   void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2019   void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2020 
2021   // And packed integers
2022   void pand(XMMRegister dst, XMMRegister src);

2023   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2024   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2025 
2026   // Or packed integers
2027   void por(XMMRegister dst, XMMRegister src);
2028   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2029   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2030 
2031   // Xor packed integers
2032   void pxor(XMMRegister dst, XMMRegister src);
2033   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2034   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2035 
2036   // Copy low 128bit into high 128bit of YMM registers.
2037   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2038   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2039   void vextractf128h(XMMRegister dst, XMMRegister src);
2040   void vextracti128h(XMMRegister dst, XMMRegister src);
2041 
2042   // Load/store high 128bit of YMM registers without destroying the other half.




1636   void orq(Register dst, Address src);
1637   void orq(Register dst, Register src);
1638 
1639   // Pack with unsigned saturation
1640   void packuswb(XMMRegister dst, XMMRegister src);
1641   void packuswb(XMMRegister dst, Address src);
1642   void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1643 
1644   // Permutation of 64bit words
1645   void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1646 
1647   void pause();
1648 
1649   // SSE4.2 string instructions
1650   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1651   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1652 
1653   // SSE 4.1 extract
1654   void pextrd(Register dst, XMMRegister src, int imm8);
1655   void pextrq(Register dst, XMMRegister src, int imm8);
1656   // SSE 2 extract
1657   void pextrw(Register dst, XMMRegister src, int imm8);
1658 
1659   // SSE 4.1 insert
1660   void pinsrd(XMMRegister dst, Register src, int imm8);
1661   void pinsrq(XMMRegister dst, Register src, int imm8);
1662   // SSE 2 insert
1663   void pinsrw(XMMRegister dst, Register src, int imm8);
1664 
1665   // SSE4.1 packed move
1666   void pmovzxbw(XMMRegister dst, XMMRegister src);
1667   void pmovzxbw(XMMRegister dst, Address src);
1668 
1669 #ifndef _LP64 // no 32bit push/pop on amd64
1670   void popl(Address dst);
1671 #endif
1672 
1673 #ifdef _LP64
1674   void popq(Address dst);
1675 #endif
1676 
1677   void popcntl(Register dst, Address src);
1678   void popcntl(Register dst, Register src);
1679 
1680 #ifdef _LP64
1681   void popcntq(Register dst, Address src);
1682   void popcntq(Register dst, Register src);
1683 #endif


1893   //====================VECTOR ARITHMETIC=====================================
1894 
1895   // Add Packed Floating-Point Values
1896   void addpd(XMMRegister dst, XMMRegister src);
1897   void addps(XMMRegister dst, XMMRegister src);
1898   void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1899   void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1900   void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1901   void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1902 
1903   // Subtract Packed Floating-Point Values
1904   void subpd(XMMRegister dst, XMMRegister src);
1905   void subps(XMMRegister dst, XMMRegister src);
1906   void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1907   void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1908   void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1909   void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1910 
1911   // Multiply Packed Floating-Point Values
1912   void mulpd(XMMRegister dst, XMMRegister src);
1913   void mulpd(XMMRegister dst, Address src);
1914   void mulps(XMMRegister dst, XMMRegister src);
1915   void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1916   void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1917   void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1918   void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1919 
1920   // Divide Packed Floating-Point Values
1921   void divpd(XMMRegister dst, XMMRegister src);
1922   void divps(XMMRegister dst, XMMRegister src);
1923   void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1924   void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1925   void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1926   void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1927 
1928   // Bitwise Logical AND of Packed Floating-Point Values
1929   void andpd(XMMRegister dst, XMMRegister src);
1930   void andps(XMMRegister dst, XMMRegister src);
1931   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1932   void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1933   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1934   void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1935 
1936   void unpckhpd(XMMRegister dst, XMMRegister src);
1937   void unpcklpd(XMMRegister dst, XMMRegister src);
1938 
1939   // Bitwise Logical XOR of Packed Floating-Point Values
1940   void xorpd(XMMRegister dst, XMMRegister src);
1941   void xorps(XMMRegister dst, XMMRegister src);
1942   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1943   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1944   void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1945   void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1946 
1947   // Add horizontal packed integers
1948   void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1949   void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1950   void phaddw(XMMRegister dst, XMMRegister src);
1951   void phaddd(XMMRegister dst, XMMRegister src);
1952 
1953   // Add packed integers
1954   void paddb(XMMRegister dst, XMMRegister src);
1955   void paddw(XMMRegister dst, XMMRegister src);
1956   void paddd(XMMRegister dst, XMMRegister src);
1957   void paddq(XMMRegister dst, XMMRegister src);
1958   void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);


2011   void psrlq(XMMRegister dst, XMMRegister shift);
2012   void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2013   void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2014   void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2015   void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2016   void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2017   void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2018 
2019   // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2020   void psraw(XMMRegister dst, int shift);
2021   void psrad(XMMRegister dst, int shift);
2022   void psraw(XMMRegister dst, XMMRegister shift);
2023   void psrad(XMMRegister dst, XMMRegister shift);
2024   void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2025   void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2026   void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2027   void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2028 
2029   // And packed integers
2030   void pand(XMMRegister dst, XMMRegister src);
2031   void pandn(XMMRegister dst, XMMRegister src);
2032   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2033   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2034 
2035   // Or packed integers
2036   void por(XMMRegister dst, XMMRegister src);
2037   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2038   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2039 
2040   // Xor packed integers
2041   void pxor(XMMRegister dst, XMMRegister src);
2042   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2043   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2044 
2045   // Copy low 128bit into high 128bit of YMM registers.
2046   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2047   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2048   void vextractf128h(XMMRegister dst, XMMRegister src);
2049   void vextracti128h(XMMRegister dst, XMMRegister src);
2050 
2051   // Load/store high 128bit of YMM registers without destroying the other half.


< prev index next >