< prev index next >

src/cpu/x86/vm/assembler_x86.hpp

Print this page




1655   void orq(Register dst, Register src);
1656 
1657   // Pack with unsigned saturation
1658   void packuswb(XMMRegister dst, XMMRegister src);
1659   void packuswb(XMMRegister dst, Address src);
1660   void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1661 
1662   // Permutation of 64bit words
1663   void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1664   void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1665 
1666   void pause();
1667 
1668   // SSE4.2 string instructions
1669   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1670   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1671 
1672   // SSE 4.1 extract
1673   void pextrd(Register dst, XMMRegister src, int imm8);
1674   void pextrq(Register dst, XMMRegister src, int imm8);


1675 
1676   // SSE 4.1 insert
1677   void pinsrd(XMMRegister dst, Register src, int imm8);
1678   void pinsrq(XMMRegister dst, Register src, int imm8);


1679 
1680   // SSE4.1 packed move
1681   void pmovzxbw(XMMRegister dst, XMMRegister src);
1682   void pmovzxbw(XMMRegister dst, Address src);
1683 
1684 #ifndef _LP64 // no 32bit push/pop on amd64
1685   void popl(Address dst);
1686 #endif
1687 
1688 #ifdef _LP64
1689   void popq(Address dst);
1690 #endif
1691 
1692   void popcntl(Register dst, Address src);
1693   void popcntl(Register dst, Register src);
1694 
1695 #ifdef _LP64
1696   void popcntq(Register dst, Address src);
1697   void popcntq(Register dst, Register src);
1698 #endif


1908   //====================VECTOR ARITHMETIC=====================================
1909 
1910   // Add Packed Floating-Point Values
1911   void addpd(XMMRegister dst, XMMRegister src);
1912   void addps(XMMRegister dst, XMMRegister src);
1913   void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1914   void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1915   void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1916   void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1917 
1918   // Subtract Packed Floating-Point Values
1919   void subpd(XMMRegister dst, XMMRegister src);
1920   void subps(XMMRegister dst, XMMRegister src);
1921   void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1922   void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1923   void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1924   void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1925 
1926   // Multiply Packed Floating-Point Values
1927   void mulpd(XMMRegister dst, XMMRegister src);

1928   void mulps(XMMRegister dst, XMMRegister src);
1929   void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1930   void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1931   void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1932   void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1933 
1934   // Divide Packed Floating-Point Values
1935   void divpd(XMMRegister dst, XMMRegister src);
1936   void divps(XMMRegister dst, XMMRegister src);
1937   void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1938   void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1939   void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1940   void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1941 
1942   // Sqrt Packed Floating-Point Values - Double precision only
1943   void vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len);
1944   void vsqrtpd(XMMRegister dst, Address src, int vector_len);
1945 
1946   // Bitwise Logical AND of Packed Floating-Point Values
1947   void andpd(XMMRegister dst, XMMRegister src);
1948   void andps(XMMRegister dst, XMMRegister src);
1949   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1950   void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1951   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1952   void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1953 



1954   // Bitwise Logical XOR of Packed Floating-Point Values
1955   void xorpd(XMMRegister dst, XMMRegister src);
1956   void xorps(XMMRegister dst, XMMRegister src);
1957   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1958   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1959   void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1960   void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1961 
1962   // Add horizontal packed integers
1963   void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1964   void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1965   void phaddw(XMMRegister dst, XMMRegister src);
1966   void phaddd(XMMRegister dst, XMMRegister src);
1967 
1968   // Add packed integers
1969   void paddb(XMMRegister dst, XMMRegister src);
1970   void paddw(XMMRegister dst, XMMRegister src);
1971   void paddd(XMMRegister dst, XMMRegister src);
1972   void paddq(XMMRegister dst, XMMRegister src);
1973   void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);


2028   void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2029   void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2030   void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2031   void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2032   void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2033 
2034   // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2035   void psraw(XMMRegister dst, int shift);
2036   void psrad(XMMRegister dst, int shift);
2037   void psraw(XMMRegister dst, XMMRegister shift);
2038   void psrad(XMMRegister dst, XMMRegister shift);
2039   void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2040   void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2041   void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2042   void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2043 
2044   // And packed integers
2045   void pand(XMMRegister dst, XMMRegister src);
2046   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2047   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);



2048 
2049   // Or packed integers
2050   void por(XMMRegister dst, XMMRegister src);
2051   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2052   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2053 
2054   // Xor packed integers
2055   void pxor(XMMRegister dst, XMMRegister src);
2056   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2057   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2058 
2059   // Copy low 128bit into high 128bit of YMM registers.
2060   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2061   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2062   void vextractf128h(XMMRegister dst, XMMRegister src);
2063   void vextracti128h(XMMRegister dst, XMMRegister src);
2064 
2065   // Load/store the high 128 bits of YMM registers without destroying the other half.
2066   void vinsertf128h(XMMRegister dst, Address src);
2067   void vinserti128h(XMMRegister dst, Address src);




1655   void orq(Register dst, Register src);
1656 
1657   // Pack with unsigned saturation
1658   void packuswb(XMMRegister dst, XMMRegister src);
1659   void packuswb(XMMRegister dst, Address src);
1660   void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1661 
1662   // Permutation of 64bit words
1663   void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1664   void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1665 
1666   void pause();
1667 
1668   // SSE4.2 string instructions
1669   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1670   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1671 
1672   // SSE 4.1 extract
1673   void pextrd(Register dst, XMMRegister src, int imm8);
1674   void pextrq(Register dst, XMMRegister src, int imm8);
1675   // SSE 2 extract
1676   void pextrw(Register dst, XMMRegister src, int imm8);
1677 
1678   // SSE 4.1 insert
1679   void pinsrd(XMMRegister dst, Register src, int imm8);
1680   void pinsrq(XMMRegister dst, Register src, int imm8);
1681   // SSE 2 insert
1682   void pinsrw(XMMRegister dst, Register src, int imm8);
1683 
1684   // SSE4.1 packed move
1685   void pmovzxbw(XMMRegister dst, XMMRegister src);
1686   void pmovzxbw(XMMRegister dst, Address src);
1687 
1688 #ifndef _LP64 // no 32bit push/pop on amd64
1689   void popl(Address dst);
1690 #endif
1691 
1692 #ifdef _LP64
1693   void popq(Address dst);
1694 #endif
1695 
1696   void popcntl(Register dst, Address src);
1697   void popcntl(Register dst, Register src);
1698 
1699 #ifdef _LP64
1700   void popcntq(Register dst, Address src);
1701   void popcntq(Register dst, Register src);
1702 #endif


1912   //====================VECTOR ARITHMETIC=====================================
1913 
1914   // Add Packed Floating-Point Values
1915   void addpd(XMMRegister dst, XMMRegister src);
1916   void addps(XMMRegister dst, XMMRegister src);
1917   void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1918   void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1919   void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1920   void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1921 
1922   // Subtract Packed Floating-Point Values
1923   void subpd(XMMRegister dst, XMMRegister src);
1924   void subps(XMMRegister dst, XMMRegister src);
1925   void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1926   void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1927   void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1928   void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1929 
1930   // Multiply Packed Floating-Point Values
1931   void mulpd(XMMRegister dst, XMMRegister src);
1932   void mulpd(XMMRegister dst, Address src);
1933   void mulps(XMMRegister dst, XMMRegister src);
1934   void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1935   void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1936   void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1937   void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1938 
1939   // Divide Packed Floating-Point Values
1940   void divpd(XMMRegister dst, XMMRegister src);
1941   void divps(XMMRegister dst, XMMRegister src);
1942   void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1943   void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1944   void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1945   void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1946 
1947   // Sqrt Packed Floating-Point Values - Double precision only
1948   void vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len);
1949   void vsqrtpd(XMMRegister dst, Address src, int vector_len);
1950 
1951   // Bitwise Logical AND of Packed Floating-Point Values
1952   void andpd(XMMRegister dst, XMMRegister src);
1953   void andps(XMMRegister dst, XMMRegister src);
1954   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1955   void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1956   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1957   void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1958 
1959   void unpckhpd(XMMRegister dst, XMMRegister src); // UNPCKHPD: unpack and interleave high packed doubles (per the x86 mnemonic)
1960   void unpcklpd(XMMRegister dst, XMMRegister src); // UNPCKLPD: unpack and interleave low packed doubles (per the x86 mnemonic)
1961 
1962   // Bitwise Logical XOR of Packed Floating-Point Values
1963   void xorpd(XMMRegister dst, XMMRegister src);
1964   void xorps(XMMRegister dst, XMMRegister src);
1965   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1966   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1967   void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1968   void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1969 
1970   // Add horizontal packed integers
1971   void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1972   void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1973   void phaddw(XMMRegister dst, XMMRegister src);
1974   void phaddd(XMMRegister dst, XMMRegister src);
1975 
1976   // Add packed integers
1977   void paddb(XMMRegister dst, XMMRegister src);
1978   void paddw(XMMRegister dst, XMMRegister src);
1979   void paddd(XMMRegister dst, XMMRegister src);
1980   void paddq(XMMRegister dst, XMMRegister src);
1981   void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);


2036   void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2037   void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2038   void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2039   void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2040   void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2041 
2042   // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2043   void psraw(XMMRegister dst, int shift);
2044   void psrad(XMMRegister dst, int shift);
2045   void psraw(XMMRegister dst, XMMRegister shift);
2046   void psrad(XMMRegister dst, XMMRegister shift);
2047   void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2048   void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2049   void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2050   void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2051 
2052   // And packed integers
2053   void pand(XMMRegister dst, XMMRegister src);
2054   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2055   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2056 
2057   // Andn packed integers
2058   void pandn(XMMRegister dst, XMMRegister src);
2059 
2060   // Or packed integers
2061   void por(XMMRegister dst, XMMRegister src);
2062   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2063   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2064 
2065   // Xor packed integers
2066   void pxor(XMMRegister dst, XMMRegister src);
2067   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2068   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2069 
2070   // Copy low 128bit into high 128bit of YMM registers.
2071   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2072   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2073   void vextractf128h(XMMRegister dst, XMMRegister src);
2074   void vextracti128h(XMMRegister dst, XMMRegister src);
2075 
2076   // Load/store the high 128 bits of YMM registers without destroying the other half.
2077   void vinsertf128h(XMMRegister dst, Address src);
2078   void vinserti128h(XMMRegister dst, Address src);


< prev index next >