1941 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); 1942 1943 // And packed integers 1944 void pand(XMMRegister dst, XMMRegister src); 1945 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1946 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1947 1948 // Andn packed integers 1949 void pandn(XMMRegister dst, XMMRegister src); 1950 1951 // Or packed integers 1952 void por(XMMRegister dst, XMMRegister src); 1953 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1954 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1955 1956 // Xor packed integers 1957 void pxor(XMMRegister dst, XMMRegister src); 1958 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1959 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1960 1961 // Copy low 128bit into high 128bit of YMM registers. 1962 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); 1963 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); 1964 void vextractf128h(XMMRegister dst, XMMRegister src); 1965 void vextracti128h(XMMRegister dst, XMMRegister src); 1966 1967 // Load/store high 128bit of YMM registers which does not destroy other half. 1968 void vinsertf128h(XMMRegister dst, Address src); 1969 void vinserti128h(XMMRegister dst, Address src); 1970 void vextractf128h(Address dst, XMMRegister src); 1971 void vextracti128h(Address dst, XMMRegister src); 1972 1973 // Copy low 256bit into high 256bit of ZMM registers. 1974 void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); 1975 void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); 1976 void vextracti64x4h(XMMRegister dst, XMMRegister src, int value); 1977 void vextractf64x4h(XMMRegister dst, XMMRegister src, int value); 1978 void vextractf64x4h(Address dst, XMMRegister src, int value); 1979 void vinsertf64x4h(XMMRegister dst, Address src, int value); 1980 1981 // Copy targeted 128bit segments of the ZMM registers 1982 void vextracti64x2h(XMMRegister dst, XMMRegister src, int value); 1983 void vextractf64x2h(XMMRegister dst, XMMRegister src, int value); 1984 void vextractf32x4h(XMMRegister dst, XMMRegister src, int value); 1985 void vextractf32x4h(Address dst, XMMRegister src, int value); 1986 void vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); 1987 void vinsertf32x4h(XMMRegister dst, Address src, int value); 1988 1989 // duplicate 4-bytes integer data from src into 8 locations in dest 1990 void vpbroadcastd(XMMRegister dst, XMMRegister src); 1991 1992 // duplicate 2-bytes integer data from src into 16 locations in dest 1993 void vpbroadcastw(XMMRegister dst, XMMRegister src); 1994 1995 // duplicate n-bytes integer data from src into vector_len locations in dest 1996 void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len); 1997 void evpbroadcastb(XMMRegister dst, Address src, int vector_len); 1998 void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len); 1999 void evpbroadcastw(XMMRegister dst, Address src, int vector_len); 2000 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len); 2001 void evpbroadcastd(XMMRegister dst, Address src, int vector_len); 2002 void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len); 2003 void evpbroadcastq(XMMRegister dst, Address src, int vector_len); 2004 2005 void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len); 2006 void evpbroadcastss(XMMRegister dst, Address src, int vector_len); 2007 void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len); | 1941 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); 1942 1943 // And packed integers 1944 void pand(XMMRegister dst, XMMRegister src); 1945 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1946 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1947 1948 // Andn packed integers 1949 void pandn(XMMRegister dst, XMMRegister src); 1950 1951 // Or packed integers 1952 void por(XMMRegister dst, XMMRegister src); 1953 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1954 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1955 1956 // Xor packed integers 1957 void pxor(XMMRegister dst, XMMRegister src); 1958 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); 1959 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); 1960 1961 // 128bit copy from/to 256bit (YMM) vector registers 1962 void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); 1963 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); 1964 void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8); 1965 void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8); 1966 void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8); 1967 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8); 1968 void vextractf128(Address dst, XMMRegister src, uint8_t imm8); 1969 void vextracti128(Address dst, XMMRegister src, uint8_t imm8); 1970 1971 // 256bit copy from/to 512bit (ZMM) vector registers 1972 void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); 1973 void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); 1974 void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8); 1975 void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8); 1976 void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8); 1977 void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8); 1978 1979 // 128bit copy from/to 256bit (YMM) or 512bit (ZMM) vector registers 1980 void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8); 1981 void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8); 1982 void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8); 1983 void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8); 1984 void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); 1985 void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8); 1986 1987 // duplicate 4-bytes integer data from src into 8 locations in dest 1988 void vpbroadcastd(XMMRegister dst, XMMRegister src); 1989 1990 // duplicate 2-bytes integer data from src into 16 locations in dest 1991 void vpbroadcastw(XMMRegister dst, XMMRegister src); 1992 1993 // duplicate n-bytes integer data from src into vector_len locations in dest 1994 void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len); 1995 void evpbroadcastb(XMMRegister dst, Address src, int vector_len); 1996 void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len); 1997 void evpbroadcastw(XMMRegister dst, Address src, int vector_len); 1998 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len); 1999 void evpbroadcastd(XMMRegister dst, Address src, int vector_len); 2000 void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len); 2001 void evpbroadcastq(XMMRegister dst, Address src, int vector_len); 2002 2003 void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len); 2004 void evpbroadcastss(XMMRegister dst, Address src, int vector_len); 2005 void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len); |