// Copy 128-bit segments into/out of the high half of YMM registers.
// (The 'h' suffix presumably means "high 128 bits" -- confirm against the
// encoding implementations in the corresponding .cpp file.)
2004 void vinserti128h(XMMRegister dst, Address src);
2005 void vextractf128h(Address dst, XMMRegister src);
2006 void vextracti128h(Address dst, XMMRegister src);
2007
2008 // Copy the low 256 bits into the high 256 bits of ZMM registers.
2009 void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2010 void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2011 void vextracti64x4h(XMMRegister dst, XMMRegister src);
2012 void vextractf64x4h(XMMRegister dst, XMMRegister src);
2013 void vextractf64x4h(Address dst, XMMRegister src);
2014 void vinsertf64x4h(XMMRegister dst, Address src);
2015
2016 // Copy targeted 128-bit segments of the ZMM registers; 'value' selects
2016 // which 128-bit lane is extracted.
2017 void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
2018 void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
2019 void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
2020
2021 // Duplicate 4-byte integer data from src into 8 locations in dest.
2022 void vpbroadcastd(XMMRegister dst, XMMRegister src);
2023
2024 // Duplicate 4-byte integer data from src into vector_len locations in dest.
2025 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2026
2027 // Carry-Less Multiplication Quadword; 'mask' is the immediate selecting
2027 // which quadwords of the sources are multiplied.
2028 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2029 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2030
2031 // AVX instruction which is used to clear the upper 128 bits of YMM registers and
2032 // to avoid a transition penalty between AVX and SSE states. There is no
2033 // penalty if legacy SSE instructions are encoded using the VEX prefix because
2034 // they always clear the upper 128 bits. It should be used before calling
2035 // runtime code and native libraries.
2036 void vzeroupper();
2037
2038 protected:
2039 // The following instructions require 16-byte address alignment in SSE mode.
2040 // They should be called only from the corresponding MacroAssembler instructions.
2041 void andpd(XMMRegister dst, Address src);
2042 void andps(XMMRegister dst, Address src);
2043 void xorpd(XMMRegister dst, Address src);
2044 void xorps(XMMRegister dst, Address src);
2045
|
// Copy 128-bit segments into/out of the high half of YMM registers.
// (The 'h' suffix presumably means "high 128 bits" -- confirm against the
// encoding implementations in the corresponding .cpp file.)
2004 void vinserti128h(XMMRegister dst, Address src);
2005 void vextractf128h(Address dst, XMMRegister src);
2006 void vextracti128h(Address dst, XMMRegister src);
2007
2008 // Copy the low 256 bits into the high 256 bits of ZMM registers.
2009 void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2010 void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2011 void vextracti64x4h(XMMRegister dst, XMMRegister src);
2012 void vextractf64x4h(XMMRegister dst, XMMRegister src);
2013 void vextractf64x4h(Address dst, XMMRegister src);
2014 void vinsertf64x4h(XMMRegister dst, Address src);
2015
2016 // Copy targeted 128-bit segments of the ZMM registers; 'value' selects
2016 // which 128-bit lane is extracted.
2017 void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
2018 void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
2019 void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
2020
2021 // Duplicate 4-byte integer data from src into 8 locations in dest.
2022 void vpbroadcastd(XMMRegister dst, XMMRegister src);
2023
2024 // Duplicate n-byte integer data from src into vector_len locations in dest
2024 // (b = byte, w = word, d = doubleword, q = quadword element sizes).
2025 void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2026 void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
2027 void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2028 void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
2029 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2030 void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
2031 void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2032 void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
2033
// Floating-point broadcasts (ss = scalar single, sd = scalar double).
2034 void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2035 void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
2036 void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2037 void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
2038
// Broadcast from a general-purpose register source into all vector lanes.
2039 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2040 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2041 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2042 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2043
2044 // Carry-Less Multiplication Quadword; 'mask' is the immediate selecting
2044 // which quadwords of the sources are multiplied.
2045 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2046 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2047
2048 // AVX instruction which is used to clear the upper 128 bits of YMM registers and
2049 // to avoid a transition penalty between AVX and SSE states. There is no
2050 // penalty if legacy SSE instructions are encoded using the VEX prefix because
2051 // they always clear the upper 128 bits. It should be used before calling
2052 // runtime code and native libraries.
2053 void vzeroupper();
2054
2055 protected:
2056 // The following instructions require 16-byte address alignment in SSE mode.
2057 // They should be called only from the corresponding MacroAssembler instructions.
2058 void andpd(XMMRegister dst, Address src);
2059 void andps(XMMRegister dst, Address src);
2060 void xorpd(XMMRegister dst, Address src);
2061 void xorps(XMMRegister dst, Address src);
2062
|