644 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
645 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
646 }
647
648 void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
649 VexSimdPrefix pre, int vector_len = AVX_128bit,
650 bool no_mask_reg = false) {
651 int dst_enc = dst->encoding();
652 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
653 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
654 }
655
656 void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
657 bool vex_w = false;
658 int vector_len = AVX_128bit;
659 vex_prefix(src, nds->encoding(), dst->encoding(),
660 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
661 vector_len, no_mask_reg);
662 }
663
664 void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
665 bool vex_w = true;
666 int vector_len = AVX_128bit;
667 vex_prefix(src, nds->encoding(), dst->encoding(),
668 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
669 vector_len, no_mask_reg);
670 }
// Build a VEX/EVEX prefix from raw register encodings and return an int
// encoding used by the caller when emitting the instruction (presumably
// the remapped ModRM register encoding — confirm against the definition).
671 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
672 VexSimdPrefix pre, VexOpcode opc,
673 bool vex_w, int vector_len,
674 bool legacy_mode, bool no_mask_reg);
675
676 int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
677 bool vex_w = false;
678 int vector_len = AVX_128bit;
679 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
680 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
681 false, no_mask_reg);
682 }
683 int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
684 bool vex_w = true;
685 int vector_len = AVX_128bit;
686 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
687 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
688 false, no_mask_reg);
689 }
690 int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
691 VexSimdPrefix pre, int vector_len = AVX_128bit,
692 VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
693 bool no_mask_reg = false) {
694 int src_enc = src->encoding();
695 int dst_enc = dst->encoding();
696 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
697 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
698 }
699
// Emit a SIMD instruction prefix for an instruction with a memory operand;
// declared here, defined elsewhere (presumably chooses legacy/VEX/EVEX
// encoding based on target support — confirm against the definition).
700 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
701 VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
702 bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
703
704 void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
705 bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
706 simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
707 }
708
709 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
// 128-bit insert/extract on the high half ("h") of YMM registers, with a
// memory operand — TODO(review): confirm half semantics in the definitions.
2004 void vinserti128h(XMMRegister dst, Address src);
2005 void vextractf128h(Address dst, XMMRegister src);
2006 void vextracti128h(Address dst, XMMRegister src);
2007
2008 // Copy low 256 bits into high 256 bits of ZMM registers.
2009 void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2010 void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2011 void vextracti64x4h(XMMRegister dst, XMMRegister src);
2012 void vextractf64x4h(XMMRegister dst, XMMRegister src);
2013 void vextractf64x4h(Address dst, XMMRegister src);
2014 void vinsertf64x4h(XMMRegister dst, Address src);
2015
2016 // Copy targeted 128-bit segments of the ZMM registers; 'value' selects the segment.
2017 void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
2018 void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
2019 void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
2020
2021 // Duplicate 4-byte integer data from src into 8 locations in dst.
2022 void vpbroadcastd(XMMRegister dst, XMMRegister src);
2023
2024 // Duplicate 4-byte integer data from src into vector_len locations in dst.
2025 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2026
2027 // Carry-Less Multiplication Quadword.
2028 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2029 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2030
2031 // AVX instruction which is used to clear upper 128 bits of YMM registers and
2032 // to avoid transaction penalty between AVX and SSE states. There is no
2033 // penalty if legacy SSE instructions are encoded using VEX prefix because
2034 // they always clear upper 128 bits. It should be used before calling
2035 // runtime code and native libraries.
2036 void vzeroupper();
2037
2038 protected:
2039 // The next instructions require 16-byte address alignment in SSE mode.
2040 // They should be called only from corresponding MacroAssembler instructions.
2041 void andpd(XMMRegister dst, Address src);
2042 void andps(XMMRegister dst, Address src);
2043 void xorpd(XMMRegister dst, Address src);
2044 void xorps(XMMRegister dst, Address src);
2045
|
644 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
645 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
646 }
647
648 void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
649 VexSimdPrefix pre, int vector_len = AVX_128bit,
650 bool no_mask_reg = false) {
651 int dst_enc = dst->encoding();
652 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
653 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
654 }
655
656 void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
657 bool vex_w = false;
658 int vector_len = AVX_128bit;
659 vex_prefix(src, nds->encoding(), dst->encoding(),
660 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
661 vector_len, no_mask_reg);
662 }
663
664 void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
665 bool vex_w = false;
666 int vector_len = AVX_128bit;
667 vex_prefix(src, nds->encoding(), dst->encoding(),
668 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
669 vector_len, true, no_mask_reg);
670 }
671
672 void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
673 bool vex_w = true;
674 int vector_len = AVX_128bit;
675 vex_prefix(src, nds->encoding(), dst->encoding(),
676 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
677 vector_len, no_mask_reg);
678 }
679
680 void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
681 bool vex_w = true;
682 int vector_len = AVX_128bit;
683 vex_prefix(src, nds->encoding(), dst->encoding(),
684 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
685 vector_len, true, no_mask_reg);
686 }
687
// Build a VEX/EVEX prefix from raw register encodings and return an int
// encoding used by the caller when emitting the instruction (presumably
// the remapped ModRM register encoding — confirm against the definition).
688 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
689 VexSimdPrefix pre, VexOpcode opc,
690 bool vex_w, int vector_len,
691 bool legacy_mode, bool no_mask_reg);
692
693 int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
694 bool vex_w = false;
695 int vector_len = AVX_128bit;
696 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
697 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
698 false, no_mask_reg);
699 }
700
701 int vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
702 bool vex_w = false;
703 int vector_len = AVX_128bit;
704 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
705 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
706 true, no_mask_reg);
707 }
708
709 int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
710 bool vex_w = true;
711 int vector_len = AVX_128bit;
712 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
713 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
714 false, no_mask_reg);
715 }
716
717 int vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
718 bool vex_w = true;
719 int vector_len = AVX_128bit;
720 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
721 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
722 true, no_mask_reg);
723 }
724
725 int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
726 VexSimdPrefix pre, int vector_len = AVX_128bit,
727 VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
728 bool no_mask_reg = false) {
729 int src_enc = src->encoding();
730 int dst_enc = dst->encoding();
731 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
732 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
733 }
734
// Emit a SIMD instruction prefix for an instruction with a memory operand;
// declared here, defined elsewhere (presumably chooses legacy/VEX/EVEX
// encoding based on target support — confirm against the definition).
735 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
736 VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
737 bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
738
739 void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
740 bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
741 simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
742 }
743
744 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
// 128-bit insert/extract on the high half ("h") of YMM registers, with a
// memory operand — TODO(review): confirm half semantics in the definitions.
2039 void vinserti128h(XMMRegister dst, Address src);
2040 void vextractf128h(Address dst, XMMRegister src);
2041 void vextracti128h(Address dst, XMMRegister src);
2042
2043 // Copy low 256 bits into high 256 bits of ZMM registers.
2044 void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2045 void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2046 void vextracti64x4h(XMMRegister dst, XMMRegister src);
2047 void vextractf64x4h(XMMRegister dst, XMMRegister src);
2048 void vextractf64x4h(Address dst, XMMRegister src);
2049 void vinsertf64x4h(XMMRegister dst, Address src);
2050
2051 // Copy targeted 128-bit segments of the ZMM registers; 'value' selects the segment.
2052 void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
2053 void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
2054 void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
2055
2056 // Duplicate 4-byte integer data from src into 8 locations in dst.
2057 void vpbroadcastd(XMMRegister dst, XMMRegister src);
2058
2059 // Duplicate n-byte integer data from src into vector_len locations in dst
2060 // (b/w/d/q suffix gives the element width).
2060 void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2061 void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
2062 void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2063 void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
2064 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2065 void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
2066 void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2067 void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
2068
2069 void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2070 void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
2071 void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2072 void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
2073
2074 // Broadcast variants sourcing from a general-purpose register.
2074 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2075 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2076 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2077 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2078
2079 // Carry-Less Multiplication Quadword.
2080 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2081 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2082
2083 // AVX instruction which is used to clear upper 128 bits of YMM registers and
2084 // to avoid transaction penalty between AVX and SSE states. There is no
2085 // penalty if legacy SSE instructions are encoded using VEX prefix because
2086 // they always clear upper 128 bits. It should be used before calling
2087 // runtime code and native libraries.
2088 void vzeroupper();
2089
2090 protected:
2091 // The next instructions require 16-byte address alignment in SSE mode.
2092 // They should be called only from corresponding MacroAssembler instructions.
2093 void andpd(XMMRegister dst, Address src);
2094 void andps(XMMRegister dst, Address src);
2095 void xorpd(XMMRegister dst, Address src);
2096 void xorps(XMMRegister dst, Address src);
2097
|