  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
                  VexSimdPrefix pre, int vector_len = AVX_128bit,
                  bool no_mask_reg = false, bool legacy_mode = false) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, /* vex_w */ false, vector_len, legacy_mode, no_mask_reg);
  }

  void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
                    VexSimdPrefix pre, int vector_len = AVX_128bit,
                    bool no_mask_reg = false) {
    int dst_enc = dst->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, /* vex_w */ true, vector_len, /* legacy_mode */ false, no_mask_reg);
  }

  void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
    bool vex_w = false;
    int vector_len = AVX_128bit;
    vex_prefix(src, nds->encoding(), dst->encoding(),
               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
               vector_len, /* legacy_mode */ false, no_mask_reg);
  }

  void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
    bool vex_w = false;
    int vector_len = AVX_128bit;
    vex_prefix(src, nds->encoding(), dst->encoding(),
               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
               vector_len, /* legacy_mode */ true, no_mask_reg);
  }

  void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
    bool vex_w = true;
    int vector_len = AVX_128bit;
    vex_prefix(src, nds->encoding(), dst->encoding(),
               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
               vector_len, /* legacy_mode */ false, no_mask_reg);
  }

  void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
    bool vex_w = true;
    int vector_len = AVX_128bit;
    vex_prefix(src, nds->encoding(), dst->encoding(),
               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
               vector_len, /* legacy_mode */ true, no_mask_reg);
  }

  int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                            VexSimdPrefix pre, VexOpcode opc,
                            bool vex_w, int vector_len,
                            bool legacy_mode, bool no_mask_reg);

  int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
    bool vex_w = false;
    int vector_len = AVX_128bit;
    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
                                 /* legacy_mode */ false, no_mask_reg);
  }

  int vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
    bool vex_w = false;
    int vector_len = AVX_128bit;
    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
                                 /* legacy_mode */ true, no_mask_reg);
  }

  int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
    bool vex_w = true;
    int vector_len = AVX_128bit;
    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
                                 /* legacy_mode */ false, no_mask_reg);
  }

  int vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
    bool vex_w = true;
    int vector_len = AVX_128bit;
    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
                                 /* legacy_mode */ true, no_mask_reg);
  }

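  // Usage sketch (illustrative, not part of this interface): GPR instructions
  // in the 0F38 opcode map, such as BMI1's ANDN (opcode 0xF2 in the Intel SDM),
  // are emitted through these helpers roughly as follows; the emitter shape
  // here is an assumption for illustration:
  //
  //   void Assembler::andnl(Register dst, Register src1, Register src2) {
  //     int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2);
  //     emit_int8((unsigned char)0xF2);
  //     emit_int8((unsigned char)(0xC0 | encode));
  //   }
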
  int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                            VexSimdPrefix pre, int vector_len = AVX_128bit,
                            VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
                            bool no_mask_reg = false) {
    int src_enc = src->encoding();
    int dst_enc = dst->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, /* vex_w */ false, vector_len, legacy_mode, no_mask_reg);
  }

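  // Usage sketch (illustrative): a typical three-operand AVX emitter obtains
  // the ModRM register bits from vex_prefix_and_encode and then emits the
  // opcode byte; 0xFE is the PADDD opcode from the Intel SDM, while the
  // emitter shape itself is an assumption:
  //
  //   void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  //     int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len);
  //     emit_int8((unsigned char)0xFE);
  //     emit_int8((unsigned char)(0xC0 | encode));
  //   }
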
  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
                   VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
                   bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);

  void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
                   bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
    simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
  }

  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
    simd_prefix(src, xnoreg, dst, pre, no_mask_reg);
  }

  // ... (unrelated declarations elided) ...

  // Load/store the high 128 bits of YMM registers without destroying the low half.
  void vinserti128h(XMMRegister dst, Address src);
  void vextractf128h(Address dst, XMMRegister src);
  void vextracti128h(Address dst, XMMRegister src);

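  // Usage sketch (illustrative; the stack slot is a placeholder): these forms
  // can spill and reload the upper half of a YMM register around code that
  // only preserves XMM state:
  //
  //   __ vextractf128h(Address(rsp, 0), xmm0);  // save ymm0[255:128]
  //   // ... code that may clobber the upper halves ...
  //   __ vinsertf128h(xmm0, Address(rsp, 0));   // restore ymm0[255:128]
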
  // Copy the low 256 bits into the high 256 bits of ZMM registers (insert
  // forms), and copy the high 256 bits back out (extract forms).
  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vextracti64x4h(XMMRegister dst, XMMRegister src);
  void vextractf64x4h(XMMRegister dst, XMMRegister src);
  void vextractf64x4h(Address dst, XMMRegister src);
  void vinsertf64x4h(XMMRegister dst, Address src);

  // Copy targeted 128-bit segments of ZMM registers.
  void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
  void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
  void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);

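  // Usage sketch (illustrative; register choices are assumptions): the value
  // argument selects which 128-bit lane of the source is copied, e.g. lane 2:
  //
  //   __ vextractf32x4h(xmm0, xmm1, 2);  // xmm0[127:0] = zmm1[383:256]
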
  // Duplicate the 4-byte integer element of src into 8 locations in dst.
  void vpbroadcastd(XMMRegister dst, XMMRegister src);

  // Duplicate n-byte integer data from src into the number of locations in dst
  // implied by vector_len.
  void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
  void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
  void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
  void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
  void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
  void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
  void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
  void evpbroadcastq(XMMRegister dst, Address src, int vector_len);

  void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
  void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
  void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
  void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);

  void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
  void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
  void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
  void evpbroadcastq(XMMRegister dst, Register src, int vector_len);

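  // Usage sketch (illustrative; register choices are assumptions): a scalar is
  // replicated across all lanes before a vectorized loop, e.g. filling a
  // 512-bit register with 16 copies of a 32-bit value held in a GPR:
  //
  //   __ evpbroadcastd(xmm0, rax, AVX_512bit);  // every dword lane of zmm0 = eax
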
  // Carry-Less Multiplication Quadword
  void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
  void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);

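  // Usage sketch (illustrative): per the Intel SDM, mask bit 0 selects the
  // quadword of the first operand and bit 4 the quadword of the second, so
  // 0x00 multiplies the two low quadwords (the pattern used in CRC folding):
  //
  //   __ pclmulqdq(xmm1, xmm2, 0x00);  // xmm1 = clmul(xmm1[63:0], xmm2[63:0])
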
  // AVX instruction which clears the upper 128 bits of YMM registers to avoid
  // the transition penalty between AVX and SSE states. There is no penalty if
  // legacy SSE instructions are encoded with a VEX prefix, because VEX-encoded
  // instructions always clear the upper 128 bits. It should be used before
  // calling runtime code and native libraries.
  void vzeroupper();

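  // Usage sketch (illustrative; the call target is a placeholder): issued just
  // before leaving compiled code for code that may use legacy SSE:
  //
  //   __ vzeroupper();
  //   __ call(RuntimeAddress(native_entry));
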
 protected:
  // The following instructions require their memory operands to be 16-byte
  // aligned in SSE mode. They should be called only from the corresponding
  // MacroAssembler instructions.
  void andpd(XMMRegister dst, Address src);
  void andps(XMMRegister dst, Address src);
  void xorpd(XMMRegister dst, Address src);
  void xorps(XMMRegister dst, Address src);

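  // Usage sketch (illustrative; the mask symbol is a placeholder): a common
  // MacroAssembler-level use is flipping a float's sign bit with a 16-byte
  // aligned constant mask:
  //
  //   __ xorps(xmm0, ExternalAddress(float_sign_flip));  // operand must be 16-byte aligned
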