
src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java

rev 56282 : [mq]: graal


  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 package org.graalvm.compiler.asm.amd64;
  26 
  27 import static jdk.vm.ci.amd64.AMD64.CPU;
  28 import static jdk.vm.ci.amd64.AMD64.MASK;
  29 import static jdk.vm.ci.amd64.AMD64.XMM;





  30 import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
  31 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
  32 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIntelNops;
  33 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
  34 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
  35 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
  36 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
  37 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
  38 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
  39 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
  40 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
  41 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
  42 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
  43 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
  44 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
  45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
  46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
  47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
  48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
  49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
  50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
  51 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
  52 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
  53 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
  54 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
  55 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
  56 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
  57 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;

  58 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
  59 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
  60 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
  61 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
  62 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
  63 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
  64 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
  65 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
  66 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
  67 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
  68 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
  69 import static org.graalvm.compiler.core.common.NumUtil.isByte;
  70 import static org.graalvm.compiler.core.common.NumUtil.isInt;
  71 import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
  72 import static org.graalvm.compiler.core.common.NumUtil.isUByte;
  73 
  74 import java.util.EnumSet;
  75 
  76 import org.graalvm.compiler.asm.Label;
  77 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;


 878         public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
 879         public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
 880         public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
 881         public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
 882         public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
 883         public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
 884         public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
 885         // @formatter:on
 886 
 887         public final AMD64MOp m1Op;
 888         public final AMD64MOp mcOp;
 889         public final AMD64MIOp miOp;
 890 
 891         private AMD64Shift(String opcode, int code) {
 892             m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
 893             mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
 894             miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
 895         }
 896     }
 897 
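    // A hedged reading of the three opcode forms above, mirroring the x86 shift/rotate
    // encodings: m1Op (0xD1 /ext) shifts by one, mcOp (0xD3 /ext) shifts by the count in
    // CL, and miOp (0xC1 /ext, imm8) shifts by an immediate. Assuming an AMD64Assembler
    // `asm` and a placeholder general-purpose register `reg`, an immediate shift could be
    // emitted as:
    //     AMD64Shift.SHL.miOp.emit(asm, DWORD, reg, 3);    // shl reg, 3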
 898     private enum VEXOpAssertion {
 899         AVX1(CPUFeature.AVX, CPUFeature.AVX),
 900         AVX1_2(CPUFeature.AVX, CPUFeature.AVX2),
 901         AVX2(CPUFeature.AVX2, CPUFeature.AVX2),
 902         AVX1_128ONLY(CPUFeature.AVX, null),
 903         AVX1_256ONLY(null, CPUFeature.AVX),
 904         AVX2_256ONLY(null, CPUFeature.AVX2),
 905         XMM_CPU(CPUFeature.AVX, null, XMM, null, CPU, null),
 906         XMM_XMM_CPU(CPUFeature.AVX, null, XMM, XMM, CPU, null),
 907         CPU_XMM(CPUFeature.AVX, null, CPU, null, XMM, null),
 908         AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, CPU, null, XMM, null),
 909         BMI1(CPUFeature.BMI1, null, CPU, CPU, CPU, null),
 910         BMI2(CPUFeature.BMI2, null, CPU, CPU, CPU, null),
 911         FMA(CPUFeature.FMA, null, XMM, XMM, XMM, null);




 912 
 913         private final CPUFeature l128feature;
 914         private final CPUFeature l256feature;

 915 
 916         private final RegisterCategory rCategory;
 917         private final RegisterCategory vCategory;
 918         private final RegisterCategory mCategory;
 919         private final RegisterCategory imm8Category;
 920 
 921         VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature) {
 922             this(l128feature, l256feature, XMM, XMM, XMM, XMM);
 923         }
 924 
 925         VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory, RegisterCategory imm8Category) {

 926             this.l128feature = l128feature;
 927             this.l256feature = l256feature;

 928             this.rCategory = rCategory;
 929             this.vCategory = vCategory;
 930             this.mCategory = mCategory;
 931             this.imm8Category = imm8Category;
 932         }
 933 
 934         public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
 935             return check(arch, getLFlag(size), r, v, m, null);
 936         }
 937 
 938         public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
 939             return check(arch, getLFlag(size), r, v, m, imm8);
 940         }
 941 
 942         public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
 943             switch (l) {
 944                 case L128:

 945                     assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128 bit instruction";
 946                     break;
 947                 case L256:
 948                     assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256 bit instruction";
 949                     break;
 950             }
 951             if (r != null) {
 952                 assert r.getRegisterCategory().equals(rCategory);
 953             }
 954             if (v != null) {
 955                 assert v.getRegisterCategory().equals(vCategory);
 956             }
 957             if (m != null) {
 958                 assert m.getRegisterCategory().equals(mCategory);
 959             }
 960             if (imm8 != null) {
 961                 assert imm8.getRegisterCategory().equals(imm8Category);
 962             }
 963             return true;
 964         }
 965 
 966         public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
 967             switch (avxSize) {
 968                 case XMM:

 969                     return l128feature != null && features.contains(l128feature);
 970                 case YMM:
 971                     return l256feature != null && features.contains(l256feature);
 972                 default:
 973                     throw GraalError.shouldNotReachHere();
 974             }

 975         }
 976     }
 977 
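    // VEXOpAssertion ties each VEX op to the CPU feature required for its 128-bit form
    // (l128feature) and its 256-bit form (l256feature), and to the register categories
    // expected for the r, v, m and imm8 operands. check(...) only fires asserts and always
    // returns true; supports(...) is the runtime query behind VexOp.isSupported. A hedged
    // sketch, assuming an AMD64Assembler `asm`:
    //     boolean ymmOk = VexRMOp.VPTEST.isSupported(asm, AVXSize.YMM);   // true iff AVX is available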
 978     /**
 979      * Base class for VEX-encoded instructions.
 980      */
 981     public static class VexOp {
 982         protected final int pp;
 983         protected final int mmmmm;
 984         protected final int w;
 985         protected final int op;
 986 
 987         private final String opcode;
 988         protected final VEXOpAssertion assertion;
 989 
 990         protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {



 991             this.pp = pp;
 992             this.mmmmm = mmmmm;
 993             this.w = w;
 994             this.op = op;
 995             this.opcode = opcode;
 996             this.assertion = assertion;






 997         }
 998 
 999         public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
1000             return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size);




1001         }
1002 
1003         @Override
1004         public String toString() {
1005             return opcode;
1006         }





1007     }
1008 
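    // The VexOp fields map directly onto the VEX prefix: pp selects the implied SIMD
    // prefix (P_, P_66, P_F2, P_F3), mmmmm selects the opcode map (M_0F, M_0F38, M_0F3A),
    // w is the VEX.W bit (W0, W1, or WIG when ignored), and op is the opcode byte. For
    // example, VMOVSD below (P_F2, M_0F, WIG, 0x10) corresponds to the manual form
    // "VEX.LIG.F2.0F.WIG 10 /r".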
1009     /**
1010      * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
1011      */
1012     public static class VexRROp extends VexOp {
1013         // @formatter:off
1014         public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
1015         // @formatter:on
1016 
1017         protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
1018             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1019         }
1020 
1021         protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1022             super(opcode, pp, mmmmm, w, op, assertion);
1023         }
1024 
1025         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1026             assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1027             assert op != 0x1A || op != 0x5A;
1028             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1029             asm.emitByte(op);
1030             asm.emitModRM(dst, src);
1031         }
1032     }
1033 
1034     /**
1035      * VEX-encoded instructions with an operand order of RM.
1036      */
1037     public static class VexRMOp extends VexRROp {
1038         // @formatter:off
1039         public static final VexRMOp VCVTTSS2SI      = new VexRMOp("VCVTTSS2SI",      P_F3, M_0F,   W0,  0x2C, VEXOpAssertion.CPU_XMM);
1040         public static final VexRMOp VCVTTSS2SQ      = new VexRMOp("VCVTTSS2SQ",      P_F3, M_0F,   W1,  0x2C, VEXOpAssertion.CPU_XMM);
1041         public static final VexRMOp VCVTTSD2SI      = new VexRMOp("VCVTTSD2SI",      P_F2, M_0F,   W0,  0x2C, VEXOpAssertion.CPU_XMM);
1042         public static final VexRMOp VCVTTSD2SQ      = new VexRMOp("VCVTTSD2SQ",      P_F2, M_0F,   W1,  0x2C, VEXOpAssertion.CPU_XMM);
1043         public static final VexRMOp VCVTPS2PD       = new VexRMOp("VCVTPS2PD",       P_,   M_0F,   WIG, 0x5A);
1044         public static final VexRMOp VCVTPD2PS       = new VexRMOp("VCVTPD2PS",       P_66, M_0F,   WIG, 0x5A);
1045         public static final VexRMOp VCVTDQ2PS       = new VexRMOp("VCVTDQ2PS",       P_,   M_0F,   WIG, 0x5B);
1046         public static final VexRMOp VCVTTPS2DQ      = new VexRMOp("VCVTTPS2DQ",      P_F3, M_0F,   WIG, 0x5B);
1047         public static final VexRMOp VCVTTPD2DQ      = new VexRMOp("VCVTTPD2DQ",      P_66, M_0F,   WIG, 0xE6);
1048         public static final VexRMOp VCVTDQ2PD       = new VexRMOp("VCVTDQ2PD",       P_F3, M_0F,   WIG, 0xE6);


1060         public static final VexRMOp VPMOVSXBQ       = new VexRMOp("VPMOVSXBQ",       P_66, M_0F38, WIG, 0x22);
1061         public static final VexRMOp VPMOVSXWD       = new VexRMOp("VPMOVSXWD",       P_66, M_0F38, WIG, 0x23);
1062         public static final VexRMOp VPMOVSXWQ       = new VexRMOp("VPMOVSXWQ",       P_66, M_0F38, WIG, 0x24);
1063         public static final VexRMOp VPMOVSXDQ       = new VexRMOp("VPMOVSXDQ",       P_66, M_0F38, WIG, 0x25);
1064         public static final VexRMOp VPMOVZXBW       = new VexRMOp("VPMOVZXBW",       P_66, M_0F38, WIG, 0x30);
1065         public static final VexRMOp VPMOVZXBD       = new VexRMOp("VPMOVZXBD",       P_66, M_0F38, WIG, 0x31);
1066         public static final VexRMOp VPMOVZXBQ       = new VexRMOp("VPMOVZXBQ",       P_66, M_0F38, WIG, 0x32);
1067         public static final VexRMOp VPMOVZXWD       = new VexRMOp("VPMOVZXWD",       P_66, M_0F38, WIG, 0x33);
1068         public static final VexRMOp VPMOVZXWQ       = new VexRMOp("VPMOVZXWQ",       P_66, M_0F38, WIG, 0x34);
1069         public static final VexRMOp VPMOVZXDQ       = new VexRMOp("VPMOVZXDQ",       P_66, M_0F38, WIG, 0x35);
1070         public static final VexRMOp VPTEST          = new VexRMOp("VPTEST",          P_66, M_0F38, WIG, 0x17);
1071         public static final VexRMOp VSQRTPD         = new VexRMOp("VSQRTPD",         P_66, M_0F,   WIG, 0x51);
1072         public static final VexRMOp VSQRTPS         = new VexRMOp("VSQRTPS",         P_,   M_0F,   WIG, 0x51);
1073         public static final VexRMOp VSQRTSD         = new VexRMOp("VSQRTSD",         P_F2, M_0F,   WIG, 0x51);
1074         public static final VexRMOp VSQRTSS         = new VexRMOp("VSQRTSS",         P_F3, M_0F,   WIG, 0x51);
1075         public static final VexRMOp VUCOMISS        = new VexRMOp("VUCOMISS",        P_,   M_0F,   WIG, 0x2E);
1076         public static final VexRMOp VUCOMISD        = new VexRMOp("VUCOMISD",        P_66, M_0F,   WIG, 0x2E);
1077         // @formatter:on
1078 
1079         protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
1080             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1081         }
1082 
1083         protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1084             super(opcode, pp, mmmmm, w, op, assertion);




1085         }
1086 
1087         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1088             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1089             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1090             asm.emitByte(op);
1091             asm.emitOperandHelper(dst, src, 0);
1092         }
1093     }
1094 
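    // A hedged usage sketch of the RM form, assuming an AMD64Assembler `asm` whose target
    // supports AVX, registers from jdk.vm.ci.amd64.AMD64 and a placeholder AMD64Address `addr`:
    //     VexRMOp.VCVTTSS2SI.emit(asm, AVXSize.XMM, AMD64.rax, AMD64.xmm0);   // rax = (int) xmm0, truncating
    //     VexRMOp.VSQRTSD.emit(asm, AVXSize.XMM, AMD64.xmm1, addr);           // memory source
    // The CPU_XMM assertion on the VCVTT* ops checks that dst is a general-purpose
    // register and src an XMM register before the prefix and opcode are emitted.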
1095     /**
1096      * VEX-encoded move instructions.
1097      * <p>
1098      * These instructions have two opcodes: op is the forward move instruction with an operand order
1099      * of RM, and opReverse is the reverse move instruction with an operand order of MR.
1100      */
1101     public static final class VexMoveOp extends VexRMOp {
1102         // @formatter:off
1103         public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
1104         public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
1105         public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_,   M_0F, WIG, 0x28, 0x29);
1106         public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
1107         public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_,   M_0F, WIG, 0x10, 0x11);
1108         public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
1109         public static final VexMoveOp VMOVSS  = new VexMoveOp("VMOVSS",  P_F3, M_0F, WIG, 0x10, 0x11);
1110         public static final VexMoveOp VMOVSD  = new VexMoveOp("VMOVSD",  P_F2, M_0F, WIG, 0x10, 0x11);
1111         public static final VexMoveOp VMOVD   = new VexMoveOp("VMOVD",   P_66, M_0F, W0,  0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
1112         public static final VexMoveOp VMOVQ   = new VexMoveOp("VMOVQ",   P_66, M_0F, W1,  0x6E, 0x7E, VEXOpAssertion.XMM_CPU);


1113         // @formatter:on
1114 
1115         private final int opReverse;
1116 
1117         private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1118             this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
1119         }
1120 
1121         private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1122             super(opcode, pp, mmmmm, w, op, assertion);




1123             this.opReverse = opReverse;
1124         }
1125 
1126         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
1127             assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1128             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1129             asm.emitByte(opReverse);
1130             asm.emitOperandHelper(src, dst, 0);
1131         }
1132 
1133         public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1134             assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1135             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1136             asm.emitByte(opReverse);
1137             asm.emitModRM(src, dst);
1138         }
1139     }
1140 
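    // Both directions of a move share one VexMoveOp: the inherited RM emit uses the
    // forward opcode `op`, while the overloads above with an AMD64Address destination or a
    // register-to-register reverse use `opReverse`. A hedged sketch, assuming an
    // AMD64Assembler `asm` and a placeholder stack-slot AMD64Address `slot`:
    //     VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, AMD64.xmm0, slot);   // load  ymm0 <- [slot]
    //     VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, slot, AMD64.xmm0);   // store [slot] <- ymm0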
1141     public interface VexRRIOp {
1142         void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
1143     }
1144 
1145     /**
1146      * VEX-encoded instructions with an operand order of RMI.
1147      */
1148     public static final class VexRMIOp extends VexOp implements VexRRIOp {
1149         // @formatter:off
1150         public static final VexRMIOp VPERMQ   = new VexRMIOp("VPERMQ",   P_66, M_0F3A, W1,  0x00, VEXOpAssertion.AVX2_256ONLY);
1151         public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1152         public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1153         public static final VexRMIOp VPSHUFD  = new VexRMIOp("VPSHUFD",  P_66, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1154         // @formatter:on
1155 
1156         private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1157             super(opcode, pp, mmmmm, w, op, assertion);
1158         }
1159 
1160         @Override
1161         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1162             assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1163             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1164             asm.emitByte(op);
1165             asm.emitModRM(dst, src);
1166             asm.emitByte(imm8);
1167         }
1168 
1169         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
1170             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1171             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1172             asm.emitByte(op);
1173             asm.emitOperandHelper(dst, src, 1);
1174             asm.emitByte(imm8);
1175         }
1176     }
1177 
1178     /**
1179      * VEX-encoded instructions with an operand order of MRI.
1180      */
1181     public static final class VexMRIOp extends VexOp implements VexRRIOp {
1182         // @formatter:off
1183         public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
1184         public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
1185         public static final VexMRIOp VPEXTRB      = new VexMRIOp("VPEXTRB",      P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
1186         public static final VexMRIOp VPEXTRW      = new VexMRIOp("VPEXTRW",      P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
1187         public static final VexMRIOp VPEXTRD      = new VexMRIOp("VPEXTRD",      P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
1188         public static final VexMRIOp VPEXTRQ      = new VexMRIOp("VPEXTRQ",      P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
1189         // @formatter:on
1190 
1191         private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1192             super(opcode, pp, mmmmm, w, op, assertion);
1193         }
1194 
1195         @Override
1196         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1197             assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1198             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1199             asm.emitByte(op);
1200             asm.emitModRM(src, dst);
1201             asm.emitByte(imm8);
1202         }
1203 
1204         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
1205             assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1206             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1207             asm.emitByte(op);
1208             asm.emitOperandHelper(src, dst, 1);
1209             asm.emitByte(imm8);
1210         }
1211     }
1212 
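    // A hedged sketch of the MRI form, assuming an AMD64Assembler `asm` with AVX support:
    //     VexMRIOp.VPEXTRD.emit(asm, AVXSize.XMM, AMD64.rcx, AMD64.xmm2, 1);   // rcx = dword 1 of xmm2
    // The XMM_CPU assertion expects the XMM source in the ModRM reg field and the
    // general-purpose destination in the r/m field.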
1213     /**
1214      * VEX-encoded instructions with an operand order of RVMR.
1215      */
1216     public static class VexRVMROp extends VexOp {
1217         // @formatter:off
1218         public static final VexRVMROp VPBLENDVB  = new VexRVMROp("VPBLENDVB",  P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
1219         public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
1220         public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
1221         // @formatter:on
1222 
1223         protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1224             super(opcode, pp, mmmmm, w, op, assertion);
1225         }
1226 
1227         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
1228             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
1229             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1230             asm.emitByte(op);
1231             asm.emitModRM(dst, src2);
1232             asm.emitByte(mask.encoding() << 4);
1233         }
1234 
1235         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
1236             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
1237             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1238             asm.emitByte(op);
1239             asm.emitOperandHelper(dst, src2, 0);
1240             asm.emitByte(mask.encoding() << 4);
1241         }
1242     }
1243 
1244     /**
1245      * VEX-encoded instructions with an operand order of RVM.
1246      */
1247     public static class VexRVMOp extends VexOp {
1248         // @formatter:off
1249         public static final VexRVMOp VANDPS    = new VexRVMOp("VANDPS",    P_,   M_0F,   WIG, 0x54);
1250         public static final VexRVMOp VANDPD    = new VexRVMOp("VANDPD",    P_66, M_0F,   WIG, 0x54);
1251         public static final VexRVMOp VANDNPS   = new VexRVMOp("VANDNPS",   P_,   M_0F,   WIG, 0x55);
1252         public static final VexRVMOp VANDNPD   = new VexRVMOp("VANDNPD",   P_66, M_0F,   WIG, 0x55);
1253         public static final VexRVMOp VORPS     = new VexRVMOp("VORPS",     P_,   M_0F,   WIG, 0x56);
1254         public static final VexRVMOp VORPD     = new VexRVMOp("VORPD",     P_66, M_0F,   WIG, 0x56);
1255         public static final VexRVMOp VXORPS    = new VexRVMOp("VXORPS",    P_,   M_0F,   WIG, 0x57);
1256         public static final VexRVMOp VXORPD    = new VexRVMOp("VXORPD",    P_66, M_0F,   WIG, 0x57);
1257         public static final VexRVMOp VADDPS    = new VexRVMOp("VADDPS",    P_,   M_0F,   WIG, 0x58);
1258         public static final VexRVMOp VADDPD    = new VexRVMOp("VADDPD",    P_66, M_0F,   WIG, 0x58);
1259         public static final VexRVMOp VADDSS    = new VexRVMOp("VADDSS",    P_F3, M_0F,   WIG, 0x58);


1307         public static final VexRVMOp VPCMPEQD  = new VexRVMOp("VPCMPEQD",  P_66, M_0F,   WIG, 0x76, VEXOpAssertion.AVX1_2);
1308         public static final VexRVMOp VPCMPEQQ  = new VexRVMOp("VPCMPEQQ",  P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
1309         public static final VexRVMOp VPCMPGTB  = new VexRVMOp("VPCMPGTB",  P_66, M_0F,   WIG, 0x64, VEXOpAssertion.AVX1_2);
1310         public static final VexRVMOp VPCMPGTW  = new VexRVMOp("VPCMPGTW",  P_66, M_0F,   WIG, 0x65, VEXOpAssertion.AVX1_2);
1311         public static final VexRVMOp VPCMPGTD  = new VexRVMOp("VPCMPGTD",  P_66, M_0F,   WIG, 0x66, VEXOpAssertion.AVX1_2);
1312         public static final VexRVMOp VPCMPGTQ  = new VexRVMOp("VPCMPGTQ",  P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
1313         public static final VexRVMOp VFMADD231SS = new VexRVMOp("VFMADD231SS", P_66, M_0F38, W0, 0xB9, VEXOpAssertion.FMA);
1314         public static final VexRVMOp VFMADD231SD = new VexRVMOp("VFMADD231SD", P_66, M_0F38, W1, 0xB9, VEXOpAssertion.FMA);
1315         // @formatter:on
1316 
1317         private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
1318             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1319         }
1320 
1321         protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1322             super(opcode, pp, mmmmm, w, op, assertion);
1323         }
1324 
1325         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1326             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1327             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1328             asm.emitByte(op);
1329             asm.emitModRM(dst, src2);
1330         }
1331 
1332         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1333             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1334             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1335             asm.emitByte(op);
1336             asm.emitOperandHelper(dst, src2, 0);
1337         }
1338     }
1339 
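    // A hedged sketch of the three-operand RVM form, assuming an AMD64Assembler `asm`
    // whose target supports AVX:
    //     VexRVMOp.VADDPS.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2);   // ymm0 = ymm1 + ymm2
    // dst lands in the ModRM reg field, src1 in VEX.vvvv, and src2 in the r/m field (or
    // in memory via the AMD64Address overload).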
1340     public static final class VexGeneralPurposeRVMOp extends VexRVMOp {
1341         // @formatter:off
1342         public static final VexGeneralPurposeRVMOp ANDN   = new VexGeneralPurposeRVMOp("ANDN",   P_,   M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1);
1343         public static final VexGeneralPurposeRVMOp MULX   = new VexGeneralPurposeRVMOp("MULX",   P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2);
1344         public static final VexGeneralPurposeRVMOp PDEP   = new VexGeneralPurposeRVMOp("PDEP",   P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1345         public static final VexGeneralPurposeRVMOp PEXT   = new VexGeneralPurposeRVMOp("PEXT",   P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1346         // @formatter:on
1347 
1348         private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1349             super(opcode, pp, mmmmm, w, op, assertion);
1350         }
1351 
1352         @Override
1353         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1354             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null);
1355             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1356             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1357             asm.emitByte(op);
1358             asm.emitModRM(dst, src2);
1359         }
1360 
1361         @Override
1362         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1363             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null);
1364             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1365             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1366             asm.emitByte(op);
1367             asm.emitOperandHelper(dst, src2, 0);
1368         }
1369     }
1370 
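    // For these BMI ops, AVXSize.DWORD/QWORD select the 32- or 64-bit general-purpose form
    // through VEX.W rather than a vector length. A hedged sketch, assuming an
    // AMD64Assembler `asm` whose target supports BMI1:
    //     VexGeneralPurposeRVMOp.ANDN.emit(asm, AVXSize.QWORD, AMD64.rax, AMD64.rbx, AMD64.rcx);   // rax = ~rbx & rcx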
1371     public static final class VexGeneralPurposeRMVOp extends VexOp {
1372         // @formatter:off
1373         public static final VexGeneralPurposeRMVOp BEXTR  = new VexGeneralPurposeRMVOp("BEXTR",  P_,   M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1);
1374         public static final VexGeneralPurposeRMVOp BZHI   = new VexGeneralPurposeRMVOp("BZHI",   P_,   M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1375         public static final VexGeneralPurposeRMVOp SARX   = new VexGeneralPurposeRMVOp("SARX",   P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1376         public static final VexGeneralPurposeRMVOp SHRX   = new VexGeneralPurposeRMVOp("SHRX",   P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1377         public static final VexGeneralPurposeRMVOp SHLX   = new VexGeneralPurposeRMVOp("SHLX",   P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1378         // @formatter:on
1379 
1380         private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1381             super(opcode, pp, mmmmm, w, op, assertion);
1382         }
1383 
1384         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1385             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null);
1386             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1387             asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1388             asm.emitByte(op);
1389             asm.emitModRM(dst, src1);
1390         }
1391 
1392         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) {
1393             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null);
1394             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1395             asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1396             asm.emitByte(op);
1397             asm.emitOperandHelper(dst, src1, 0);
1398         }
1399     }
1400 
1401     public static final class VexGeneralPurposeRMOp extends VexRMOp {
1402         // @formatter:off
1403         public static final VexGeneralPurposeRMOp BLSI    = new VexGeneralPurposeRMOp("BLSI",   P_,    M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1);
1404         public static final VexGeneralPurposeRMOp BLSMSK  = new VexGeneralPurposeRMOp("BLSMSK", P_,    M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1);
1405         public static final VexGeneralPurposeRMOp BLSR    = new VexGeneralPurposeRMOp("BLSR",   P_,    M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1);
1406         // @formatter:on
1407         private final int ext;
1408 
1409         private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) {
1410             super(opcode, pp, mmmmm, w, op, assertion);
1411             this.ext = ext;
1412         }
1413 
1414         @Override
1415         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1416             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1417             asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1418             asm.emitByte(op);
1419             asm.emitModRM(ext, src);
1420         }
1421 
1422         @Override
1423         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1424             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1425             asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1426             asm.emitByte(op);
1427             asm.emitOperandHelper(ext, src, 0);
1428         }
1429     }
1430 
1431     /**
1432      * VEX-encoded shift instructions with an operand order of either RVM or VMI.
1433      */
1434     public static final class VexShiftOp extends VexRVMOp implements VexRRIOp {
1435         // @formatter:off
1436         public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2);
1437         public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2);
1438         public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2);
1439         public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4);
1440         public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4);
1441         public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6);
1442         public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6);
1443         public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6);
1444         // @formatter:on
1445 
1446         private final int immOp;
1447         private final int r;
1448 
1449         private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) {
1450             super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2);
1451             this.immOp = immOp;
1452             this.r = r;
1453         }
1454 
1455         @Override
1456         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1457             assert assertion.check((AMD64) asm.target.arch, size, null, dst, src);
1458             asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, false);
1459             asm.emitByte(immOp);
1460             asm.emitModRM(r, src);
1461             asm.emitByte(imm8);
1462         }
1463     }
1464 
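    // VexShiftOp carries two encodings: the inherited RVM emit shifts by a count held in
    // an XMM register (opcode `op`), while the VMI emit above shifts by an immediate using
    // `immOp` with the extension `r` in the ModRM reg field. A hedged sketch, assuming an
    // AMD64Assembler `asm` whose target supports AVX2 for the 256-bit form:
    //     VexShiftOp.VPSLLD.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, 4);   // ymm0 = ymm1 << 4, per dword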
1465     public static final class VexMaskMoveOp extends VexOp {
1466         // @formatter:off
1467         public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E);
1468         public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F);
1469         public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1470         public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1471         // @formatter:on
1472 
1473         private final int opReverse;
1474 
1475         private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1476             this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
1477         }
1478 
1479         private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1480             super(opcode, pp, mmmmm, w, op, assertion);
1481             this.opReverse = opReverse;
1482         }
1483 
1484         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) {
1485             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null);
1486             asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, false);
1487             asm.emitByte(op);
1488             asm.emitOperandHelper(dst, src, 0);
1489         }
1490 
1491         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) {
1492             assert assertion.check((AMD64) asm.target.arch, size, src, mask, null);
1493             asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, false);
1494             asm.emitByte(opReverse);
1495             asm.emitOperandHelper(src, dst, 0);
1496         }
1497     }
1498 
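    // A hedged sketch of a masked load/store pair, assuming an AMD64Assembler `asm` with
    // AVX and a placeholder AMD64Address `addr`; lanes whose mask element has the sign bit
    // clear are zeroed on a load and left untouched on a store:
    //     VexMaskMoveOp.VMASKMOVPS.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, addr);   // masked load
    //     VexMaskMoveOp.VMASKMOVPS.emit(asm, AVXSize.YMM, addr, AMD64.xmm1, AMD64.xmm0);   // masked store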
1499     /**
1500      * VEX-encoded instructions with an operand order of RVMI.
1501      */
1502     public static final class VexRVMIOp extends VexOp {
1503         // @formatter:off
1504         public static final VexRVMIOp VSHUFPS     = new VexRVMIOp("VSHUFPS",     P_,   M_0F,   WIG, 0xC6);
1505         public static final VexRVMIOp VSHUFPD     = new VexRVMIOp("VSHUFPD",     P_66, M_0F,   WIG, 0xC6);
1506         public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0,  0x18, VEXOpAssertion.AVX1_256ONLY);
1507         public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0,  0x38, VEXOpAssertion.AVX2_256ONLY);
1508         // @formatter:on
1509 
1510         private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) {
1511             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1512         }
1513 
1514         private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1515             super(opcode, pp, mmmmm, w, op, assertion);
1516         }
1517 
1518         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) {
1519             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1520             assert (imm8 & 0xFF) == imm8;
1521             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1522             asm.emitByte(op);
1523             asm.emitModRM(dst, src2);
1524             asm.emitByte(imm8);
1525         }
1526 
1527         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) {
1528             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1529             assert (imm8 & 0xFF) == imm8;
1530             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1531             asm.emitByte(op);
1532             asm.emitOperandHelper(dst, src2, 1);
1533             asm.emitByte(imm8);
1534         }
1535     }
1536 
1537     /**
1538      * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a
1539      * comparison operator.
1540      */
1541     public static final class VexFloatCompareOp extends VexOp {
1542         // @formatter:off
1543         public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_,   M_0F, WIG, 0xC2);
1544         public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
1545         public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F2, M_0F, WIG, 0xC2);
1546         public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
1547         // @formatter:on
1548 
1549         public enum Predicate {
1550             EQ_OQ(0x00),
1551             LT_OS(0x01),
1552             LE_OS(0x02),


1614                             return LT_OQ;
1615                         case LE:
1616                             return LE_OQ;
1617                         case GT:
1618                             return GT_OQ;
1619                         case GE:
1620                             return GE_OQ;
1621                         default:
1622                             throw GraalError.shouldNotReachHere();
1623                     }
1624                 }
1625             }
1626         }
1627 
1628         private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) {
1629             super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1630         }
1631 
1632         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) {
1633             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1634             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1635             asm.emitByte(op);
1636             asm.emitModRM(dst, src2);
1637             asm.emitByte(p.imm8);
1638         }
1639 
1640         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) {
1641             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1642             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1643             asm.emitByte(op);
1644             asm.emitOperandHelper(dst, src2, 1);
1645             asm.emitByte(p.imm8);
1646         }
1647     }
1648 
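    // A hedged sketch of a VEX float compare, assuming an AMD64Assembler `asm` with AVX;
    // the Predicate immediate selects the comparison and each element of dst becomes an
    // all-ones or all-zeros mask:
    //     VexFloatCompareOp.VCMPPS.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2, VexFloatCompareOp.Predicate.EQ_OQ);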
1649     public final void addl(AMD64Address dst, int imm32) {
1650         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1651     }
1652 
1653     public final void addl(Register dst, int imm32) {
1654         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1655     }
1656 
1657     public final void addl(Register dst, Register src) {
1658         ADD.rmOp.emit(this, DWORD, dst, src);
1659     }
1660 
1661     public final void addpd(Register dst, Register src) {
1662         SSEOp.ADD.emit(this, PD, dst, src);
1663     }
1664 


3731 
3732     public void lfence() {
3733         emitByte(0x0f);
3734         emitByte(0xae);
3735         emitByte(0xe8);
3736     }
3737 
3738     public final void vptest(Register dst, Register src) {
3739         VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
3740     }
3741 
3742     public final void vpxor(Register dst, Register nds, Register src) {
3743         VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3744     }
3745 
3746     public final void vpxor(Register dst, Register nds, AMD64Address src) {
3747         VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3748     }
3749 
3750     public final void vmovdqu(Register dst, AMD64Address src) {
3751         VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
3752     }
3753 
3754     public final void vmovdqu(AMD64Address dst, Register src) {
3755         assert inRC(XMM, src);
3756         VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
3757     }
3758 
3759     public final void vpmovzxbw(Register dst, AMD64Address src) {
3760         assert supports(CPUFeature.AVX2);
3761         VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
3762     }
3763 
3764     public final void vzeroupper() {
3765         emitVEX(L128, P_, M_0F, W0, 0, 0, true);
3766         emitByte(0x77);
3767     }
3768 
3769     // Insn: KORTESTD k1, k2
3770 
 3771     // This instruction sets ZF if the OR of the two masks is all zeros and CF if it is all ones
3772     public final void kortestd(Register src1, Register src2) {
3773         assert supports(CPUFeature.AVX512BW);
3774         assert inRC(MASK, src1) && inRC(MASK, src2);
3775         // Code: VEX.L0.66.0F.W1 98 /r
3776         vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
3777         emitByte(0x98);
3778         emitModRM(src1, src2);
3779     }
3780 
3781     // Insn: KORTESTQ k1, k2
3782 
 3783     // This instruction sets ZF if the OR of the two masks is all zeros and CF if it is all ones
3784     public final void kortestq(Register src1, Register src2) {
3785         assert supports(CPUFeature.AVX512BW);
3786         assert inRC(MASK, src1) && inRC(MASK, src2);
3787         // Code: VEX.L0.0F.W1 98 /r
3788         vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, true);
3789         emitByte(0x98);
3790         emitModRM(src1, src2);
3791     }
3792 
3793     public final void kmovd(Register dst, Register src) {
3794         assert supports(CPUFeature.AVX512BW);
3795         assert inRC(MASK, dst) || inRC(CPU, dst);
3796         assert inRC(MASK, src) || inRC(CPU, src);
3797         assert !(inRC(CPU, dst) && inRC(CPU, src));
3798 
3799         if (inRC(MASK, dst)) {
3800             if (inRC(MASK, src)) {
3801                 // kmovd(KRegister dst, KRegister src):
3802                 // Insn: KMOVD k1, k2/m32
3803                 // Code: VEX.L0.66.0F.W1 90 /r
3804                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, true);
3805                 emitByte(0x90);
3806                 emitModRM(dst, src);
3807             } else {
3808                 // kmovd(KRegister dst, Register src)
3809                 // Insn: KMOVD k1, r32
3810                 // Code: VEX.L0.F2.0F.W0 92 /r
3811                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
3812                 emitByte(0x92);
3813                 emitModRM(dst, src);
3814             }
3815         } else {
3816             if (inRC(MASK, src)) {
3817                 // kmovd(Register dst, KRegister src)
3818                 // Insn: KMOVD r32, k1
3819                 // Code: VEX.L0.F2.0F.W0 93 /r
3820                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
3821                 emitByte(0x93);
3822                 emitModRM(dst, src);
3823             } else {
3824                 throw GraalError.shouldNotReachHere();
3825             }
3826         }
3827     }
3828 
3829     public final void kmovq(Register dst, Register src) {
3830         assert supports(CPUFeature.AVX512BW);
3831         assert inRC(MASK, dst) || inRC(CPU, dst);
3832         assert inRC(MASK, src) || inRC(CPU, src);
3833         assert !(inRC(CPU, dst) && inRC(CPU, src));
3834 
3835         if (inRC(MASK, dst)) {
3836             if (inRC(MASK, src)) {
3837                 // kmovq(KRegister dst, KRegister src):
3838                 // Insn: KMOVQ k1, k2/m64
3839                 // Code: VEX.L0.0F.W1 90 /r
3840                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, true);
3841                 emitByte(0x90);
3842                 emitModRM(dst, src);
3843             } else {
3844                 // kmovq(KRegister dst, Register src)
3845                 // Insn: KMOVQ k1, r64
3846                 // Code: VEX.L0.F2.0F.W1 92 /r
3847                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
3848                 emitByte(0x92);
3849                 emitModRM(dst, src);
3850             }
3851         } else {
3852             if (inRC(MASK, src)) {
3853                 // kmovq(Register dst, KRegister src)
3854                 // Insn: KMOVQ r64, k1
3855                 // Code: VEX.L0.F2.0F.W1 93 /r
3856                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
3857                 emitByte(0x93);
3858                 emitModRM(dst, src);
3859             } else {
3860                 throw GraalError.shouldNotReachHere();
3861             }
3862         }
3863     }
3864 
3865     // Insn: KTESTD k1, k2
3866 
3867     public final void ktestd(Register src1, Register src2) {
3868         assert supports(CPUFeature.AVX512BW);
3869         assert inRC(MASK, src1) && inRC(MASK, src2);
3870         // Code: VEX.L0.66.0F.W1 99 /r
3871         vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
3872         emitByte(0x99);
3873         emitModRM(src1, src2);
3874     }
3875 
3876     public final void evmovdqu64(Register dst, AMD64Address src) {
3877         assert supports(CPUFeature.AVX512F);
3878         assert inRC(XMM, dst);
3879         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
3880         emitByte(0x6F);
3881         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3882     }
3883 
3884     // Insn: VPMOVZXBW zmm1, m256
3885 
3886     public final void evpmovzxbw(Register dst, AMD64Address src) {
3887         assert supports(CPUFeature.AVX512BW);
3888         assert inRC(XMM, dst);
3889         // Code: EVEX.512.66.0F38.WIG 30 /r
3890         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
3891         emitByte(0x30);
3892         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3893     }
3894 
3895     public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
3896         assert supports(CPUFeature.AVX512BW);
3897         assert inRC(MASK, kdst) && inRC(XMM, nds);
3898         evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
3899         emitByte(0x74);
3900         emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3901     }
3902 
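    // A hedged sketch combining the AVX-512 mask helpers above, assuming this assembler
    // targets a CPU with AVX512BW, `addr` is a placeholder AMD64Address and k1 comes from
    // jdk.vm.ci.amd64.AMD64:
    //     evpcmpeqb(AMD64.k1, AMD64.xmm0, addr);   // k1 = bytewise (zmm0 == [addr])
    //     kortestq(AMD64.k1, AMD64.k1);            // ZF if no byte matched, CF if all 64 matched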
3903     // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512
3904     // -----
3905     // Insn: VMOVDQU16 zmm1, m512
3906 
3907     public final void evmovdqu16(Register dst, AMD64Address src) {
3908         assert supports(CPUFeature.AVX512BW);
3909         assert inRC(XMM, dst);
3910         // Code: EVEX.512.F2.0F.W1 6F /r
3911         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3912         emitByte(0x6F);
3913         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3914     }
3915 
3916     // Insn: VMOVDQU16 zmm1, k1:z, m512
3917 
3918     public final void evmovdqu16(Register dst, Register mask, AMD64Address src) {
3919         assert supports(CPUFeature.AVX512BW);
3920         assert inRC(XMM, dst) && inRC(MASK, mask);
3921         // Code: EVEX.512.F2.0F.W1 6F /r
3922         evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0);
3923         emitByte(0x6F);
3924         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3925     }
3926 
3927     // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1
3928     // -----
3929     // Insn: VMOVDQU16 m512, zmm1
3930 
3931     public final void evmovdqu16(AMD64Address dst, Register src) {
3932         assert supports(CPUFeature.AVX512BW);
3933         assert inRC(XMM, src);
3934         // Code: EVEX.512.F2.0F.W1 7F /r
3935         evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3936         emitByte(0x7F);
3937         emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3938     }
3939 
3940     // Insn: VMOVDQU16 m512, k1, zmm1
3941 
3942     public final void evmovdqu16(AMD64Address dst, Register mask, Register src) {
3943         assert supports(CPUFeature.AVX512BW);
3944         assert inRC(MASK, mask) && inRC(XMM, src);
3945         // Code: EVEX.512.F2.0F.W1 7F /r
3946         evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3947         emitByte(0x7F);
3948         emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3949     }
3950 
3951     // Insn: VPBROADCASTW zmm1 {k1}{z}, reg
3952     // -----
3953     // Insn: VPBROADCASTW zmm1, reg
3954 
3955     public final void evpbroadcastw(Register dst, Register src) {
3956         assert supports(CPUFeature.AVX512BW);
3957         assert inRC(XMM, dst) && inRC(CPU, src);
3958         // Code: EVEX.512.66.0F38.W0 7B /r
3959         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0);
3960         emitByte(0x7B);
3961         emitModRM(dst, src);
3962     }
3963 
3964     // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
3965     // -----
3966     // Insn: VPCMPUW k1, zmm2, zmm3, imm8
3967 
3968     public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) {


3983         assert supports(CPUFeature.AVX512BW);
3984         assert inRC(MASK, kdst) && inRC(MASK, mask);
3985         assert inRC(XMM, nds) && inRC(XMM, src);
3986         // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
3987         evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
3988         emitByte(0x3E);
3989         emitModRM(kdst, src);
3990         emitByte(vcc);
3991     }
3992 
3993     // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2
3994     // -----
3995     // Insn: VPMOVWB m256, zmm2
3996 
3997     public final void evpmovwb(AMD64Address dst, Register src) {
3998         assert supports(CPUFeature.AVX512BW);
3999         assert inRC(XMM, src);
4000         // Code: EVEX.512.F3.0F38.W0 30 /r
4001         evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
4002         emitByte(0x30);
4003         emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
4004     }
4005 
4006     // Insn: VPMOVWB m256, k1, zmm2
4007 
4008     public final void evpmovwb(AMD64Address dst, Register mask, Register src) {
4009         assert supports(CPUFeature.AVX512BW);
4010         assert inRC(MASK, mask) && inRC(XMM, src);
4011         // Code: EVEX.512.F3.0F38.W0 30 /r
4012         evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
4013         emitByte(0x30);
4014         emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
4015     }
4016 
4017     // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
4018     // -----
4019     // Insn: VPMOVZXBW zmm1, k1, m256
4020 
4021     public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) {
4022         assert supports(CPUFeature.AVX512BW);
4023         assert inRC(MASK, mask) && inRC(XMM, dst);
4024         // Code: EVEX.512.66.0F38.WIG 30 /r
4025         evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
4026         emitByte(0x30);
4027         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
4028     }
4029 
4030 }


  23 
  24 
  25 package org.graalvm.compiler.asm.amd64;
  26 
  27 import static jdk.vm.ci.amd64.AMD64.CPU;
  28 import static jdk.vm.ci.amd64.AMD64.MASK;
  29 import static jdk.vm.ci.amd64.AMD64.XMM;
  30 import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512BW;
  31 import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512CD;
  32 import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512DQ;
  33 import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512F;
  34 import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512VL;
  35 import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
  36 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
  37 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIntelNops;
  38 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
  39 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
  40 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
  41 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
  42 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
  43 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
  44 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
  45 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
  46 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
  47 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
  48 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
  49 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
  50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
  51 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
  52 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
  53 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
  54 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
  55 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
  56 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
  57 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
  58 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
  59 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
  60 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
  61 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
  62 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
  63 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L512;
  64 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
  65 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
  66 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
  67 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
  68 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
  69 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
  70 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
  71 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
  72 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
  73 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
  74 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
  75 import static org.graalvm.compiler.core.common.NumUtil.isByte;
  76 import static org.graalvm.compiler.core.common.NumUtil.isInt;
  77 import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
  78 import static org.graalvm.compiler.core.common.NumUtil.isUByte;
  79 
  80 import java.util.EnumSet;
  81 
  82 import org.graalvm.compiler.asm.Label;
  83 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;


 884         public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
 885         public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
 886         public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
 887         public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
 888         public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
 889         public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
 890         public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
 891         // @formatter:on
 892 
 893         public final AMD64MOp m1Op;
 894         public final AMD64MOp mcOp;
 895         public final AMD64MIOp miOp;
 896 
 897         private AMD64Shift(String opcode, int code) {
 898             m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
 899             mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
 900             miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
 901         }
 902     }
 903 
 904     private enum EVEXFeatureAssertion {
 905         AVX512F_ALL(EnumSet.of(AVX512F), EnumSet.of(AVX512F), EnumSet.of(AVX512F)),
 906         AVX512F_128ONLY(EnumSet.of(AVX512F), null, null),
 907         AVX512F_VL(EnumSet.of(AVX512F, AVX512VL), EnumSet.of(AVX512F, AVX512VL), EnumSet.of(AVX512F)),
 908         AVX512CD_VL(EnumSet.of(AVX512F, AVX512CD, AVX512VL), EnumSet.of(AVX512F, AVX512CD, AVX512VL), EnumSet.of(AVX512F, AVX512CD)),
 909         AVX512DQ_VL(EnumSet.of(AVX512F, AVX512DQ, AVX512VL), EnumSet.of(AVX512F, AVX512DQ, AVX512VL), EnumSet.of(AVX512F, AVX512DQ)),
 910         AVX512BW_VL(EnumSet.of(AVX512F, AVX512BW, AVX512VL), EnumSet.of(AVX512F, AVX512BW, AVX512VL), EnumSet.of(AVX512F, AVX512BW));
 911 
 912         private final EnumSet<CPUFeature> l128features;
 913         private final EnumSet<CPUFeature> l256features;
 914         private final EnumSet<CPUFeature> l512features;
 915 
 916         EVEXFeatureAssertion(EnumSet<CPUFeature> l128features, EnumSet<CPUFeature> l256features, EnumSet<CPUFeature> l512features) {
 917             this.l128features = l128features;
 918             this.l256features = l256features;
 919             this.l512features = l512features;
 920         }
 921 
 922         public boolean check(AMD64 arch, int l) {
 923             switch (l) {
 924                 case L128:
 925                     assert l128features != null && arch.getFeatures().containsAll(l128features) : "emitting illegal 128 bit instruction";
 926                     break;
 927                 case L256:
 928                     assert l256features != null && arch.getFeatures().containsAll(l256features) : "emitting illegal 256 bit instruction";
 929                     break;
 930                 case L512:
 931                     assert l512features != null && arch.getFeatures().containsAll(l512features) : "emitting illegal 512 bit instruction";
 932                     break;
 933             }
 934             return true;
 935         }
 936 
 937         public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
 938             switch (avxSize) {
 939                 case XMM:
 940                     return l128features != null && features.containsAll(l128features);
 941                 case YMM:
 942                     return l256features != null && features.containsAll(l256features);
 943                 case ZMM:
 944                     return l512features != null && features.containsAll(l512features);
 945                 default:
 946                     throw GraalError.shouldNotReachHere();
 947             }
 948         }
 949     }
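    // Usage sketch: each constant above lists the features required at 128-, 256- and 512-bit
    // vector length; a null entry means that length is not legal for the instruction. Assuming an
    // AMD64Assembler `asm`, a check along these lines is what the surrounding code performs:
    //
    //   EnumSet<CPUFeature> features = ((AMD64) asm.target.arch).getFeatures();
    //   boolean zmmOk = EVEXFeatureAssertion.AVX512BW_VL.supports(features, AVXSize.ZMM);
    //   // true only if the target has AVX512F and AVX512BW; the 128/256-bit forms additionally need AVX512VL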
 950 
 951     private enum VEXOpAssertion {
 952         AVX1(CPUFeature.AVX, CPUFeature.AVX, null),
 953         AVX1_2(CPUFeature.AVX, CPUFeature.AVX2, null),
 954         AVX2(CPUFeature.AVX2, CPUFeature.AVX2, null),
 955         AVX1_128ONLY(CPUFeature.AVX, null, null),
 956         AVX1_256ONLY(null, CPUFeature.AVX, null),
 957         AVX2_256ONLY(null, CPUFeature.AVX2, null),
 958         XMM_CPU(CPUFeature.AVX, null, null, XMM, null, CPU, null),
 959         XMM_XMM_CPU(CPUFeature.AVX, null, null, XMM, XMM, CPU, null),
 960         CPU_XMM(CPUFeature.AVX, null, null, CPU, null, XMM, null),
 961         AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, null, CPU, null, XMM, null),
 962         BMI1(CPUFeature.BMI1, null, null, CPU, CPU, CPU, null),
 963         BMI2(CPUFeature.BMI2, null, null, CPU, CPU, CPU, null),
 964         FMA(CPUFeature.FMA, null, null, XMM, XMM, XMM, null),
 965 
 966         XMM_CPU_AVX512F_128ONLY(CPUFeature.AVX, null, EVEXFeatureAssertion.AVX512F_128ONLY, XMM, null, CPU, null),
 967         AVX1_AVX512F_ALL(CPUFeature.AVX, CPUFeature.AVX, EVEXFeatureAssertion.AVX512F_ALL),
 968         AVX1_AVX512F_VL(CPUFeature.AVX, CPUFeature.AVX, EVEXFeatureAssertion.AVX512F_VL);
 969 
 970         private final CPUFeature l128feature;
 971         private final CPUFeature l256feature;
 972         private final EVEXFeatureAssertion l512features;
 973 
 974         private final RegisterCategory rCategory;
 975         private final RegisterCategory vCategory;
 976         private final RegisterCategory mCategory;
 977         private final RegisterCategory imm8Category;
 978 
 979         VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, EVEXFeatureAssertion l512features) {
 980             this(l128feature, l256feature, l512features, XMM, XMM, XMM, XMM);
 981         }
 982 
 983         VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, EVEXFeatureAssertion l512features, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory,
 984                         RegisterCategory imm8Category) {
 985             this.l128feature = l128feature;
 986             this.l256feature = l256feature;
 987             this.l512features = l512features;
 988             this.rCategory = rCategory;
 989             this.vCategory = vCategory;
 990             this.mCategory = mCategory;
 991             this.imm8Category = imm8Category;
 992         }
 993 
 994         public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
 995             return check(arch, getLFlag(size), r, v, m, null);
 996         }
 997 
 998         public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
 999             return check(arch, getLFlag(size), r, v, m, imm8);
1000         }
1001 
1002         public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
1003             if (isAVX512Register(r) || isAVX512Register(v) || isAVX512Register(m) || l == L512) {
1004                 assert l512features != null && l512features.check(arch, l);
1005             } else if (l == L128) {
1006                 assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128 bit instruction";
1007             } else if (l == L256) {

1008                 assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256 bit instruction";

1009             }
1010             if (r != null) {
1011                 assert r.getRegisterCategory().equals(rCategory);
1012             }
1013             if (v != null) {
1014                 assert v.getRegisterCategory().equals(vCategory);
1015             }
1016             if (m != null) {
1017                 assert m.getRegisterCategory().equals(mCategory);
1018             }
1019             if (imm8 != null) {
1020                 assert imm8.getRegisterCategory().equals(imm8Category);
1021             }
1022             return true;
1023         }
1024 
1025         public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize, boolean useZMMRegisters) {
1026             if (useZMMRegisters || avxSize == AVXSize.ZMM) {
1027                 return l512features != null && l512features.supports(features, avxSize);
1028             } else if (avxSize == AVXSize.XMM) {
1029                 return l128feature != null && features.contains(l128feature);
1030             } else if (avxSize == AVXSize.YMM) {
1031                 return l256feature != null && features.contains(l256feature);


1032             }
1033             throw GraalError.shouldNotReachHere();
1034         }
1035     }
1036 
1037     /**
1038      * Base class for VEX-encoded instructions.
1039      */
1040     public static class VexOp {
1041         protected final int pp;
1042         protected final int mmmmm;
1043         protected final int w;
1044         protected final int op;
1045 
1046         private final String opcode;
1047         protected final VEXOpAssertion assertion;
1048 
1049         protected final EVEXTuple evexTuple;
1050         protected final int wEvex;
1051 
1052         protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
1053             this.pp = pp;
1054             this.mmmmm = mmmmm;
1055             this.w = w;
1056             this.op = op;
1057             this.opcode = opcode;
1058             this.assertion = assertion;
1059             this.evexTuple = evexTuple;
1060             this.wEvex = wEvex;
1061         }
1062 
1063         protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1064             this(opcode, pp, mmmmm, w, op, assertion, EVEXTuple.INVALID, WIG);
1065         }
1066 
1067         public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
1068             return isSupported(vasm, size, false);
1069         }
1070 
1071         public final boolean isSupported(AMD64Assembler vasm, AVXSize size, boolean useZMMRegisters) {
1072             return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size, useZMMRegisters);
1073         }
1074 
1075         @Override
1076         public String toString() {
1077             return opcode;
1078         }
1079 
1080         protected final int getDisp8Scale(boolean useEvex, AVXSize size) {
1081             return useEvex ? evexTuple.getDisp8ScalingFactor(size) : DEFAULT_DISP8_SCALE;
1082         }
1083 
1084     }
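    // The four encoding fields above correspond directly to the textual form of a VEX opcode:
    // pp selects the implied SIMD prefix (66/F2/F3 or none), mmmmm the opcode map (0F/0F38/0F3A),
    // w the VEX.W bit, and op the opcode byte. For example, VexRMOp.VPTEST below is declared with
    // P_66, M_0F38, WIG, 0x17, i.e. "VEX.66.0F38.WIG 17 /r". A minimal usage sketch, assuming an
    // AMD64Assembler `asm` and XMM-category registers `dst` and `src` (placeholder names):
    //
    //   if (VexRMOp.VPTEST.isSupported(asm, AVXSize.YMM)) {
    //       VexRMOp.VPTEST.emit(asm, AVXSize.YMM, dst, src);   // vptest ymm, ymm
    //   }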
1085 
1086     /**
1087      * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
1088      */
1089     public static class VexRROp extends VexOp {
1090         // @formatter:off
1091         public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY, EVEXTuple.INVALID, WIG);
1092         // @formatter:on
1093 
1094         protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
1095             super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);




1096         }
1097 
1098         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1099             assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1100             assert op != 0x1A || op != 0x5A;
1101             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
1102             asm.emitByte(op);
1103             asm.emitModRM(dst, src);
1104         }
1105     }
1106 
1107     /**
1108      * VEX-encoded instructions with an operand order of RM.
1109      */
1110     public static class VexRMOp extends VexRROp {
1111         // @formatter:off
1112         public static final VexRMOp VCVTTSS2SI      = new VexRMOp("VCVTTSS2SI",      P_F3, M_0F,   W0,  0x2C, VEXOpAssertion.CPU_XMM);
1113         public static final VexRMOp VCVTTSS2SQ      = new VexRMOp("VCVTTSS2SQ",      P_F3, M_0F,   W1,  0x2C, VEXOpAssertion.CPU_XMM);
1114         public static final VexRMOp VCVTTSD2SI      = new VexRMOp("VCVTTSD2SI",      P_F2, M_0F,   W0,  0x2C, VEXOpAssertion.CPU_XMM);
1115         public static final VexRMOp VCVTTSD2SQ      = new VexRMOp("VCVTTSD2SQ",      P_F2, M_0F,   W1,  0x2C, VEXOpAssertion.CPU_XMM);
1116         public static final VexRMOp VCVTPS2PD       = new VexRMOp("VCVTPS2PD",       P_,   M_0F,   WIG, 0x5A);
1117         public static final VexRMOp VCVTPD2PS       = new VexRMOp("VCVTPD2PS",       P_66, M_0F,   WIG, 0x5A);
1118         public static final VexRMOp VCVTDQ2PS       = new VexRMOp("VCVTDQ2PS",       P_,   M_0F,   WIG, 0x5B);
1119         public static final VexRMOp VCVTTPS2DQ      = new VexRMOp("VCVTTPS2DQ",      P_F3, M_0F,   WIG, 0x5B);
1120         public static final VexRMOp VCVTTPD2DQ      = new VexRMOp("VCVTTPD2DQ",      P_66, M_0F,   WIG, 0xE6);
1121         public static final VexRMOp VCVTDQ2PD       = new VexRMOp("VCVTDQ2PD",       P_F3, M_0F,   WIG, 0xE6);


1133         public static final VexRMOp VPMOVSXBQ       = new VexRMOp("VPMOVSXBQ",       P_66, M_0F38, WIG, 0x22);
1134         public static final VexRMOp VPMOVSXWD       = new VexRMOp("VPMOVSXWD",       P_66, M_0F38, WIG, 0x23);
1135         public static final VexRMOp VPMOVSXWQ       = new VexRMOp("VPMOVSXWQ",       P_66, M_0F38, WIG, 0x24);
1136         public static final VexRMOp VPMOVSXDQ       = new VexRMOp("VPMOVSXDQ",       P_66, M_0F38, WIG, 0x25);
1137         public static final VexRMOp VPMOVZXBW       = new VexRMOp("VPMOVZXBW",       P_66, M_0F38, WIG, 0x30);
1138         public static final VexRMOp VPMOVZXBD       = new VexRMOp("VPMOVZXBD",       P_66, M_0F38, WIG, 0x31);
1139         public static final VexRMOp VPMOVZXBQ       = new VexRMOp("VPMOVZXBQ",       P_66, M_0F38, WIG, 0x32);
1140         public static final VexRMOp VPMOVZXWD       = new VexRMOp("VPMOVZXWD",       P_66, M_0F38, WIG, 0x33);
1141         public static final VexRMOp VPMOVZXWQ       = new VexRMOp("VPMOVZXWQ",       P_66, M_0F38, WIG, 0x34);
1142         public static final VexRMOp VPMOVZXDQ       = new VexRMOp("VPMOVZXDQ",       P_66, M_0F38, WIG, 0x35);
1143         public static final VexRMOp VPTEST          = new VexRMOp("VPTEST",          P_66, M_0F38, WIG, 0x17);
1144         public static final VexRMOp VSQRTPD         = new VexRMOp("VSQRTPD",         P_66, M_0F,   WIG, 0x51);
1145         public static final VexRMOp VSQRTPS         = new VexRMOp("VSQRTPS",         P_,   M_0F,   WIG, 0x51);
1146         public static final VexRMOp VSQRTSD         = new VexRMOp("VSQRTSD",         P_F2, M_0F,   WIG, 0x51);
1147         public static final VexRMOp VSQRTSS         = new VexRMOp("VSQRTSS",         P_F3, M_0F,   WIG, 0x51);
1148         public static final VexRMOp VUCOMISS        = new VexRMOp("VUCOMISS",        P_,   M_0F,   WIG, 0x2E);
1149         public static final VexRMOp VUCOMISD        = new VexRMOp("VUCOMISD",        P_66, M_0F,   WIG, 0x2E);
1150         // @formatter:on
1151 
1152         protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
1153             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1, EVEXTuple.INVALID, WIG);
1154         }
1155 
1156         protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1157             this(opcode, pp, mmmmm, w, op, assertion, EVEXTuple.INVALID, WIG);
1158         }
1159 
1160         protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
1161             super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
1162         }
1163 
1164         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1165             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1166             boolean useEvex = asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
1167             asm.emitByte(op);
1168             asm.emitOperandHelper(dst, src, 0, getDisp8Scale(useEvex, size));
1169         }
1170     }
1171 
1172     /**
1173      * VEX-encoded move instructions.
1174      * <p>
1175      * These instructions have two opcodes: op is the forward move instruction with an operand order
1176      * of RM, and opReverse is the reverse move instruction with an operand order of MR.
1177      */
1178     public static final class VexMoveOp extends VexRMOp {
1179         // @formatter:off
1180         public static final VexMoveOp VMOVDQA32 = new VexMoveOp("VMOVDQA32", P_66, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL,         EVEXTuple.FVM,       W0);
1181         public static final VexMoveOp VMOVDQA64 = new VexMoveOp("VMOVDQA64", P_66, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL,         EVEXTuple.FVM,       W1);
1182         public static final VexMoveOp VMOVDQU32 = new VexMoveOp("VMOVDQU32", P_F3, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL,         EVEXTuple.FVM,       W0);
1183         public static final VexMoveOp VMOVDQU64 = new VexMoveOp("VMOVDQU64", P_F3, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL,         EVEXTuple.FVM,       W1);
1184         public static final VexMoveOp VMOVAPS   = new VexMoveOp("VMOVAPS",   P_,   M_0F, WIG, 0x28, 0x29, VEXOpAssertion.AVX1_AVX512F_VL,         EVEXTuple.FVM,       W0);
1185         public static final VexMoveOp VMOVAPD   = new VexMoveOp("VMOVAPD",   P_66, M_0F, WIG, 0x28, 0x29, VEXOpAssertion.AVX1_AVX512F_VL,         EVEXTuple.FVM,       W1);
1186         public static final VexMoveOp VMOVUPS   = new VexMoveOp("VMOVUPS",   P_,   M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_VL,         EVEXTuple.FVM,       W0);
1187         public static final VexMoveOp VMOVUPD   = new VexMoveOp("VMOVUPD",   P_66, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_VL,         EVEXTuple.FVM,       W1);
1188         public static final VexMoveOp VMOVSS    = new VexMoveOp("VMOVSS",    P_F3, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_ALL,        EVEXTuple.T1S_32BIT, W0);
1189         public static final VexMoveOp VMOVSD    = new VexMoveOp("VMOVSD",    P_F2, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_ALL,        EVEXTuple.T1S_64BIT, W1);
1190         public static final VexMoveOp VMOVD     = new VexMoveOp("VMOVD",     P_66, M_0F, W0,  0x6E, 0x7E, VEXOpAssertion.XMM_CPU_AVX512F_128ONLY, EVEXTuple.T1F_32BIT, W0);
1191         public static final VexMoveOp VMOVQ     = new VexMoveOp("VMOVQ",     P_66, M_0F, W1,  0x6E, 0x7E, VEXOpAssertion.XMM_CPU_AVX512F_128ONLY, EVEXTuple.T1S_64BIT, W1);
1192         // @formatter:on
1193 
1194         private final int opReverse;
1195 
1196         private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1197             this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1, EVEXTuple.INVALID, WIG);
1198         }
1199 
1200         private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1201             this(opcode, pp, mmmmm, w, op, opReverse, assertion, EVEXTuple.INVALID, WIG);
1202         }
1203 
1204         private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
1205             super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
1206             this.opReverse = opReverse;
1207         }
1208 
1209         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
1210             assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1211             boolean useEvex = asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
1212             asm.emitByte(opReverse);
1213             asm.emitOperandHelper(src, dst, 0, getDisp8Scale(useEvex, size));
1214         }
1215 
1216         public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1217             assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1218             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
1219             asm.emitByte(opReverse);
1220             asm.emitModRM(src, dst);
1221         }
1222     }
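    // Usage sketch: the RM emit inherited from VexRMOp is the load direction, while the MR
    // overloads above use opReverse for the store direction. Assuming an assembler `asm`, an
    // XMM-category register `vec` and an AMD64Address `addr` (placeholder names):
    //
    //   VexMoveOp.VMOVDQU32.emit(asm, AVXSize.YMM, vec, addr);   // load:  vmovdqu ymm, m256
    //   VexMoveOp.VMOVDQU32.emit(asm, AVXSize.YMM, addr, vec);   // store: vmovdqu m256, ymm
    //
    // The vmovdqu helpers further down in this file wrap exactly this pair of calls.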
1223 
1224     public interface VexRRIOp {
1225         void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
1226     }
1227 
1228     /**
1229      * VEX-encoded instructions with an operand order of RMI.
1230      */
1231     public static final class VexRMIOp extends VexOp implements VexRRIOp {
1232         // @formatter:off
1233         public static final VexRMIOp VPERMQ   = new VexRMIOp("VPERMQ",   P_66, M_0F3A, W1,  0x00, VEXOpAssertion.AVX2_256ONLY);
1234         public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1235         public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1236         public static final VexRMIOp VPSHUFD  = new VexRMIOp("VPSHUFD",  P_66, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1237         // @formatter:on
1238 
1239         private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1240             super(opcode, pp, mmmmm, w, op, assertion);
1241         }
1242 
1243         @Override
1244         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1245             assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1246             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
1247             asm.emitByte(op);
1248             asm.emitModRM(dst, src);
1249             asm.emitByte(imm8);
1250         }
1251 
1252         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
1253             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1254             boolean useEvex = asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
1255             asm.emitByte(op);
1256             asm.emitOperandHelper(dst, src, 1, getDisp8Scale(useEvex, size));
1257             asm.emitByte(imm8);
1258         }
1259     }
1260 
1261     /**
1262      * VEX-encoded instructions with an operand order of MRI.
1263      */
1264     public static final class VexMRIOp extends VexOp implements VexRRIOp {
1265         // @formatter:off
1266         public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
1267         public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
1268         public static final VexMRIOp VPEXTRB      = new VexMRIOp("VPEXTRB",      P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
1269         public static final VexMRIOp VPEXTRW      = new VexMRIOp("VPEXTRW",      P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
1270         public static final VexMRIOp VPEXTRD      = new VexMRIOp("VPEXTRD",      P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
1271         public static final VexMRIOp VPEXTRQ      = new VexMRIOp("VPEXTRQ",      P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
1272         // @formatter:on
1273 
1274         private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1275             super(opcode, pp, mmmmm, w, op, assertion);
1276         }
1277 
1278         @Override
1279         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1280             assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1281             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
1282             asm.emitByte(op);
1283             asm.emitModRM(src, dst);
1284             asm.emitByte(imm8);
1285         }
1286 
1287         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
1288             assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1289             boolean useEvex = asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
1290             asm.emitByte(op);
1291             asm.emitOperandHelper(src, dst, 1, getDisp8Scale(useEvex, size));
1292             asm.emitByte(imm8);
1293         }
1294     }
1295 
1296     /**
1297      * VEX-encoded instructions with an operand order of RVMR.
1298      */
1299     public static class VexRVMROp extends VexOp {
1300         // @formatter:off
1301         public static final VexRVMROp VPBLENDVB  = new VexRVMROp("VPBLENDVB",  P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
1302         public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
1303         public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
1304         // @formatter:on
1305 
1306         protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1307             super(opcode, pp, mmmmm, w, op, assertion);
1308         }
1309 
1310         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
1311             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
1312             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1313             asm.emitByte(op);
1314             asm.emitModRM(dst, src2);
1315             asm.emitByte(mask.encoding() << 4);
1316         }
1317 
1318         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
1319             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
1320             boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1321             asm.emitByte(op);
1322             asm.emitOperandHelper(dst, src2, 0, getDisp8Scale(useEvex, size));
1323             asm.emitByte(mask.encoding() << 4);
1324         }
1325     }
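    // Note on the encoding above: the fourth (selector) operand is not a ModRM field; it is
    // packed into the upper four bits of a trailing immediate byte, hence the
    // emitByte(mask.encoding() << 4) calls. A sketch, assuming an assembler `asm` and
    // XMM-category registers dst, mask, a and b (placeholder names):
    //
    //   VexRVMROp.VPBLENDVB.emit(asm, AVXSize.YMM, dst, mask, a, b);
    //   // each byte of dst comes from b where the sign bit of the matching byte of mask is set,
    //   // otherwise from a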
1326 
1327     /**
1328      * VEX-encoded instructions with an operand order of RVM.
1329      */
1330     public static class VexRVMOp extends VexOp {
1331         // @formatter:off
1332         public static final VexRVMOp VANDPS    = new VexRVMOp("VANDPS",    P_,   M_0F,   WIG, 0x54);
1333         public static final VexRVMOp VANDPD    = new VexRVMOp("VANDPD",    P_66, M_0F,   WIG, 0x54);
1334         public static final VexRVMOp VANDNPS   = new VexRVMOp("VANDNPS",   P_,   M_0F,   WIG, 0x55);
1335         public static final VexRVMOp VANDNPD   = new VexRVMOp("VANDNPD",   P_66, M_0F,   WIG, 0x55);
1336         public static final VexRVMOp VORPS     = new VexRVMOp("VORPS",     P_,   M_0F,   WIG, 0x56);
1337         public static final VexRVMOp VORPD     = new VexRVMOp("VORPD",     P_66, M_0F,   WIG, 0x56);
1338         public static final VexRVMOp VXORPS    = new VexRVMOp("VXORPS",    P_,   M_0F,   WIG, 0x57);
1339         public static final VexRVMOp VXORPD    = new VexRVMOp("VXORPD",    P_66, M_0F,   WIG, 0x57);
1340         public static final VexRVMOp VADDPS    = new VexRVMOp("VADDPS",    P_,   M_0F,   WIG, 0x58);
1341         public static final VexRVMOp VADDPD    = new VexRVMOp("VADDPD",    P_66, M_0F,   WIG, 0x58);
1342         public static final VexRVMOp VADDSS    = new VexRVMOp("VADDSS",    P_F3, M_0F,   WIG, 0x58);


1390         public static final VexRVMOp VPCMPEQD  = new VexRVMOp("VPCMPEQD",  P_66, M_0F,   WIG, 0x76, VEXOpAssertion.AVX1_2);
1391         public static final VexRVMOp VPCMPEQQ  = new VexRVMOp("VPCMPEQQ",  P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
1392         public static final VexRVMOp VPCMPGTB  = new VexRVMOp("VPCMPGTB",  P_66, M_0F,   WIG, 0x64, VEXOpAssertion.AVX1_2);
1393         public static final VexRVMOp VPCMPGTW  = new VexRVMOp("VPCMPGTW",  P_66, M_0F,   WIG, 0x65, VEXOpAssertion.AVX1_2);
1394         public static final VexRVMOp VPCMPGTD  = new VexRVMOp("VPCMPGTD",  P_66, M_0F,   WIG, 0x66, VEXOpAssertion.AVX1_2);
1395         public static final VexRVMOp VPCMPGTQ  = new VexRVMOp("VPCMPGTQ",  P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
1396         public static final VexRVMOp VFMADD231SS = new VexRVMOp("VFMADD231SS", P_66, M_0F38, W0, 0xB9, VEXOpAssertion.FMA);
1397         public static final VexRVMOp VFMADD231SD = new VexRVMOp("VFMADD231SD", P_66, M_0F38, W1, 0xB9, VEXOpAssertion.FMA);
1398         // @formatter:on
1399 
1400         private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
1401             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1402         }
1403 
1404         protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1405             super(opcode, pp, mmmmm, w, op, assertion);
1406         }
1407 
1408         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1409             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1410             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1411             asm.emitByte(op);
1412             asm.emitModRM(dst, src2);
1413         }
1414 
1415         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1416             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1417             boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1418             asm.emitByte(op);
1419             asm.emitOperandHelper(dst, src2, 0, getDisp8Scale(useEvex, size));
1420         }
1421     }
1422 
1423     public static final class VexGeneralPurposeRVMOp extends VexRVMOp {
1424         // @formatter:off
1425         public static final VexGeneralPurposeRVMOp ANDN   = new VexGeneralPurposeRVMOp("ANDN",   P_,   M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1);
1426         public static final VexGeneralPurposeRVMOp MULX   = new VexGeneralPurposeRVMOp("MULX",   P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2);
1427         public static final VexGeneralPurposeRVMOp PDEP   = new VexGeneralPurposeRVMOp("PDEP",   P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1428         public static final VexGeneralPurposeRVMOp PEXT   = new VexGeneralPurposeRVMOp("PEXT",   P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1429         // @formatter:on
1430 
1431         private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1432             super(opcode, pp, mmmmm, w, op, assertion);
1433         }
1434 
1435         @Override
1436         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1437             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null);
1438             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1439             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1440             asm.emitByte(op);
1441             asm.emitModRM(dst, src2);
1442         }
1443 
1444         @Override
1445         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1446             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null);
1447             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1448             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1449             asm.emitByte(op);
1450             asm.emitOperandHelper(dst, src2, 0);
1451         }
1452     }
1453 
1454     public static final class VexGeneralPurposeRMVOp extends VexOp {
1455         // @formatter:off
1456         public static final VexGeneralPurposeRMVOp BEXTR  = new VexGeneralPurposeRMVOp("BEXTR",  P_,   M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1);
1457         public static final VexGeneralPurposeRMVOp BZHI   = new VexGeneralPurposeRMVOp("BZHI",   P_,   M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1458         public static final VexGeneralPurposeRMVOp SARX   = new VexGeneralPurposeRMVOp("SARX",   P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1459         public static final VexGeneralPurposeRMVOp SHRX   = new VexGeneralPurposeRMVOp("SHRX",   P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1460         public static final VexGeneralPurposeRMVOp SHLX   = new VexGeneralPurposeRMVOp("SHLX",   P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1461         // @formatter:on
1462 
1463         private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1464             super(opcode, pp, mmmmm, w, op, assertion);
1465         }
1466 
1467         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1468             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null);
1469             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1470             asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1471             asm.emitByte(op);
1472             asm.emitModRM(dst, src1);
1473         }
1474 
1475         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) {
1476             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null);
1477             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1478             asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1479             asm.emitByte(op);
1480             asm.emitOperandHelper(dst, src1, 0);
1481         }
1482     }
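    // Usage sketch: in the RMV order the second assembler operand is the r/m value being
    // operated on and the third goes into VEX.vvvv, e.g. the shift count for SARX/SHRX/SHLX.
    // Assuming an assembler `asm` and general-purpose registers from jdk.vm.ci.amd64.AMD64:
    //
    //   VexGeneralPurposeRMVOp.SHLX.emit(asm, AVXSize.QWORD, AMD64.rax, AMD64.rbx, AMD64.rcx);
    //   // rax = rbx << (rcx & 63)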
1483 
1484     public static final class VexGeneralPurposeRMOp extends VexRMOp {
1485         // @formatter:off
1486         public static final VexGeneralPurposeRMOp BLSI    = new VexGeneralPurposeRMOp("BLSI",   P_,    M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1);
1487         public static final VexGeneralPurposeRMOp BLSMSK  = new VexGeneralPurposeRMOp("BLSMSK", P_,    M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1);
1488         public static final VexGeneralPurposeRMOp BLSR    = new VexGeneralPurposeRMOp("BLSR",   P_,    M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1);
1489         // @formatter:on
1490         private final int ext;
1491 
1492         private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) {
1493             super(opcode, pp, mmmmm, w, op, assertion);
1494             this.ext = ext;
1495         }
1496 
1497         @Override
1498         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1499             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1500             asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1501             asm.emitByte(op);
1502             asm.emitModRM(ext, src);
1503         }
1504 
1505         @Override
1506         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1507             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1508             asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1509             asm.emitByte(op);
1510             asm.emitOperandHelper(ext, src, 0);
1511         }
1512     }
1513 
1514     /**
1515      * VEX-encoded shift instructions with an operand order of either RVM or VMI.
1516      */
1517     public static final class VexShiftOp extends VexRVMOp implements VexRRIOp {
1518         // @formatter:off
1519         public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2);
1520         public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2);
1521         public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2);
1522         public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4);
1523         public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4);
1524         public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6);
1525         public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6);
1526         public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6);
1527         // @formatter:on
1528 
1529         private final int immOp;
1530         private final int r;
1531 
1532         private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) {
1533             super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2);
1534             this.immOp = immOp;
1535             this.r = r;
1536         }
1537 
1538         @Override
1539         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1540             assert assertion.check((AMD64) asm.target.arch, size, null, dst, src);
1541             asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, wEvex, false);
1542             asm.emitByte(immOp);
1543             asm.emitModRM(r, src);
1544             asm.emitByte(imm8);
1545         }
1546     }
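    // Usage sketch: each shift has two encodings. The RVM form inherited from VexRVMOp takes
    // the shift count from an XMM register, while the VMI form above takes an immediate and
    // encodes the extension r in the ModRM reg field. Assuming an assembler `asm` and
    // XMM-category registers dst, src and cnt (placeholder names):
    //
    //   VexShiftOp.VPSLLD.emit(asm, AVXSize.YMM, dst, src, 3);    // immediate: each lane of src shifted left by 3
    //   VexShiftOp.VPSLLD.emit(asm, AVXSize.YMM, dst, src, cnt);  // RVM: count taken from the low quadword of cnt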
1547 
1548     public static final class VexMaskMoveOp extends VexOp {
1549         // @formatter:off
1550         public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E);
1551         public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F);
1552         public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1553         public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1554         // @formatter:on
1555 
1556         private final int opReverse;
1557 
1558         private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1559             this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
1560         }
1561 
1562         private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1563             super(opcode, pp, mmmmm, w, op, assertion);
1564             this.opReverse = opReverse;
1565         }
1566 
1567         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) {
1568             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null);
1569             asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, wEvex, false);
1570             asm.emitByte(op);
1571             asm.emitOperandHelper(dst, src, 0);
1572         }
1573 
1574         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) {
1575             assert assertion.check((AMD64) asm.target.arch, size, src, mask, null);
1576             boolean useEvex = asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, wEvex, false);
1577             asm.emitByte(opReverse);
1578             asm.emitOperandHelper(src, dst, 0, getDisp8Scale(useEvex, size));
1579         }
1580     }
1581 
1582     /**
1583      * VEX-encoded instructions with an operand order of RVMI.
1584      */
1585     public static final class VexRVMIOp extends VexOp {
1586         // @formatter:off
1587         public static final VexRVMIOp VSHUFPS     = new VexRVMIOp("VSHUFPS",     P_,   M_0F,   WIG, 0xC6);
1588         public static final VexRVMIOp VSHUFPD     = new VexRVMIOp("VSHUFPD",     P_66, M_0F,   WIG, 0xC6);
1589         public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0,  0x18, VEXOpAssertion.AVX1_256ONLY);
1590         public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0,  0x38, VEXOpAssertion.AVX2_256ONLY);
1591         // @formatter:on
1592 
1593         private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) {
1594             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1595         }
1596 
1597         private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1598             super(opcode, pp, mmmmm, w, op, assertion);
1599         }
1600 
1601         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) {
1602             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1603             assert (imm8 & 0xFF) == imm8;
1604             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1605             asm.emitByte(op);
1606             asm.emitModRM(dst, src2);
1607             asm.emitByte(imm8);
1608         }
1609 
1610         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) {
1611             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1612             assert (imm8 & 0xFF) == imm8;
1613             boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1614             asm.emitByte(op);
1615             asm.emitOperandHelper(dst, src2, 1, getDisp8Scale(useEvex, size));
1616             asm.emitByte(imm8);
1617         }
1618     }
1619 
1620     /**
1621      * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a
1622      * comparison operator.
1623      */
1624     public static final class VexFloatCompareOp extends VexOp {
1625         // @formatter:off
1626         public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_,   M_0F, WIG, 0xC2);
1627         public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
1628         public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F2, M_0F, WIG, 0xC2);
1629         public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
1630         // @formatter:on
1631 
1632         public enum Predicate {
1633             EQ_OQ(0x00),
1634             LT_OS(0x01),
1635             LE_OS(0x02),


1697                             return LT_OQ;
1698                         case LE:
1699                             return LE_OQ;
1700                         case GT:
1701                             return GT_OQ;
1702                         case GE:
1703                             return GE_OQ;
1704                         default:
1705                             throw GraalError.shouldNotReachHere();
1706                     }
1707                 }
1708             }
1709         }
1710 
1711         private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) {
1712             super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1713         }
1714 
1715         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) {
1716             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1717             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1718             asm.emitByte(op);
1719             asm.emitModRM(dst, src2);
1720             asm.emitByte(p.imm8);
1721         }
1722 
1723         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) {
1724             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1725             boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1726             asm.emitByte(op);
1727             asm.emitOperandHelper(dst, src2, 1, getDisp8Scale(useEvex, size));
1728             asm.emitByte(p.imm8);
1729         }
1730     }
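    // Usage sketch: the comparison operator travels in the immediate byte, so a packed
    // single-precision ordered "less than" compare could be emitted as follows (asm, dst, a, b
    // are placeholders for an assembler and XMM-category registers):
    //
    //   VexFloatCompareOp.VCMPPS.emit(asm, AVXSize.YMM, dst, a, b, VexFloatCompareOp.Predicate.LT_OS);
    //   // each lane of dst becomes all-ones where a < b, all-zeros otherwise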
1731 
1732     public final void addl(AMD64Address dst, int imm32) {
1733         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1734     }
1735 
1736     public final void addl(Register dst, int imm32) {
1737         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1738     }
1739 
1740     public final void addl(Register dst, Register src) {
1741         ADD.rmOp.emit(this, DWORD, dst, src);
1742     }
1743 
1744     public final void addpd(Register dst, Register src) {
1745         SSEOp.ADD.emit(this, PD, dst, src);
1746     }
1747 


3814 
3815     public void lfence() {
3816         emitByte(0x0f);
3817         emitByte(0xae);
3818         emitByte(0xe8);
3819     }
3820 
3821     public final void vptest(Register dst, Register src) {
3822         VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
3823     }
3824 
3825     public final void vpxor(Register dst, Register nds, Register src) {
3826         VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3827     }
3828 
3829     public final void vpxor(Register dst, Register nds, AMD64Address src) {
3830         VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3831     }
3832 
3833     public final void vmovdqu(Register dst, AMD64Address src) {
3834         VexMoveOp.VMOVDQU32.emit(this, AVXSize.YMM, dst, src);
3835     }
3836 
3837     public final void vmovdqu(AMD64Address dst, Register src) {
3838         assert inRC(XMM, src);
3839         VexMoveOp.VMOVDQU32.emit(this, AVXSize.YMM, dst, src);
3840     }
3841 
3842     public final void vpmovzxbw(Register dst, AMD64Address src) {
3843         assert supports(CPUFeature.AVX2);
3844         VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
3845     }
3846 
3847     public final void vzeroupper() {
3848         emitVEX(L128, P_, M_0F, W0, 0, 0, true);
3849         emitByte(0x77);
3850     }
3851 
3852     // Insn: KORTESTD k1, k2
3853 
3854     // Sets ZF if the OR of the two mask operands is all zeros, and CF if it is all ones
3855     public final void kortestd(Register src1, Register src2) {
3856         assert supports(CPUFeature.AVX512BW);
3857         assert inRC(MASK, src1) && inRC(MASK, src2);
3858         // Code: VEX.L0.66.0F.W1 98 /r
3859         vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, W1, true);
3860         emitByte(0x98);
3861         emitModRM(src1, src2);
3862     }
3863 
3864     // Insn: KORTESTQ k1, k2
3865 
3866     // Sets ZF if the OR of the two mask operands is all zeros, and CF if it is all ones
3867     public final void kortestq(Register src1, Register src2) {
3868         assert supports(CPUFeature.AVX512BW);
3869         assert inRC(MASK, src1) && inRC(MASK, src2);
3870         // Code: VEX.L0.0F.W1 98 /r
3871         vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, W1, true);
3872         emitByte(0x98);
3873         emitModRM(src1, src2);
3874     }
3875 
3876     public final void kmovd(Register dst, Register src) {
3877         assert supports(CPUFeature.AVX512BW);
3878         assert inRC(MASK, dst) || inRC(CPU, dst);
3879         assert inRC(MASK, src) || inRC(CPU, src);
3880         assert !(inRC(CPU, dst) && inRC(CPU, src));
3881 
3882         if (inRC(MASK, dst)) {
3883             if (inRC(MASK, src)) {
3884                 // kmovd(KRegister dst, KRegister src):
3885                 // Insn: KMOVD k1, k2/m32
3886                 // Code: VEX.L0.66.0F.W1 90 /r
3887                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, W1, true);
3888                 emitByte(0x90);
3889                 emitModRM(dst, src);
3890             } else {
3891                 // kmovd(KRegister dst, Register src)
3892                 // Insn: KMOVD k1, r32
3893                 // Code: VEX.L0.F2.0F.W0 92 /r
3894                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, W0, true);
3895                 emitByte(0x92);
3896                 emitModRM(dst, src);
3897             }
3898         } else {
3899             if (inRC(MASK, src)) {
3900                 // kmovd(Register dst, KRegister src)
3901                 // Insn: KMOVD r32, k1
3902                 // Code: VEX.L0.F2.0F.W0 93 /r
3903                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, W0, true);
3904                 emitByte(0x93);
3905                 emitModRM(dst, src);
3906             } else {
3907                 throw GraalError.shouldNotReachHere();
3908             }
3909         }
3910     }
3911 
3912     public final void kmovq(Register dst, Register src) {
3913         assert supports(CPUFeature.AVX512BW);
3914         assert inRC(MASK, dst) || inRC(CPU, dst);
3915         assert inRC(MASK, src) || inRC(CPU, src);
3916         assert !(inRC(CPU, dst) && inRC(CPU, src));
3917 
3918         if (inRC(MASK, dst)) {
3919             if (inRC(MASK, src)) {
3920                 // kmovq(KRegister dst, KRegister src):
3921                 // Insn: KMOVQ k1, k2/m64
3922                 // Code: VEX.L0.0F.W1 90 /r
3923                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, W1, true);
3924                 emitByte(0x90);
3925                 emitModRM(dst, src);
3926             } else {
3927                 // kmovq(KRegister dst, Register src)
3928                 // Insn: KMOVQ k1, r64
3929                 // Code: VEX.L0.F2.0F.W1 92 /r
3930                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, W1, true);
3931                 emitByte(0x92);
3932                 emitModRM(dst, src);
3933             }
3934         } else {
3935             if (inRC(MASK, src)) {
3936                 // kmovq(Register dst, KRegister src)
3937                 // Insn: KMOVQ r64, k1
3938                 // Code: VEX.L0.F2.0F.W1 93 /r
3939                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, W1, true);
3940                 emitByte(0x93);
3941                 emitModRM(dst, src);
3942             } else {
3943                 throw GraalError.shouldNotReachHere();
3944             }
3945         }
3946     }
3947 
3948     // Insn: KTESTD k1, k2
3949 
3950     public final void ktestd(Register src1, Register src2) {
3951         assert supports(CPUFeature.AVX512BW);
3952         assert inRC(MASK, src1) && inRC(MASK, src2);
3953         // Code: VEX.L0.66.0F.W1 99 /r
3954         vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, W1, true);
3955         emitByte(0x99);
3956         emitModRM(src1, src2);
3957     }
3958 
3959     public final void evmovdqu64(Register dst, AMD64Address src) {
3960         assert supports(CPUFeature.AVX512F);
3961         assert inRC(XMM, dst);
3962         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
3963         emitByte(0x6F);
3964         emitOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3965     }
3966 
3967     // Insn: VPMOVZXBW zmm1, m256
3968 
3969     public final void evpmovzxbw(Register dst, AMD64Address src) {
3970         assert supports(CPUFeature.AVX512BW);
3971         assert inRC(XMM, dst);
3972         // Code: EVEX.512.66.0F38.WIG 30 /r
3973         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
3974         emitByte(0x30);
3975         emitOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3976     }
3977 
3978     public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
3979         assert supports(CPUFeature.AVX512BW);
3980         assert inRC(MASK, kdst) && inRC(XMM, nds);
3981         evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
3982         emitByte(0x74);
3983         emitOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3984     }
3985 
3986     // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512
3987     // -----
3988     // Insn: VMOVDQU16 zmm1, m512
3989 
3990     public final void evmovdqu16(Register dst, AMD64Address src) {
3991         assert supports(CPUFeature.AVX512BW);
3992         assert inRC(XMM, dst);
3993         // Code: EVEX.512.F2.0F.W1 6F /r
3994         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3995         emitByte(0x6F);
3996         emitOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3997     }
3998 
3999     // Insn: VMOVDQU16 zmm1, k1:z, m512
4000 
4001     public final void evmovdqu16(Register dst, Register mask, AMD64Address src) {
4002         assert supports(CPUFeature.AVX512BW);
4003         assert inRC(XMM, dst) && inRC(MASK, mask);
4004         // Code: EVEX.512.F2.0F.W1 6F /r
4005         evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0);
4006         emitByte(0x6F);
4007         emitOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
4008     }
4009 
4010     // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1
4011     // -----
4012     // Insn: VMOVDQU16 m512, zmm1
4013 
4014     public final void evmovdqu16(AMD64Address dst, Register src) {
4015         assert supports(CPUFeature.AVX512BW);
4016         assert inRC(XMM, src);
4017         // Code: EVEX.512.F2.0F.W1 7F /r
4018         evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
4019         emitByte(0x7F);
4020         emitOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
4021     }
4022 
4023     // Insn: VMOVDQU16 m512, k1, zmm1
4024 
4025     public final void evmovdqu16(AMD64Address dst, Register mask, Register src) {
4026         assert supports(CPUFeature.AVX512BW);
4027         assert inRC(MASK, mask) && inRC(XMM, src);
4028         // Code: EVEX.512.F2.0F.W1 7F /r
4029         evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
4030         emitByte(0x7F);
4031         emitOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
4032     }
4033 
4034     // Insn: VPBROADCASTW zmm1 {k1}{z}, reg
4035     // -----
4036     // Insn: VPBROADCASTW zmm1, reg
4037 
4038     public final void evpbroadcastw(Register dst, Register src) {
4039         assert supports(CPUFeature.AVX512BW);
4040         assert inRC(XMM, dst) && inRC(CPU, src);
4041         // Code: EVEX.512.66.0F38.W0 7B /r
4042         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0);
4043         emitByte(0x7B);
4044         emitModRM(dst, src);
4045     }
4046 
4047     // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
4048     // -----
4049     // Insn: VPCMPUW k1, zmm2, zmm3, imm8
4050 
4051     public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) {
4052         assert supports(CPUFeature.AVX512BW);
4053         assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src);
4054         // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
4055         evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
4056         emitByte(0x3E);
4057         emitModRM(kdst, src);
4058         emitByte(vcc);
4059     }
4060 
4061     // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
4062     // -----
4063     // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8
4064 
4065     public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) {
4066         assert supports(CPUFeature.AVX512BW);
4067         assert inRC(MASK, kdst) && inRC(MASK, mask);
4068         assert inRC(XMM, nds) && inRC(XMM, src);
4069         // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
4070         evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
4071         emitByte(0x3E);
4072         emitModRM(kdst, src);
4073         emitByte(vcc);
4074     }
4075 
4076     // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2
4077     // -----
4078     // Insn: VPMOVWB m256, zmm2
4079 
4080     public final void evpmovwb(AMD64Address dst, Register src) {
4081         assert supports(CPUFeature.AVX512BW);
4082         assert inRC(XMM, src);
4083         // Code: EVEX.512.F3.0F38.W0 30 /r
4084         evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
4085         emitByte(0x30);
4086         emitOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
4087     }
4088 
4089     // Insn: VPMOVWB m256, k1, zmm2
4090 
4091     public final void evpmovwb(AMD64Address dst, Register mask, Register src) {
4092         assert supports(CPUFeature.AVX512BW);
4093         assert inRC(MASK, mask) && inRC(XMM, src);
4094         // Code: EVEX.512.F3.0F38.W0 30 /r
4095         evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
4096         emitByte(0x30);
4097         emitOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
4098     }
4099 
4100     // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
4101     // -----
4102     // Insn: VPMOVZXBW zmm1, k1, m256
4103 
4104     public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) {
4105         assert supports(CPUFeature.AVX512BW);
4106         assert inRC(MASK, mask) && inRC(XMM, dst);
4107         // Code: EVEX.512.66.0F38.WIG 30 /r
4108         evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
4109         emitByte(0x30);
4110         emitOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
4111     }
4112 
4113 }