
src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java

rev 52509 : [mq]: graal


  26 
  27 import static jdk.vm.ci.amd64.AMD64.CPU;
  28 import static jdk.vm.ci.amd64.AMD64.MASK;
  29 import static jdk.vm.ci.amd64.AMD64.XMM;
  30 import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
  31 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
  32 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
  33 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
  34 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
  35 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
  36 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
  37 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
  38 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
  39 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
  40 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
  41 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
  42 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
  43 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
  44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
  45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;

  46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
  47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
  48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
  49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
  50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
  51 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
  52 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
  53 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
  54 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
  55 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
  56 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
  57 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
  58 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
  59 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
  60 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
  61 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
  62 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
  63 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
  64 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
  65 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;


1006 
1007     /**
1008      * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
1009      */
1010     public static class VexRROp extends VexOp {
1011         // @formatter:off
1012         public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
1013         // @formatter:on
1014 
1015         protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
1016             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1017         }
1018 
1019         protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1020             super(opcode, pp, mmmmm, w, op, assertion);
1021         }
1022 
1023         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1024             assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1025             assert op != 0x1A && op != 0x5A;
1026             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
1027             asm.emitByte(op);
1028             asm.emitModRM(dst, src);
1029         }
1030     }
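A minimal usage sketch (not part of the file under review), assuming an AMD64Assembler asm and XMM registers dst and src are in scope:

    // Both operands of a VexRROp must be registers; VMASKMOVDQU is 128-bit only.
    VexRROp.VMASKMOVDQU.emit(asm, AVXSize.XMM, dst, src);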
1031 
1032     /**
1033      * VEX-encoded instructions with an operand order of RM.
1034      */
1035     public static class VexRMOp extends VexRROp {
1036         // @formatter:off
1037         public static final VexRMOp VCVTTSS2SI      = new VexRMOp("VCVTTSS2SI",      P_F3, M_0F,   W0,  0x2C, VEXOpAssertion.CPU_XMM);
1038         public static final VexRMOp VCVTTSS2SQ      = new VexRMOp("VCVTTSS2SQ",      P_F3, M_0F,   W1,  0x2C, VEXOpAssertion.CPU_XMM);
1039         public static final VexRMOp VCVTTSD2SI      = new VexRMOp("VCVTTSD2SI",      P_F2, M_0F,   W0,  0x2C, VEXOpAssertion.CPU_XMM);
1040         public static final VexRMOp VCVTTSD2SQ      = new VexRMOp("VCVTTSD2SQ",      P_F2, M_0F,   W1,  0x2C, VEXOpAssertion.CPU_XMM);
1041         public static final VexRMOp VCVTPS2PD       = new VexRMOp("VCVTPS2PD",       P_,   M_0F,   WIG, 0x5A);
1042         public static final VexRMOp VCVTPD2PS       = new VexRMOp("VCVTPD2PS",       P_66, M_0F,   WIG, 0x5A);
1043         public static final VexRMOp VCVTDQ2PS       = new VexRMOp("VCVTDQ2PS",       P_,   M_0F,   WIG, 0x5B);
1044         public static final VexRMOp VCVTTPS2DQ      = new VexRMOp("VCVTTPS2DQ",      P_F3, M_0F,   WIG, 0x5B);
1045         public static final VexRMOp VCVTTPD2DQ      = new VexRMOp("VCVTTPD2DQ",      P_66, M_0F,   WIG, 0xE6);
1046         public static final VexRMOp VCVTDQ2PD       = new VexRMOp("VCVTDQ2PD",       P_F3, M_0F,   WIG, 0xE6);


1067         public static final VexRMOp VPMOVZXDQ       = new VexRMOp("VPMOVZXDQ",       P_66, M_0F38, WIG, 0x35);
1068         public static final VexRMOp VPTEST          = new VexRMOp("VPTEST",          P_66, M_0F38, WIG, 0x17);
1069         public static final VexRMOp VSQRTPD         = new VexRMOp("VSQRTPD",         P_66, M_0F,   WIG, 0x51);
1070         public static final VexRMOp VSQRTPS         = new VexRMOp("VSQRTPS",         P_,   M_0F,   WIG, 0x51);
1071         public static final VexRMOp VSQRTSD         = new VexRMOp("VSQRTSD",         P_F2, M_0F,   WIG, 0x51);
1072         public static final VexRMOp VSQRTSS         = new VexRMOp("VSQRTSS",         P_F3, M_0F,   WIG, 0x51);
1073         public static final VexRMOp VUCOMISS        = new VexRMOp("VUCOMISS",        P_,   M_0F,   WIG, 0x2E);
1074         public static final VexRMOp VUCOMISD        = new VexRMOp("VUCOMISD",        P_66, M_0F,   WIG, 0x2E);
1075         // @formatter:on
1076 
1077         protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
1078             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1079         }
1080 
1081         protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1082             super(opcode, pp, mmmmm, w, op, assertion);
1083         }
1084 
1085         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1086             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1087             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
1088             asm.emitByte(op);
1089             asm.emitOperandHelper(dst, src, 0);
1090         }
1091     }
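A sketch of both emit forms, assuming asm, YMM registers dst and src, and an AMD64Address addr are in scope (none of these locals appear in this file):

    VexRMOp.VSQRTPS.emit(asm, AVXSize.YMM, dst, src);   // register form: VSQRTPS dst, src
    VexRMOp.VPTEST.emit(asm, AVXSize.YMM, dst, addr);   // memory form:   VPTEST dst, [addr]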
1092 
1093     /**
1094      * VEX-encoded move instructions.
1095      * <p>
1096      * These instructions have two opcodes: op is the forward move instruction with an operand order
1097      * of RM, and opReverse is the reverse move instruction with an operand order of MR.
1098      */
1099     public static final class VexMoveOp extends VexRMOp {
1100         // @formatter:off
1101         public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
1102         public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
1103         public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_,   M_0F, WIG, 0x28, 0x29);
1104         public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
1105         public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_,   M_0F, WIG, 0x10, 0x11);
1106         public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
1107         public static final VexMoveOp VMOVSS  = new VexMoveOp("VMOVSS",  P_F3, M_0F, WIG, 0x10, 0x11);
1108         public static final VexMoveOp VMOVSD  = new VexMoveOp("VMOVSD",  P_F2, M_0F, WIG, 0x10, 0x11);
1109         public static final VexMoveOp VMOVD   = new VexMoveOp("VMOVD",   P_66, M_0F, W0,  0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
1110         public static final VexMoveOp VMOVQ   = new VexMoveOp("VMOVQ",   P_66, M_0F, W1,  0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
1111         // @formatter:on
1112 
1113         private final int opReverse;
1114 
1115         private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1116             this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
1117         }
1118 
1119         private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1120             super(opcode, pp, mmmmm, w, op, assertion);
1121             this.opReverse = opReverse;
1122         }
1123 
1124         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
1125             assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1126             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
1127             asm.emitByte(opReverse);
1128             asm.emitOperandHelper(src, dst, 0);
1129         }
1130 
1131         public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1132             assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1133             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
1134             asm.emitByte(opReverse);
1135             asm.emitModRM(src, dst);
1136         }
1137     }
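The forward/reverse pair in use, mirroring the vmovdqu helper near the end of this file (asm, dst, src and addr are assumed locals):

    VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, dst, addr);  // load:  op (RM order),       VMOVDQU dst, [addr]
    VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, addr, src);  // store: opReverse (MR order), VMOVDQU [addr], src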
1138 
1139     public interface VexRRIOp {
1140         void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
1141     }
1142 
1143     /**
1144      * VEX-encoded instructions with an operand order of RMI.
1145      */
1146     public static final class VexRMIOp extends VexOp implements VexRRIOp {
1147         // @formatter:off
1148         public static final VexRMIOp VPERMQ   = new VexRMIOp("VPERMQ",   P_66, M_0F3A, W1,  0x00, VEXOpAssertion.AVX2_256ONLY);
1149         public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1150         public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1151         public static final VexRMIOp VPSHUFD  = new VexRMIOp("VPSHUFD",  P_66, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1152         // @formatter:on
1153 
1154         private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1155             super(opcode, pp, mmmmm, w, op, assertion);
1156         }
1157 
1158         @Override
1159         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1160             assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1161             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
1162             asm.emitByte(op);
1163             asm.emitModRM(dst, src);
1164             asm.emitByte(imm8);
1165         }
1166 
1167         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
1168             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1169             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
1170             asm.emitByte(op);
1171             asm.emitOperandHelper(dst, src, 1);
1172             asm.emitByte(imm8);
1173         }
1174     }
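A sketch of the RMI form (asm, dst and src assumed in scope); an imm8 of 0x00 selects the lowest dword within each 128-bit lane:

    VexRMIOp.VPSHUFD.emit(asm, AVXSize.YMM, dst, src, 0x00);  // VPSHUFD dst, src, 0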
1175 
1176     /**
1177      * VEX-encoded instructions with an operand order of MRI.
1178      */
1179     public static final class VexMRIOp extends VexOp implements VexRRIOp {
1180         // @formatter:off
1181         public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
1182         public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
1183         public static final VexMRIOp VPEXTRB      = new VexMRIOp("VPEXTRB",      P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
1184         public static final VexMRIOp VPEXTRW      = new VexMRIOp("VPEXTRW",      P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
1185         public static final VexMRIOp VPEXTRD      = new VexMRIOp("VPEXTRD",      P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
1186         public static final VexMRIOp VPEXTRQ      = new VexMRIOp("VPEXTRQ",      P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
1187         // @formatter:on
1188 
1189         private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1190             super(opcode, pp, mmmmm, w, op, assertion);
1191         }
1192 
1193         @Override
1194         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1195             assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1196             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
1197             asm.emitByte(op);
1198             asm.emitModRM(src, dst);
1199             asm.emitByte(imm8);
1200         }
1201 
1202         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
1203             assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1204             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
1205             asm.emitByte(op);
1206             asm.emitOperandHelper(src, dst, 1);
1207             asm.emitByte(imm8);
1208         }
1209     }
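In the MRI order the first register argument is the M (destination) operand, e.g. extracting the upper 128 bits of a YMM register (asm, dst and src assumed in scope):

    VexMRIOp.VEXTRACTF128.emit(asm, AVXSize.YMM, dst, src, 1);  // VEXTRACTF128 dst, src, 1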
1210 
1211     /**
1212      * VEX-encoded instructions with an operand order of RVMR.
1213      */
1214     public static class VexRVMROp extends VexOp {
1215         // @formatter:off
1216         public static final VexRVMROp VPBLENDVB  = new VexRVMROp("VPBLENDVB",  P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
1217         public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
1218         public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
1219         // @formatter:on
1220 
1221         protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1222             super(opcode, pp, mmmmm, w, op, assertion);
1223         }
1224 
1225         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
1226             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
1227             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
1228             asm.emitByte(op);
1229             asm.emitModRM(dst, src2);
1230             asm.emitByte(mask.encoding() << 4);
1231         }
1232 
1233         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
1234             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
1235             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
1236             asm.emitByte(op);
1237             asm.emitOperandHelper(dst, src2, 0);
1238             asm.emitByte(mask.encoding() << 4);
1239         }
1240     }
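A sketch of the RVMR order (asm and YMM registers dst, mask, src1, src2 assumed in scope); the mask register is encoded in the trailing immediate byte:

    VexRVMROp.VPBLENDVB.emit(asm, AVXSize.YMM, dst, mask, src1, src2);  // VPBLENDVB dst, src1, src2, mask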
1241 
1242     /**
1243      * VEX-encoded instructions with an operand order of RVM.
1244      */
1245     public static class VexRVMOp extends VexOp {
1246         // @formatter:off
1247         public static final VexRVMOp VANDPS    = new VexRVMOp("VANDPS",    P_,   M_0F,   WIG, 0x54);
1248         public static final VexRVMOp VANDPD    = new VexRVMOp("VANDPD",    P_66, M_0F,   WIG, 0x54);
1249         public static final VexRVMOp VANDNPS   = new VexRVMOp("VANDNPS",   P_,   M_0F,   WIG, 0x55);
1250         public static final VexRVMOp VANDNPD   = new VexRVMOp("VANDNPD",   P_66, M_0F,   WIG, 0x55);
1251         public static final VexRVMOp VORPS     = new VexRVMOp("VORPS",     P_,   M_0F,   WIG, 0x56);
1252         public static final VexRVMOp VORPD     = new VexRVMOp("VORPD",     P_66, M_0F,   WIG, 0x56);
1253         public static final VexRVMOp VXORPS    = new VexRVMOp("VXORPS",    P_,   M_0F,   WIG, 0x57);
1254         public static final VexRVMOp VXORPD    = new VexRVMOp("VXORPD",    P_66, M_0F,   WIG, 0x57);
1255         public static final VexRVMOp VADDPS    = new VexRVMOp("VADDPS",    P_,   M_0F,   WIG, 0x58);


1303         public static final VexRVMOp VPCMPEQB  = new VexRVMOp("VPCMPEQB",  P_66, M_0F,   WIG, 0x74, VEXOpAssertion.AVX1_2);
1304         public static final VexRVMOp VPCMPEQW  = new VexRVMOp("VPCMPEQW",  P_66, M_0F,   WIG, 0x75, VEXOpAssertion.AVX1_2);
1305         public static final VexRVMOp VPCMPEQD  = new VexRVMOp("VPCMPEQD",  P_66, M_0F,   WIG, 0x76, VEXOpAssertion.AVX1_2);
1306         public static final VexRVMOp VPCMPEQQ  = new VexRVMOp("VPCMPEQQ",  P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
1307         public static final VexRVMOp VPCMPGTB  = new VexRVMOp("VPCMPGTB",  P_66, M_0F,   WIG, 0x64, VEXOpAssertion.AVX1_2);
1308         public static final VexRVMOp VPCMPGTW  = new VexRVMOp("VPCMPGTW",  P_66, M_0F,   WIG, 0x65, VEXOpAssertion.AVX1_2);
1309         public static final VexRVMOp VPCMPGTD  = new VexRVMOp("VPCMPGTD",  P_66, M_0F,   WIG, 0x66, VEXOpAssertion.AVX1_2);
1310         public static final VexRVMOp VPCMPGTQ  = new VexRVMOp("VPCMPGTQ",  P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
1311         // @formatter:on
1312 
1313         private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
1314             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1315         }
1316 
1317         protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1318             super(opcode, pp, mmmmm, w, op, assertion);
1319         }
1320 
1321         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1322             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1323             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
1324             asm.emitByte(op);
1325             asm.emitModRM(dst, src2);
1326         }
1327 
1328         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1329             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1330             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
1331             asm.emitByte(op);
1332             asm.emitOperandHelper(dst, src2, 0);
1333         }
1334     }
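The RVM form as used by the vpxor helper later in this file (asm and YMM registers dst, src1, src2 assumed in scope):

    VexRVMOp.VPXOR.emit(asm, AVXSize.YMM, dst, src1, src2);  // dst = src1 ^ src2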
1335 
1336     public static final class VexGeneralPurposeRVMOp extends VexOp {
1337         // @formatter:off
1338         public static final VexGeneralPurposeRVMOp ANDN   = new VexGeneralPurposeRVMOp("ANDN",   P_,   M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1);
1339         public static final VexGeneralPurposeRVMOp MULX   = new VexGeneralPurposeRVMOp("MULX",   P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2);
1340         public static final VexGeneralPurposeRVMOp PDEP   = new VexGeneralPurposeRVMOp("PDEP",   P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1341         public static final VexGeneralPurposeRVMOp PEXT   = new VexGeneralPurposeRVMOp("PEXT",   P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1342         // @formatter:on
1343 
1344         private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1345             super(opcode, pp, mmmmm, w, op, assertion);
1346         }
1347 
1348         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1349             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null);
1350             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1351             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1);
1352             asm.emitByte(op);
1353             asm.emitModRM(dst, src2);
1354         }
1355 
1356         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1357             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null);
1358             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1359             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1);
1360             asm.emitByte(op);
1361             asm.emitOperandHelper(dst, src2, 0);
1362         }
1363     }
1364 
1365     public static final class VexGeneralPurposeRMVOp extends VexOp {
1366         // @formatter:off
1367         public static final VexGeneralPurposeRMVOp BEXTR  = new VexGeneralPurposeRMVOp("BEXTR",  P_,   M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1);
1368         public static final VexGeneralPurposeRMVOp BZHI   = new VexGeneralPurposeRMVOp("BZHI",   P_,   M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1369         public static final VexGeneralPurposeRMVOp SARX   = new VexGeneralPurposeRMVOp("SARX",   P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1370         public static final VexGeneralPurposeRMVOp SHRX   = new VexGeneralPurposeRMVOp("SHRX",   P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1371         public static final VexGeneralPurposeRMVOp SHLX   = new VexGeneralPurposeRMVOp("SHLX",   P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1372         // @formatter:on
1373 
1374         private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1375             super(opcode, pp, mmmmm, w, op, assertion);
1376         }
1377 
1378         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1379             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null);
1380             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1381             asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1);
1382             asm.emitByte(op);
1383             asm.emitModRM(dst, src1);
1384         }
1385 
1386         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) {
1387             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null);
1388             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1389             asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1);
1390             asm.emitByte(op);
1391             asm.emitOperandHelper(dst, src1, 0);
1392         }
1393     }
1394 
1395     /**
1396      * VEX-encoded shift instructions with an operand order of either RVM or VMI.
1397      */
1398     public static final class VexShiftOp extends VexRVMOp implements VexRRIOp {
1399         // @formatter:off
1400         public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2);
1401         public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2);
1402         public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2);
1403         public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4);
1404         public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4);
1405         public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6);
1406         public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6);
1407         public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6);
1408         // @formatter:on
1409 
1410         private final int immOp;
1411         private final int r;
1412 
1413         private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) {
1414             super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2);
1415             this.immOp = immOp;
1416             this.r = r;
1417         }
1418 
1419         @Override
1420         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1421             assert assertion.check((AMD64) asm.target.arch, size, null, dst, src);
1422             asm.vexPrefix(null, dst, src, size, pp, mmmmm, w);
1423             asm.emitByte(immOp);
1424             asm.emitModRM(r, src);
1425             asm.emitByte(imm8);
1426         }
1427     }
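Both encodings side by side (asm, YMM registers dst and src, and an XMM register count assumed in scope):

    VexShiftOp.VPSLLD.emit(asm, AVXSize.YMM, dst, src, 7);      // VMI form: shift left by immediate 7
    VexShiftOp.VPSLLD.emit(asm, AVXSize.YMM, dst, src, count);  // RVM form: shift count taken from an XMM register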
1428 
1429     public static final class VexMaskMoveOp extends VexOp {
1430         // @formatter:off
1431         public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E);
1432         public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F);
1433         public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1434         public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1435         // @formatter:on
1436 
1437         private final int opReverse;
1438 
1439         private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1440             this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
1441         }
1442 
1443         private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1444             super(opcode, pp, mmmmm, w, op, assertion);
1445             this.opReverse = opReverse;
1446         }
1447 
1448         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) {
1449             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null);
1450             asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w);
1451             asm.emitByte(op);
1452             asm.emitOperandHelper(dst, src, 0);
1453         }
1454 
1455         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) {
1456             assert assertion.check((AMD64) asm.target.arch, size, src, mask, null);
1457             asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w);
1458             asm.emitByte(opReverse);
1459             asm.emitOperandHelper(src, dst, 0);
1460         }
1461     }
1462 
1463     /**
1464      * VEX-encoded instructions with an operand order of RVMI.
1465      */
1466     public static final class VexRVMIOp extends VexOp {
1467         // @formatter:off
1468         public static final VexRVMIOp VSHUFPS     = new VexRVMIOp("VSHUFPS",     P_,   M_0F,   WIG, 0xC6);
1469         public static final VexRVMIOp VSHUFPD     = new VexRVMIOp("VSHUFPD",     P_66, M_0F,   WIG, 0xC6);
1470         public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0,  0x18, VEXOpAssertion.AVX1_256ONLY);
1471         public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0,  0x38, VEXOpAssertion.AVX2_256ONLY);
1472         // @formatter:on
1473 
1474         private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) {
1475             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1476         }
1477 
1478         private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1479             super(opcode, pp, mmmmm, w, op, assertion);
1480         }
1481 
1482         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) {
1483             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1484             assert (imm8 & 0xFF) == imm8;
1485             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
1486             asm.emitByte(op);
1487             asm.emitModRM(dst, src2);
1488             asm.emitByte(imm8);
1489         }
1490 
1491         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) {
1492             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1493             assert (imm8 & 0xFF) == imm8;
1494             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
1495             asm.emitByte(op);
1496             asm.emitOperandHelper(dst, src2, 1);
1497             asm.emitByte(imm8);
1498         }
1499     }
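A sketch of the RVMI order (asm and registers dst, src1, src2 assumed in scope), e.g. inserting an XMM value into the upper half of a YMM register:

    VexRVMIOp.VINSERTF128.emit(asm, AVXSize.YMM, dst, src1, src2, 1);  // VINSERTF128 dst, src1, src2, 1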
1500 
1501     /**
1502      * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a
1503      * comparison operator.
1504      */
1505     public static final class VexFloatCompareOp extends VexOp {
1506         // @formatter:off
1507         public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_,   M_0F, WIG, 0xC2);
1508         public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
1509         public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F2, M_0F, WIG, 0xC2);
1510         public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
1511         // @formatter:on
1512 
1513         public enum Predicate {
1514             EQ_OQ(0x00),


1578                             return LT_OQ;
1579                         case LE:
1580                             return LE_OQ;
1581                         case GT:
1582                             return GT_OQ;
1583                         case GE:
1584                             return GE_OQ;
1585                         default:
1586                             throw GraalError.shouldNotReachHere();
1587                     }
1588                 }
1589             }
1590         }
1591 
1592         private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) {
1593             super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1594         }
1595 
1596         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) {
1597             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1598             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
1599             asm.emitByte(op);
1600             asm.emitModRM(dst, src2);
1601             asm.emitByte(p.imm8);
1602         }
1603 
1604         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) {
1605             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1606             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
1607             asm.emitByte(op);
1608             asm.emitOperandHelper(dst, src2, 1);
1609             asm.emitByte(p.imm8);
1610         }
1611     }
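A sketch using the Predicate enum as the immediate comparison operator (asm and YMM registers dst, src1, src2 assumed in scope):

    VexFloatCompareOp.VCMPPS.emit(asm, AVXSize.YMM, dst, src1, src2, VexFloatCompareOp.Predicate.EQ_OQ);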
1612 
1613     public final void addl(AMD64Address dst, int imm32) {
1614         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1615     }
1616 
1617     public final void addl(Register dst, int imm32) {
1618         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1619     }
1620 
1621     public final void addl(Register dst, Register src) {
1622         ADD.rmOp.emit(this, DWORD, dst, src);
1623     }
1624 
1625     public final void addpd(Register dst, Register src) {
1626         SSEOp.ADD.emit(this, PD, dst, src);


1926         prefix(src, dst);
1927         emitByte(0x8D);
1928         emitOperandHelper(dst, src, 0);
1929     }
1930 
1931     public final void leaq(Register dst, AMD64Address src) {
1932         prefixq(src, dst);
1933         emitByte(0x8D);
1934         emitOperandHelper(dst, src, 0);
1935     }
1936 
1937     public final void leave() {
1938         emitByte(0xC9);
1939     }
1940 
1941     public final void lock() {
1942         emitByte(0xF0);
1943     }
1944 
1945     public final void movapd(Register dst, Register src) {
1946         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
1947         simdPrefix(dst, Register.None, src, PD, P_0F, false);
1948         emitByte(0x28);
1949         emitModRM(dst, src);
1950     }
1951 
1952     public final void movaps(Register dst, Register src) {
1953         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
1954         simdPrefix(dst, Register.None, src, PS, P_0F, false);
1955         emitByte(0x28);
1956         emitModRM(dst, src);
1957     }
1958 
1959     public final void movb(AMD64Address dst, int imm8) {
1960         prefix(dst);
1961         emitByte(0xC6);
1962         emitOperandHelper(0, dst, 1);
1963         emitByte(imm8);
1964     }
1965 
1966     public final void movb(AMD64Address dst, Register src) {
1967         assert src.getRegisterCategory().equals(CPU) : "must have byte register";
1968         prefixb(dst, src);
1969         emitByte(0x88);
1970         emitOperandHelper(src, dst, 0);
1971     }
1972 
1973     public final void movl(Register dst, int imm32) {
1974         movl(dst, imm32, false);
1975     }
1976 
1977     public final void movl(Register dst, int imm32, boolean annotateImm) {
1978         int insnPos = position();
1979         prefix(dst);
1980         emitByte(0xB8 + encode(dst));
1981         int immPos = position();
1982         emitInt(imm32);
1983         int nextInsnPos = position();
1984         if (annotateImm && codePatchingAnnotationConsumer != null) {
1985             codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
1986         }
1987     }


2010     public final void movl(AMD64Address dst, int imm32) {
2011         prefix(dst);
2012         emitByte(0xC7);
2013         emitOperandHelper(0, dst, 4);
2014         emitInt(imm32);
2015     }
2016 
2017     public final void movl(AMD64Address dst, Register src) {
2018         prefix(dst, src);
2019         emitByte(0x89);
2020         emitOperandHelper(src, dst, 0);
2021     }
2022 
2023     /**
2024      * Newer CPUs require the use of movsd and movss to avoid a partial register stall when loading
2025      * from memory, while old Opterons should use movlpd instead of movsd. The selection is done in
2026      * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
2027      * {@link AMD64MacroAssembler#movflt(Register, Register)}.
2028      */
2029     public final void movlpd(Register dst, AMD64Address src) {
2030         assert dst.getRegisterCategory().equals(XMM);
2031         simdPrefix(dst, dst, src, PD, P_0F, false);
2032         emitByte(0x12);
2033         emitOperandHelper(dst, src, 0);
2034     }
2035 
2036     public final void movlhps(Register dst, Register src) {
2037         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2038         simdPrefix(dst, src, src, PS, P_0F, false);
2039         emitByte(0x16);
2040         emitModRM(dst, src);
2041     }
2042 
2043     public final void movq(Register dst, AMD64Address src) {
2044         movq(dst, src, false);
2045     }
2046 
2047     public final void movq(Register dst, AMD64Address src, boolean wide) {
2048         if (dst.getRegisterCategory().equals(XMM)) {
2049             simdPrefix(dst, Register.None, src, SS, P_0F, false);
2050             emitByte(0x7E);
2051             emitOperandHelper(dst, src, wide, 0);
2052         } else {
2053             // gpr version of movq
2054             prefixq(src, dst);
2055             emitByte(0x8B);
2056             emitOperandHelper(dst, src, wide, 0);
2057         }
2058     }
2059 
2060     public final void movq(Register dst, Register src) {
2061         prefixq(dst, src);
2062         emitByte(0x8B);
2063         emitModRM(dst, src);
2064     }
2065 
2066     public final void movq(AMD64Address dst, Register src) {
2067         if (src.getRegisterCategory().equals(XMM)) {
2068             simdPrefix(src, Register.None, dst, PD, P_0F, true);
2069             emitByte(0xD6);
2070             emitOperandHelper(src, dst, 0);
2071         } else {
2072             // gpr version of movq
2073             prefixq(dst, src);
2074             emitByte(0x89);
2075             emitOperandHelper(src, dst, 0);
2076         }
2077     }
2078 
2079     public final void movsbl(Register dst, AMD64Address src) {
2080         prefix(src, dst);
2081         emitByte(0x0F);
2082         emitByte(0xBE);
2083         emitOperandHelper(dst, src, 0);
2084     }
2085 
2086     public final void movsbl(Register dst, Register src) {
2087         prefix(dst, false, src, true);
2088         emitByte(0x0F);


2409             case 2:
2410                 emitByte(0x66);
2411                 emitByte(0x90);
2412                 break;
2413             case 1:
2414                 emitByte(0x90);
2415                 break;
2416             default:
2417                 assert i == 0;
2418         }
2419     }
2420 
2421     public final void orl(Register dst, Register src) {
2422         OR.rmOp.emit(this, DWORD, dst, src);
2423     }
2424 
2425     public final void orl(Register dst, int imm32) {
2426         OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2427     }
2428 
2429     public final void pop(Register dst) {
2430         prefix(dst);
2431         emitByte(0x58 + encode(dst));
2432     }
2433 
2434     public void popfq() {
2435         emitByte(0x9D);
2436     }
2437 
2438     public final void ptest(Register dst, Register src) {
2439         assert supports(CPUFeature.SSE4_1);
2440         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2441         simdPrefix(dst, Register.None, src, PD, P_0F38, false);
2442         emitByte(0x17);
2443         emitModRM(dst, src);
2444     }
2445 
2446     public final void pcmpeqb(Register dst, Register src) {
2447         assert supports(CPUFeature.SSE2);
2448         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2449         simdPrefix(dst, dst, src, PD, P_0F, false);
2450         emitByte(0x74);
2451         emitModRM(dst, src);
2452     }
2453 
2454     public final void pcmpeqw(Register dst, Register src) {
2455         assert supports(CPUFeature.SSE2);
2456         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2457         simdPrefix(dst, dst, src, PD, P_0F, false);
2458         emitByte(0x75);
2459         emitModRM(dst, src);
2460     }
2461 
2462     public final void pcmpestri(Register dst, AMD64Address src, int imm8) {
2463         assert supports(CPUFeature.SSE4_2);
2464         assert dst.getRegisterCategory().equals(XMM);
2465         simdPrefix(dst, Register.None, src, PD, P_0F3A, false);
2466         emitByte(0x61);
2467         emitOperandHelper(dst, src, 0);
2468         emitByte(imm8);
2469     }
2470 
2471     public final void pcmpestri(Register dst, Register src, int imm8) {
2472         assert supports(CPUFeature.SSE4_2);
2473         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2474         simdPrefix(dst, Register.None, src, PD, P_0F3A, false);
2475         emitByte(0x61);
2476         emitModRM(dst, src);
2477         emitByte(imm8);
2478     }
2479 
2480     public final void pmovmskb(Register dst, Register src) {
2481         assert supports(CPUFeature.SSE2);
2482         assert dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(XMM);
2483         simdPrefix(dst, Register.None, src, PD, P_0F, false);
2484         emitByte(0xD7);
2485         emitModRM(dst, src);
2486     }
2487 
2488     public final void pmovzxbw(Register dst, AMD64Address src) {
2489         assert supports(CPUFeature.SSE4_2);
2490         assert dst.getRegisterCategory().equals(XMM);
2491         // XXX legacy_mode should be: _legacy_mode_bw
2492         simdPrefix(dst, Register.None, src, PD, P_0F38, false);
2493         emitByte(0x30);
2494         emitOperandHelper(dst, src, 0);
2495     }
2496 
2497     public final void push(Register src) {
2498         prefix(src);
2499         emitByte(0x50 + encode(src));
2500     }
2501 
2502     public void pushfq() {
2503         emitByte(0x9c);
2504     }
2505 
2506     public final void paddd(Register dst, Register src) {
2507         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2508         simdPrefix(dst, dst, src, PD, P_0F, false);
2509         emitByte(0xFE);
2510         emitModRM(dst, src);
2511     }
2512 
2513     public final void paddq(Register dst, Register src) {
2514         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2515         simdPrefix(dst, dst, src, PD, P_0F, false);
2516         emitByte(0xD4);
2517         emitModRM(dst, src);
2518     }
2519 
2520     public final void pextrw(Register dst, Register src, int imm8) {
2521         assert dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(XMM);
2522         simdPrefix(dst, Register.None, src, PD, P_0F, false);
2523         emitByte(0xC5);
2524         emitModRM(dst, src);
2525         emitByte(imm8);
2526     }
2527 
2528     public final void pinsrw(Register dst, Register src, int imm8) {
2529         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU);
2530         simdPrefix(dst, dst, src, PD, P_0F, false);
2531         emitByte(0xC4);
2532         emitModRM(dst, src);
2533         emitByte(imm8);
2534     }
2535 
2536     public final void por(Register dst, Register src) {
2537         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2538         simdPrefix(dst, dst, src, PD, P_0F, false);
2539         emitByte(0xEB);
2540         emitModRM(dst, src);
2541     }
2542 
2543     public final void pand(Register dst, Register src) {
2544         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2545         simdPrefix(dst, dst, src, PD, P_0F, false);
2546         emitByte(0xDB);
2547         emitModRM(dst, src);
2548     }
2549 
2550     public final void pxor(Register dst, Register src) {
2551         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2552         simdPrefix(dst, dst, src, PD, P_0F, false);
2553         emitByte(0xEF);
2554         emitModRM(dst, src);
2555     }
2556 
2557     public final void pslld(Register dst, int imm8) {
2558         assert isUByte(imm8) : "invalid value";
2559         assert dst.getRegisterCategory().equals(XMM);
2560         // XMM6 is for /6 encoding: 66 0F 72 /6 ib
2561         simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false);
2562         emitByte(0x72);
2563         emitModRM(6, dst);
2564         emitByte(imm8 & 0xFF);
2565     }
2566 
2567     public final void psllq(Register dst, Register shift) {
2568         assert dst.getRegisterCategory().equals(XMM) && shift.getRegisterCategory().equals(XMM);
2569         simdPrefix(dst, dst, shift, PD, P_0F, false);
2570         emitByte(0xF3);
2571         emitModRM(dst, shift);
2572     }
2573 
2574     public final void psllq(Register dst, int imm8) {
2575         assert isUByte(imm8) : "invalid value";
2576         assert dst.getRegisterCategory().equals(XMM);
2577         // XMM6 is for /6 encoding: 66 0F 73 /6 ib
2578         simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false);
2579         emitByte(0x73);
2580         emitModRM(6, dst);
2581         emitByte(imm8);
2582     }
2583 
2584     public final void psrad(Register dst, int imm8) {
2585         assert isUByte(imm8) : "invalid value";
2586         assert dst.getRegisterCategory().equals(XMM);
2587         // XMM4 is for /4 encoding: 66 0F 72 /4 ib
2588         simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false);
2589         emitByte(0x72);
2590         emitModRM(4, dst);
2591         emitByte(imm8);
2592     }
2593 
2594     public final void psrld(Register dst, int imm8) {
2595         assert isUByte(imm8) : "invalid value";
2596         assert dst.getRegisterCategory().equals(XMM);
2597         // XMM2 is for /2 encoding: 66 0F 72 /2 ib
2598         simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false);
2599         emitByte(0x72);
2600         emitModRM(2, dst);
2601         emitByte(imm8);
2602     }
2603 
2604     public final void psrlq(Register dst, int imm8) {
2605         assert isUByte(imm8) : "invalid value";
2606         assert dst.getRegisterCategory().equals(XMM);
2607         // XMM2 is for /2 encoding: 66 0F 73 /2 ib
2608         simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false);
2609         emitByte(0x73);
2610         emitModRM(2, dst);
2611         emitByte(imm8);
2612     }
2613 
2614     public final void psrldq(Register dst, int imm8) {
2615         assert isUByte(imm8) : "invalid value";
2616         assert dst.getRegisterCategory().equals(XMM);
2617         simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false);
2618         emitByte(0x73);
2619         emitModRM(3, dst);
2620         emitByte(imm8);
2621     }
2622 
2623     public final void pshufb(Register dst, Register src) {
2624         assert supports(CPUFeature.SSSE3);
2625         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2626         simdPrefix(dst, dst, src, PD, P_0F38, false);
2627         emitByte(0x00);
2628         emitModRM(dst, src);
2629     }
2630 
2631     public final void pshuflw(Register dst, Register src, int imm8) {
2632         assert supports(CPUFeature.SSE2);
2633         assert isUByte(imm8) : "invalid value";
2634         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2635         simdPrefix(dst, Register.None, src, SD, P_0F, false);
2636         emitByte(0x70);
2637         emitModRM(dst, src);
2638         emitByte(imm8);
2639     }
2640 
2641     public final void pshufd(Register dst, Register src, int imm8) {
2642         assert isUByte(imm8) : "invalid value";
2643         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2644         simdPrefix(dst, Register.None, src, PD, P_0F, false);
2645         emitByte(0x70);
2646         emitModRM(dst, src);
2647         emitByte(imm8);
2648     }
2649 
2650     public final void psubd(Register dst, Register src) {
2651         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2652         simdPrefix(dst, dst, src, PD, P_0F, false);
2653         emitByte(0xFA);
2654         emitModRM(dst, src);
2655     }
2656 
2657     public final void rcpps(Register dst, Register src) {
2658         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2659         simdPrefix(dst, Register.None, src, PS, P_0F, false);
2660         emitByte(0x53);
2661         emitModRM(dst, src);
2662     }
2663 
2664     public final void ret(int imm16) {
2665         if (imm16 == 0) {
2666             emitByte(0xC3);
2667         } else {
2668             emitByte(0xC2);
2669             emitShort(imm16);
2670         }
2671     }
2672 
2673     public final void sarl(Register dst, int imm8) {
2674         prefix(dst);
2675         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2676         if (imm8 == 1) {
2677             emitByte(0xD1);
2678             emitModRM(7, dst);


2686     public final void shll(Register dst, int imm8) {
2687         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2688         prefix(dst);
2689         if (imm8 == 1) {
2690             emitByte(0xD1);
2691             emitModRM(4, dst);
2692         } else {
2693             emitByte(0xC1);
2694             emitModRM(4, dst);
2695             emitByte(imm8);
2696         }
2697     }
2698 
2699     public final void shll(Register dst) {
2700         // Multiply dst by 2, CL times.
2701         prefix(dst);
2702         emitByte(0xD3);
2703         emitModRM(4, dst);
2704     }
2705 
2706     public final void shrl(Register dst, int imm8) {
2707         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2708         prefix(dst);
2709         emitByte(0xC1);
2710         emitModRM(5, dst);
2711         emitByte(imm8);
2712     }
2713 
2714     public final void shrl(Register dst) {
2715         // Unsigned divide dst by 2, CL times.
2716         prefix(dst);
2717         emitByte(0xD3);
2718         emitModRM(5, dst);
2719     }
2720 
2721     public final void subl(AMD64Address dst, int imm32) {
2722         SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2723     }
2724 
2725     public final void subl(Register dst, int imm32) {


2752             prefix(dst);
2753             emitByte(0xF7);
2754             emitModRM(0, dst);
2755         }
2756         emitInt(imm32);
2757     }
2758 
2759     public final void testl(Register dst, Register src) {
2760         prefix(dst, src);
2761         emitByte(0x85);
2762         emitModRM(dst, src);
2763     }
2764 
2765     public final void testl(Register dst, AMD64Address src) {
2766         prefix(src, dst);
2767         emitByte(0x85);
2768         emitOperandHelper(dst, src, 0);
2769     }
2770 
2771     public final void unpckhpd(Register dst, Register src) {
2772         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2773         simdPrefix(dst, dst, src, PD, P_0F, false);
2774         emitByte(0x15);
2775         emitModRM(dst, src);
2776     }
2777 
2778     public final void unpcklpd(Register dst, Register src) {
2779         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2780         simdPrefix(dst, dst, src, PD, P_0F, false);
2781         emitByte(0x14);
2782         emitModRM(dst, src);
2783     }
2784 
2785     public final void xorl(Register dst, Register src) {
2786         XOR.rmOp.emit(this, DWORD, dst, src);
2787     }
2788 
2789     public final void xorpd(Register dst, Register src) {
2790         SSEOp.XOR.emit(this, PD, dst, src);
2791     }
2792 
2793     public final void xorps(Register dst, Register src) {
2794         SSEOp.XOR.emit(this, PS, dst, src);
2795     }
2796 
2797     protected final void decl(Register dst) {
2798         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2799         prefix(dst);


2870     public final void cmpq(Register dst, int imm32) {
2871         CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2872     }
2873 
2874     public final void cmpq(Register dst, Register src) {
2875         CMP.rmOp.emit(this, QWORD, dst, src);
2876     }
2877 
2878     public final void cmpq(Register dst, AMD64Address src) {
2879         CMP.rmOp.emit(this, QWORD, dst, src);
2880     }
2881 
2882     public final void cmpxchgq(Register reg, AMD64Address adr) {
2883         prefixq(adr, reg);
2884         emitByte(0x0F);
2885         emitByte(0xB1);
2886         emitOperandHelper(reg, adr, 0);
2887     }
2888 
2889     public final void cvtdq2pd(Register dst, Register src) {
2890         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2891         simdPrefix(dst, Register.None, src, SS, P_0F, false);
2892         emitByte(0xE6);
2893         emitModRM(dst, src);
2894     }
2895 
2896     public final void cvtsi2sdq(Register dst, Register src) {
2897         SSEOp.CVTSI2SD.emit(this, QWORD, dst, src);
2898     }
2899 
2900     public final void cvttsd2siq(Register dst, Register src) {
2901         SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src);
2902     }
2903 
2904     public final void cvttpd2dq(Register dst, Register src) {
2905         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2906         simdPrefix(dst, Register.None, src, PD, P_0F, false);
2907         emitByte(0xE6);
2908         emitModRM(dst, src);
2909     }
2910 
2911     public final void decq(Register dst) {
2912         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2913         prefixq(dst);
2914         emitByte(0xFF);
2915         emitModRM(1, dst);
2916     }
2917 
2918     public final void decq(AMD64Address dst) {
2919         DEC.emit(this, QWORD, dst);
2920     }
2921 
2922     public final void imulq(Register dst, Register src) {
2923         prefixq(dst, src);
2924         emitByte(0x0F);
2925         emitByte(0xAF);
2926         emitModRM(dst, src);
2927     }
2928 
2929     public final void incq(Register dst) {
2930         // Don't use it directly. Use AMD64MacroAssembler.incrementq() instead.
2931         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2932         prefixq(dst);


2953             codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
2954         }
2955     }
2956 
2957     public final void movslq(Register dst, int imm32) {
2958         prefixq(dst);
2959         emitByte(0xC7);
2960         emitModRM(0, dst);
2961         emitInt(imm32);
2962     }
2963 
2964     public final void movdq(Register dst, AMD64Address src) {
2965         AMD64RMOp.MOVQ.emit(this, QWORD, dst, src);
2966     }
2967 
2968     public final void movdq(AMD64Address dst, Register src) {
2969         AMD64MROp.MOVQ.emit(this, QWORD, dst, src);
2970     }
2971 
2972     public final void movdq(Register dst, Register src) {
2973         if (dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU)) {
2974             AMD64RMOp.MOVQ.emit(this, QWORD, dst, src);
2975         } else if (src.getRegisterCategory().equals(XMM) && dst.getRegisterCategory().equals(CPU)) {
2976             AMD64MROp.MOVQ.emit(this, QWORD, dst, src);
2977         } else {
2978             throw new InternalError("should not reach here");
2979         }
2980     }
2981 
2982     public final void movdl(Register dst, Register src) {
2983         if (dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU)) {
2984             AMD64RMOp.MOVD.emit(this, DWORD, dst, src);
2985         } else if (src.getRegisterCategory().equals(XMM) && dst.getRegisterCategory().equals(CPU)) {
2986             AMD64MROp.MOVD.emit(this, DWORD, dst, src);
2987         } else {
2988             throw new InternalError("should not reach here");
2989         }
2990     }
2991 
2992     public final void movdl(Register dst, AMD64Address src) {
2993         AMD64RMOp.MOVD.emit(this, DWORD, dst, src);
2994     }
2995 
2996     public final void movddup(Register dst, Register src) {
2997         assert supports(CPUFeature.SSE3);
2998         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
2999         simdPrefix(dst, Register.None, src, SD, P_0F, false);
3000         emitByte(0x12);
3001         emitModRM(dst, src);
3002     }
3003 
3004     public final void movdqu(Register dst, AMD64Address src) {
3005         assert dst.getRegisterCategory().equals(XMM);
3006         simdPrefix(dst, Register.None, src, SS, P_0F, false);
3007         emitByte(0x6F);
3008         emitOperandHelper(dst, src, 0);
3009     }
3010 
3011     public final void movdqu(Register dst, Register src) {
3012         assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
3013         simdPrefix(dst, Register.None, src, SS, P_0F, false);
3014         emitByte(0x6F);
3015         emitModRM(dst, src);
3016     }
3017 
3018     public final void movslq(AMD64Address dst, int imm32) {
3019         prefixq(dst);
3020         emitByte(0xC7);
3021         emitOperandHelper(0, dst, 4);
3022         emitInt(imm32);
3023     }
3024 
3025     public final void movslq(Register dst, AMD64Address src) {
3026         prefixq(src, dst);
3027         emitByte(0x63);
3028         emitOperandHelper(dst, src, 0);
3029     }
3030 
3031     public final void movslq(Register dst, Register src) {
3032         prefixq(dst, src);
3033         emitByte(0x63);
3034         emitModRM(dst, src);
3035     }
3036 
3037     public final void negq(Register dst) {


3178             // We only have to handle StoreLoad.
3179             if ((barriers & STORE_LOAD) != 0) {
3180                 // All usable chips support "locked" instructions which suffice
3181                 // as barriers, and are much faster than the alternative of
3182                 // using the cpuid instruction. Here we use a locked
3183                 // add [rsp], 0, which is otherwise a no-op except that it
3184                 // clobbers the flags.
3185                 // Any change to this code may need to revisit other places in
3186                 // the code where this idiom is used, in particular the
3187                 // orderAccess code.
3188                 lock();
3189                 addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here
3190             }
3191         }
3192     }
3193 
3194     @Override
3195     protected final void patchJumpTarget(int branch, int branchTarget) {
3196         int op = getByte(branch);
3197         assert op == 0xE8 // call
3198                         ||
3199                         op == 0x00 // jump table entry
3200                         || op == 0xE9 // jmp
3201                         || op == 0xEB // short jmp
3202                         || (op & 0xF0) == 0x70 // short jcc
3203                         || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
3204         : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
3205 
3206         if (op == 0x00) {
3207             int offsetToJumpTableBase = getShort(branch + 1);
3208             int jumpTableBase = branch - offsetToJumpTableBase;
3209             int imm32 = branchTarget - jumpTableBase;
3210             emitInt(imm32, branch);
3211         } else if (op == 0xEB || (op & 0xF0) == 0x70) {
3212 
3213             // short offset operators (jmp and jcc)
3214             final int imm8 = branchTarget - (branch + 2);
3215             /*
3216              * A wrongly patched short branch can potentially lead to code that appears to work but
3217              * behaves incorrectly, so we always fail with an exception here instead of an assert.
3218              */
3219             if (!NumUtil.isByte(imm8)) {


3440         emitByte(0xae);
3441         emitByte(0xe8);
3442     }
3443 
3444     public final void vptest(Register dst, Register src) {
3445         VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
3446     }
3447 
3448     public final void vpxor(Register dst, Register nds, Register src) {
3449         VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3450     }
3451 
3452     public final void vpxor(Register dst, Register nds, AMD64Address src) {
3453         VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3454     }
3455 
3456     public final void vmovdqu(Register dst, AMD64Address src) {
3457         VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
3458     }
3459 





3460     public final void vpmovzxbw(Register dst, AMD64Address src) {

3461         VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
3462     }
3463 
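         // Insn: VZEROUPPER
         // Code: VEX.128.0F.WIG 77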
3464     public final void vzeroupper() {
3465         emitVEX(L128, P_, M_0F, W0, 0, 0);
3466         emitByte(0x77);
3467     }
3468 














3469     // This instruction sets the ZF and CF flags based on the OR of the two mask operands.
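         // Insn: KORTESTQ k1, k2
         // Code: VEX.L0.0F.W1 98 /r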
3470     public final void kortestq(Register src1, Register src2) {
3471         assert supports(CPUFeature.AVX512BW);
3472         assert src1.getRegisterCategory().equals(MASK) && src2.getRegisterCategory().equals(MASK);
3473         vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1);

3474         emitByte(0x98);
3475         emitModRM(src1, src2);
3476     }
3477 




































3478     public final void kmovq(Register dst, Register src) {
3479         assert supports(CPUFeature.AVX512BW);
3480         assert dst.getRegisterCategory().equals(MASK) || dst.getRegisterCategory().equals(CPU);
3481         assert src.getRegisterCategory().equals(MASK) || src.getRegisterCategory().equals(CPU);
3482         assert !(dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(CPU));
3483 
3484         if (dst.getRegisterCategory().equals(MASK)) {
3485             if (src.getRegisterCategory().equals(MASK)) {
3486                 // kmovq(KRegister dst, KRegister src)
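                     // Code: VEX.L0.0F.W1 90 /r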
3487                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1);


3488                 emitByte(0x90);
3489                 emitModRM(dst, src);
3490             } else {
3491                 // kmovq(KRegister dst, Register src)
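                     // Code: VEX.L0.F2.0F.W1 92 /r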
3492                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1);


3493                 emitByte(0x92);
3494                 emitModRM(dst, src);
3495             }
3496         } else {
3497             if (src.getRegisterCategory().equals(MASK)) {
3498                 // kmovq(Register dst, KRegister src)
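                     // Code: VEX.L0.F2.0F.W1 93 /r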
3499                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1);


3500                 emitByte(0x93);
3501                 emitModRM(dst, src);
3502             } else {
3503                 throw GraalError.shouldNotReachHere();
3504             }
3505         }
3506     }
3507 











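         // Insn: VMOVDQU64 zmm1, zmm2/m512
         // Code: EVEX.512.F3.0F.W1 6F /r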
3508     public final void evmovdqu64(Register dst, AMD64Address src) {
3509         assert supports(CPUFeature.AVX512F);
3510         assert dst.getRegisterCategory().equals(XMM);
3511         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
3512         emitByte(0x6F);
3513         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3514     }
3515 


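         // Insn: VPMOVZXBW zmm1, ymm2/m256
         // Code: EVEX.512.66.0F38.WIG 30 /r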
3516     public final void evpmovzxbw(Register dst, AMD64Address src) {
3517         assert supports(CPUFeature.AVX512BW);
3518         assert dst.getRegisterCategory().equals(XMM);

3519         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
3520         emitByte(0x30);
3521         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3522     }
3523 
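         // Insn: VPCMPEQB k1, zmm2, zmm3/m512
         // Code: EVEX.NDS.512.66.0F.WIG 74 /r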
3524     public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
3525         assert supports(CPUFeature.AVX512BW);
3526         assert kdst.getRegisterCategory().equals(MASK) && nds.getRegisterCategory().equals(XMM);
3527         evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
3528         emitByte(0x74);
3529         emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3530     }
































































































































3531 }


  26 
  27 import static jdk.vm.ci.amd64.AMD64.CPU;
  28 import static jdk.vm.ci.amd64.AMD64.MASK;
  29 import static jdk.vm.ci.amd64.AMD64.XMM;
  30 import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
  31 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
  32 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
  33 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
  34 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
  35 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
  36 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
  37 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
  38 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
  39 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
  40 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
  41 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
  42 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
  43 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
  44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
  45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
  46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
  47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
  48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
  49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
  50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
  51 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
  52 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
  53 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
  54 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
  55 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
  56 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
  57 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
  58 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
  59 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
  60 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
  61 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
  62 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
  63 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
  64 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
  65 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
  66 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;


1007 
1008     /**
1009      * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
1010      */
1011     public static class VexRROp extends VexOp {
1012         // @formatter:off
1013         public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
1014         // @formatter:on
1015 
1016         protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
1017             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1018         }
1019 
1020         protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1021             super(opcode, pp, mmmmm, w, op, assertion);
1022         }
1023 
1024         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1025             assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1026             assert op != 0x1A && op != 0x5A;
1027             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1028             asm.emitByte(op);
1029             asm.emitModRM(dst, src);
1030         }
1031     }
1032 
1033     /**
1034      * VEX-encoded instructions with an operand order of RM.
1035      */
1036     public static class VexRMOp extends VexRROp {
1037         // @formatter:off
1038         public static final VexRMOp VCVTTSS2SI      = new VexRMOp("VCVTTSS2SI",      P_F3, M_0F,   W0,  0x2C, VEXOpAssertion.CPU_XMM);
1039         public static final VexRMOp VCVTTSS2SQ      = new VexRMOp("VCVTTSS2SQ",      P_F3, M_0F,   W1,  0x2C, VEXOpAssertion.CPU_XMM);
1040         public static final VexRMOp VCVTTSD2SI      = new VexRMOp("VCVTTSD2SI",      P_F2, M_0F,   W0,  0x2C, VEXOpAssertion.CPU_XMM);
1041         public static final VexRMOp VCVTTSD2SQ      = new VexRMOp("VCVTTSD2SQ",      P_F2, M_0F,   W1,  0x2C, VEXOpAssertion.CPU_XMM);
1042         public static final VexRMOp VCVTPS2PD       = new VexRMOp("VCVTPS2PD",       P_,   M_0F,   WIG, 0x5A);
1043         public static final VexRMOp VCVTPD2PS       = new VexRMOp("VCVTPD2PS",       P_66, M_0F,   WIG, 0x5A);
1044         public static final VexRMOp VCVTDQ2PS       = new VexRMOp("VCVTDQ2PS",       P_,   M_0F,   WIG, 0x5B);
1045         public static final VexRMOp VCVTTPS2DQ      = new VexRMOp("VCVTTPS2DQ",      P_F3, M_0F,   WIG, 0x5B);
1046         public static final VexRMOp VCVTTPD2DQ      = new VexRMOp("VCVTTPD2DQ",      P_66, M_0F,   WIG, 0xE6);
1047         public static final VexRMOp VCVTDQ2PD       = new VexRMOp("VCVTDQ2PD",       P_F3, M_0F,   WIG, 0xE6);


1068         public static final VexRMOp VPMOVZXDQ       = new VexRMOp("VPMOVZXDQ",       P_66, M_0F38, WIG, 0x35);
1069         public static final VexRMOp VPTEST          = new VexRMOp("VPTEST",          P_66, M_0F38, WIG, 0x17);
1070         public static final VexRMOp VSQRTPD         = new VexRMOp("VSQRTPD",         P_66, M_0F,   WIG, 0x51);
1071         public static final VexRMOp VSQRTPS         = new VexRMOp("VSQRTPS",         P_,   M_0F,   WIG, 0x51);
1072         public static final VexRMOp VSQRTSD         = new VexRMOp("VSQRTSD",         P_F2, M_0F,   WIG, 0x51);
1073         public static final VexRMOp VSQRTSS         = new VexRMOp("VSQRTSS",         P_F3, M_0F,   WIG, 0x51);
1074         public static final VexRMOp VUCOMISS        = new VexRMOp("VUCOMISS",        P_,   M_0F,   WIG, 0x2E);
1075         public static final VexRMOp VUCOMISD        = new VexRMOp("VUCOMISD",        P_66, M_0F,   WIG, 0x2E);
1076         // @formatter:on
1077 
1078         protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
1079             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1080         }
1081 
1082         protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1083             super(opcode, pp, mmmmm, w, op, assertion);
1084         }
1085 
1086         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1087             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1088             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1089             asm.emitByte(op);
1090             asm.emitOperandHelper(dst, src, 0);
1091         }
1092     }
1093 
1094     /**
1095      * VEX-encoded move instructions.
1096      * <p>
1097      * These instructions have two opcodes: op is the forward move instruction with an operand order
1098      * of RM, and opReverse is the reverse move instruction with an operand order of MR.
1099      */
1100     public static final class VexMoveOp extends VexRMOp {
1101         // @formatter:off
1102         public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
1103         public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
1104         public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_,   M_0F, WIG, 0x28, 0x29);
1105         public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
1106         public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_,   M_0F, WIG, 0x10, 0x11);
1107         public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
1108         public static final VexMoveOp VMOVSS  = new VexMoveOp("VMOVSS",  P_F3, M_0F, WIG, 0x10, 0x11);
1109         public static final VexMoveOp VMOVSD  = new VexMoveOp("VMOVSD",  P_F2, M_0F, WIG, 0x10, 0x11);
1110         public static final VexMoveOp VMOVD   = new VexMoveOp("VMOVD",   P_66, M_0F, W0,  0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
1111         public static final VexMoveOp VMOVQ   = new VexMoveOp("VMOVQ",   P_66, M_0F, W1,  0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
1112         // @formatter:on
1113 
1114         private final int opReverse;
1115 
1116         private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1117             this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
1118         }
1119 
1120         private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1121             super(opcode, pp, mmmmm, w, op, assertion);
1122             this.opReverse = opReverse;
1123         }
1124 
1125         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
1126             assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1127             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1128             asm.emitByte(opReverse);
1129             asm.emitOperandHelper(src, dst, 0);
1130         }
1131 
1132         public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1133             assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1134             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1135             asm.emitByte(opReverse);
1136             asm.emitModRM(src, dst);
1137         }
1138     }
1139 
1140     public interface VexRRIOp {
1141         void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
1142     }
1143 
1144     /**
1145      * VEX-encoded instructions with an operand order of RMI.
1146      */
1147     public static final class VexRMIOp extends VexOp implements VexRRIOp {
1148         // @formatter:off
1149         public static final VexRMIOp VPERMQ   = new VexRMIOp("VPERMQ",   P_66, M_0F3A, W1,  0x00, VEXOpAssertion.AVX2_256ONLY);
1150         public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1151         public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1152         public static final VexRMIOp VPSHUFD  = new VexRMIOp("VPSHUFD",  P_66, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
1153         // @formatter:on
1154 
1155         private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1156             super(opcode, pp, mmmmm, w, op, assertion);
1157         }
1158 
1159         @Override
1160         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1161             assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1162             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1163             asm.emitByte(op);
1164             asm.emitModRM(dst, src);
1165             asm.emitByte(imm8);
1166         }
1167 
1168         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
1169             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1170             asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1171             asm.emitByte(op);
1172             asm.emitOperandHelper(dst, src, 1);
1173             asm.emitByte(imm8);
1174         }
1175     }
1176 
1177     /**
1178      * VEX-encoded instructions with an operand order of MRI.
1179      */
1180     public static final class VexMRIOp extends VexOp implements VexRRIOp {
1181         // @formatter:off
1182         public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
1183         public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
1184         public static final VexMRIOp VPEXTRB      = new VexMRIOp("VPEXTRB",      P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
1185         public static final VexMRIOp VPEXTRW      = new VexMRIOp("VPEXTRW",      P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
1186         public static final VexMRIOp VPEXTRD      = new VexMRIOp("VPEXTRD",      P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
1187         public static final VexMRIOp VPEXTRQ      = new VexMRIOp("VPEXTRQ",      P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
1188         // @formatter:on
1189 
1190         private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1191             super(opcode, pp, mmmmm, w, op, assertion);
1192         }
1193 
1194         @Override
1195         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1196             assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1197             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1198             asm.emitByte(op);
1199             asm.emitModRM(src, dst);
1200             asm.emitByte(imm8);
1201         }
1202 
1203         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
1204             assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1205             asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1206             asm.emitByte(op);
1207             asm.emitOperandHelper(src, dst, 1);
1208             asm.emitByte(imm8);
1209         }
1210     }
1211 
1212     /**
1213      * VEX-encoded instructions with an operand order of RVMR.
1214      */
1215     public static class VexRVMROp extends VexOp {
1216         // @formatter:off
1217         public static final VexRVMROp VPBLENDVB  = new VexRVMROp("VPBLENDVB",  P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
1218         public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
1219         public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
1220         // @formatter:on
1221 
1222         protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1223             super(opcode, pp, mmmmm, w, op, assertion);
1224         }
1225 
1226         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
1227             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
1228             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1229             asm.emitByte(op);
1230             asm.emitModRM(dst, src2);
1231             asm.emitByte(mask.encoding() << 4);
1232         }
1233 
1234         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
1235             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
1236             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1237             asm.emitByte(op);
1238             asm.emitOperandHelper(dst, src2, 0);
1239             asm.emitByte(mask.encoding() << 4);
1240         }
1241     }
1242 
1243     /**
1244      * VEX-encoded instructions with an operand order of RVM.
1245      */
1246     public static class VexRVMOp extends VexOp {
1247         // @formatter:off
1248         public static final VexRVMOp VANDPS    = new VexRVMOp("VANDPS",    P_,   M_0F,   WIG, 0x54);
1249         public static final VexRVMOp VANDPD    = new VexRVMOp("VANDPD",    P_66, M_0F,   WIG, 0x54);
1250         public static final VexRVMOp VANDNPS   = new VexRVMOp("VANDNPS",   P_,   M_0F,   WIG, 0x55);
1251         public static final VexRVMOp VANDNPD   = new VexRVMOp("VANDNPD",   P_66, M_0F,   WIG, 0x55);
1252         public static final VexRVMOp VORPS     = new VexRVMOp("VORPS",     P_,   M_0F,   WIG, 0x56);
1253         public static final VexRVMOp VORPD     = new VexRVMOp("VORPD",     P_66, M_0F,   WIG, 0x56);
1254         public static final VexRVMOp VXORPS    = new VexRVMOp("VXORPS",    P_,   M_0F,   WIG, 0x57);
1255         public static final VexRVMOp VXORPD    = new VexRVMOp("VXORPD",    P_66, M_0F,   WIG, 0x57);
1256         public static final VexRVMOp VADDPS    = new VexRVMOp("VADDPS",    P_,   M_0F,   WIG, 0x58);


1304         public static final VexRVMOp VPCMPEQB  = new VexRVMOp("VPCMPEQB",  P_66, M_0F,   WIG, 0x74, VEXOpAssertion.AVX1_2);
1305         public static final VexRVMOp VPCMPEQW  = new VexRVMOp("VPCMPEQW",  P_66, M_0F,   WIG, 0x75, VEXOpAssertion.AVX1_2);
1306         public static final VexRVMOp VPCMPEQD  = new VexRVMOp("VPCMPEQD",  P_66, M_0F,   WIG, 0x76, VEXOpAssertion.AVX1_2);
1307         public static final VexRVMOp VPCMPEQQ  = new VexRVMOp("VPCMPEQQ",  P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
1308         public static final VexRVMOp VPCMPGTB  = new VexRVMOp("VPCMPGTB",  P_66, M_0F,   WIG, 0x64, VEXOpAssertion.AVX1_2);
1309         public static final VexRVMOp VPCMPGTW  = new VexRVMOp("VPCMPGTW",  P_66, M_0F,   WIG, 0x65, VEXOpAssertion.AVX1_2);
1310         public static final VexRVMOp VPCMPGTD  = new VexRVMOp("VPCMPGTD",  P_66, M_0F,   WIG, 0x66, VEXOpAssertion.AVX1_2);
1311         public static final VexRVMOp VPCMPGTQ  = new VexRVMOp("VPCMPGTQ",  P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
1312         // @formatter:on
1313 
1314         private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
1315             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1316         }
1317 
1318         protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1319             super(opcode, pp, mmmmm, w, op, assertion);
1320         }
1321 
1322         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1323             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1324             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1325             asm.emitByte(op);
1326             asm.emitModRM(dst, src2);
1327         }
1328 
1329         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1330             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1331             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1332             asm.emitByte(op);
1333             asm.emitOperandHelper(dst, src2, 0);
1334         }
1335     }
1336 
1337     public static final class VexGeneralPurposeRVMOp extends VexRVMOp {
1338         // @formatter:off
1339         public static final VexGeneralPurposeRVMOp ANDN   = new VexGeneralPurposeRVMOp("ANDN",   P_,   M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1);
1340         public static final VexGeneralPurposeRVMOp MULX   = new VexGeneralPurposeRVMOp("MULX",   P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2);
1341         public static final VexGeneralPurposeRVMOp PDEP   = new VexGeneralPurposeRVMOp("PDEP",   P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1342         public static final VexGeneralPurposeRVMOp PEXT   = new VexGeneralPurposeRVMOp("PEXT",   P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1343         // @formatter:on
1344 
1345         private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1346             super(opcode, pp, mmmmm, w, op, assertion);
1347         }
1348 
1349         @Override
1350         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1351             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null);
1352             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1353             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1354             asm.emitByte(op);
1355             asm.emitModRM(dst, src2);
1356         }
1357 
1358         @Override
1359         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1360             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null);
1361             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1362             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1363             asm.emitByte(op);
1364             asm.emitOperandHelper(dst, src2, 0);
1365         }
1366     }
1367 
1368     public static final class VexGeneralPurposeRMVOp extends VexOp {
1369         // @formatter:off
1370         public static final VexGeneralPurposeRMVOp BEXTR  = new VexGeneralPurposeRMVOp("BEXTR",  P_,   M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1);
1371         public static final VexGeneralPurposeRMVOp BZHI   = new VexGeneralPurposeRMVOp("BZHI",   P_,   M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1372         public static final VexGeneralPurposeRMVOp SARX   = new VexGeneralPurposeRMVOp("SARX",   P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1373         public static final VexGeneralPurposeRMVOp SHRX   = new VexGeneralPurposeRMVOp("SHRX",   P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1374         public static final VexGeneralPurposeRMVOp SHLX   = new VexGeneralPurposeRMVOp("SHLX",   P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1375         // @formatter:on
1376 
1377         private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1378             super(opcode, pp, mmmmm, w, op, assertion);
1379         }
1380 
1381         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1382             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null);
1383             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1384             asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1385             asm.emitByte(op);
1386             asm.emitModRM(dst, src1);
1387         }
1388 
1389         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) {
1390             assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null);
1391             assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1392             asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1393             asm.emitByte(op);
1394             asm.emitOperandHelper(dst, src1, 0);
1395         }
1396     }
1397 
1398     public static final class VexGeneralPurposeRMOp extends VexRMOp {
1399         // @formatter:off
1400         public static final VexGeneralPurposeRMOp BLSI    = new VexGeneralPurposeRMOp("BLSI",   P_,    M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1);
1401         public static final VexGeneralPurposeRMOp BLSMSK  = new VexGeneralPurposeRMOp("BLSMSK", P_,    M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1);
1402         public static final VexGeneralPurposeRMOp BLSR    = new VexGeneralPurposeRMOp("BLSR",   P_,    M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1);
1403         // @formatter:on
1404         private final int ext;
1405 
1406         private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) {
1407             super(opcode, pp, mmmmm, w, op, assertion);
1408             this.ext = ext;
1409         }
1410 
1411         @Override
1412         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1413             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1414             asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1415             asm.emitByte(op);
1416             asm.emitModRM(ext, src);
1417         }
1418 
1419         @Override
1420         public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1421             assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1422             asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1423             asm.emitByte(op);
1424             asm.emitOperandHelper(ext, src, 0);
1425         }
1426     }
1427 
1428     /**
1429      * VEX-encoded shift instructions with an operand order of either RVM or VMI.
1430      */
1431     public static final class VexShiftOp extends VexRVMOp implements VexRRIOp {
1432         // @formatter:off
1433         public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2);
1434         public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2);
1435         public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2);
1436         public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4);
1437         public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4);
1438         public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6);
1439         public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6);
1440         public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6);
1441         // @formatter:on
1442 
1443         private final int immOp;
1444         private final int r;
1445 
1446         private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) {
1447             super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2);
1448             this.immOp = immOp;
1449             this.r = r;
1450         }
1451 
1452         @Override
1453         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1454             assert assertion.check((AMD64) asm.target.arch, size, null, dst, src);
1455             asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, false);
1456             asm.emitByte(immOp);
1457             asm.emitModRM(r, src);
1458             asm.emitByte(imm8);
1459         }
1460     }
1461 
1462     public static final class VexMaskMoveOp extends VexOp {
1463         // @formatter:off
1464         public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E);
1465         public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F);
1466         public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1467         public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1468         // @formatter:on
1469 
1470         private final int opReverse;
1471 
1472         private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1473             this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
1474         }
1475 
1476         private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1477             super(opcode, pp, mmmmm, w, op, assertion);
1478             this.opReverse = opReverse;
1479         }
1480 
1481         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) {
1482             assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null);
1483             asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, false);
1484             asm.emitByte(op);
1485             asm.emitOperandHelper(dst, src, 0);
1486         }
1487 
1488         public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) {
1489             assert assertion.check((AMD64) asm.target.arch, size, src, mask, null);
1490             asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, false);
1491             asm.emitByte(opReverse);
1492             asm.emitOperandHelper(src, dst, 0);
1493         }
1494     }
1495 
1496     /**
1497      * VEX-encoded instructions with an operand order of RVMI.
1498      */
1499     public static final class VexRVMIOp extends VexOp {
1500         // @formatter:off
1501         public static final VexRVMIOp VSHUFPS     = new VexRVMIOp("VSHUFPS",     P_,   M_0F,   WIG, 0xC6);
1502         public static final VexRVMIOp VSHUFPD     = new VexRVMIOp("VSHUFPD",     P_66, M_0F,   WIG, 0xC6);
1503         public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0,  0x18, VEXOpAssertion.AVX1_256ONLY);
1504         public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0,  0x38, VEXOpAssertion.AVX2_256ONLY);
1505         // @formatter:on
1506 
1507         private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) {
1508             this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1509         }
1510 
1511         private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1512             super(opcode, pp, mmmmm, w, op, assertion);
1513         }
1514 
1515         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) {
1516             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1517             assert (imm8 & 0xFF) == imm8;
1518             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1519             asm.emitByte(op);
1520             asm.emitModRM(dst, src2);
1521             asm.emitByte(imm8);
1522         }
1523 
1524         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) {
1525             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1526             assert (imm8 & 0xFF) == imm8;
1527             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1528             asm.emitByte(op);
1529             asm.emitOperandHelper(dst, src2, 1);
1530             asm.emitByte(imm8);
1531         }
1532     }
1533 
1534     /**
1535      * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a
1536      * comparison operator.
1537      */
1538     public static final class VexFloatCompareOp extends VexOp {
1539         // @formatter:off
1540         public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_,   M_0F, WIG, 0xC2);
1541         public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
1542         public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2);
1543         public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
1544         // @formatter:on
1545 
1546         public enum Predicate {
1547             EQ_OQ(0x00),


1611                             return LT_OQ;
1612                         case LE:
1613                             return LE_OQ;
1614                         case GT:
1615                             return GT_OQ;
1616                         case GE:
1617                             return GE_OQ;
1618                         default:
1619                             throw GraalError.shouldNotReachHere();
1620                     }
1621                 }
1622             }
1623         }
1624 
1625         private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) {
1626             super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1627         }
1628 
1629         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) {
1630             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1631             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1632             asm.emitByte(op);
1633             asm.emitModRM(dst, src2);
1634             asm.emitByte(p.imm8);
1635         }
1636 
1637         public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) {
1638             assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1639             asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1640             asm.emitByte(op);
1641             asm.emitOperandHelper(dst, src2, 1);
1642             asm.emitByte(p.imm8);
1643         }
1644     }
1645 
1646     public final void addl(AMD64Address dst, int imm32) {
1647         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1648     }
1649 
1650     public final void addl(Register dst, int imm32) {
1651         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1652     }
1653 
1654     public final void addl(Register dst, Register src) {
1655         ADD.rmOp.emit(this, DWORD, dst, src);
1656     }
1657 
1658     public final void addpd(Register dst, Register src) {
1659         SSEOp.ADD.emit(this, PD, dst, src);


1959         prefix(src, dst);
1960         emitByte(0x8D);
1961         emitOperandHelper(dst, src, 0);
1962     }
1963 
1964     public final void leaq(Register dst, AMD64Address src) {
1965         prefixq(src, dst);
1966         emitByte(0x8D);
1967         emitOperandHelper(dst, src, 0);
1968     }
1969 
1970     public final void leave() {
1971         emitByte(0xC9);
1972     }
1973 
1974     public final void lock() {
1975         emitByte(0xF0);
1976     }
1977 
1978     public final void movapd(Register dst, Register src) {
1979         assert inRC(XMM, dst) && inRC(XMM, src);
1980         simdPrefix(dst, Register.None, src, PD, P_0F, false);
1981         emitByte(0x28);
1982         emitModRM(dst, src);
1983     }
1984 
1985     public final void movaps(Register dst, Register src) {
1986         assert inRC(XMM, dst) && inRC(XMM, src);
1987         simdPrefix(dst, Register.None, src, PS, P_0F, false);
1988         emitByte(0x28);
1989         emitModRM(dst, src);
1990     }
1991 
1992     public final void movb(AMD64Address dst, int imm8) {
1993         prefix(dst);
1994         emitByte(0xC6);
1995         emitOperandHelper(0, dst, 1);
1996         emitByte(imm8);
1997     }
1998 
1999     public final void movb(AMD64Address dst, Register src) {
2000         assert inRC(CPU, src) : "must have byte register";
2001         prefixb(dst, src);
2002         emitByte(0x88);
2003         emitOperandHelper(src, dst, 0);
2004     }
2005 
2006     public final void movl(Register dst, int imm32) {
2007         movl(dst, imm32, false);
2008     }
2009 
2010     public final void movl(Register dst, int imm32, boolean annotateImm) {
2011         int insnPos = position();
2012         prefix(dst);
2013         emitByte(0xB8 + encode(dst));
2014         int immPos = position();
2015         emitInt(imm32);
2016         int nextInsnPos = position();
2017         if (annotateImm && codePatchingAnnotationConsumer != null) {
2018             codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
2019         }
2020     }


2043     public final void movl(AMD64Address dst, int imm32) {
2044         prefix(dst);
2045         emitByte(0xC7);
2046         emitOperandHelper(0, dst, 4);
2047         emitInt(imm32);
2048     }
2049 
2050     public final void movl(AMD64Address dst, Register src) {
2051         prefix(dst, src);
2052         emitByte(0x89);
2053         emitOperandHelper(src, dst, 0);
2054     }
2055 
2056     /**
2057      * Newer CPUs require the use of movsd and movss to avoid a partial register stall when loading
2058      * from memory. Older Opterons use movlpd instead of movsd. The selection is done in
2059      * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
2060      * {@link AMD64MacroAssembler#movflt(Register, Register)}.
2061      */
2062     public final void movlpd(Register dst, AMD64Address src) {
2063         assert inRC(XMM, dst);
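             // Insn: MOVLPD xmm1, m64
             // Code: 66 0F 12 /r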
2064         simdPrefix(dst, dst, src, PD, P_0F, false);
2065         emitByte(0x12);
2066         emitOperandHelper(dst, src, 0);
2067     }
2068 
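         // Insn: MOVLHPS xmm1, xmm2
         // Code: 0F 16 /r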
2069     public final void movlhps(Register dst, Register src) {
2070         assert inRC(XMM, dst) && inRC(XMM, src);
2071         simdPrefix(dst, src, src, PS, P_0F, false);
2072         emitByte(0x16);
2073         emitModRM(dst, src);
2074     }
2075 
2076     public final void movq(Register dst, AMD64Address src) {
2077         movq(dst, src, false);
2078     }
2079 
2080     public final void movq(Register dst, AMD64Address src, boolean force4BytesDisplacement) {
2081         if (inRC(XMM, dst)) {
2082             // Insn: MOVQ xmm, r/m64
2083             // Code: F3 0F 7E /r
2084             // An alternative instruction would be 66 REX.W 0F 6E /r. We prefer the REX.W free
2085             // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction
2086             // when applicable.
2087             simdPrefix(dst, Register.None, src, SS, P_0F, false);
2088             emitByte(0x7E);
2089             emitOperandHelper(dst, src, force4BytesDisplacement, 0);
2090         } else {
2091             // gpr version of movq
2092             prefixq(src, dst);
2093             emitByte(0x8B);
2094             emitOperandHelper(dst, src, force4BytesDisplacement, 0);
2095         }
2096     }
2097 
2098     public final void movq(Register dst, Register src) {
2099         assert inRC(CPU, dst) && inRC(CPU, src);
2100         prefixq(dst, src);
2101         emitByte(0x8B);
2102         emitModRM(dst, src);
2103     }
2104 
2105     public final void movq(AMD64Address dst, Register src) {
2106         if (inRC(XMM, src)) {
2107             // Insn: MOVQ r/m64, xmm
2108             // Code: 66 0F D6 /r
2109             // An alternative instruction would be 66 REX.W 0F 7E /r. We prefer the REX.W free
2110             // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction
2111             // when applicable.
2112             simdPrefix(src, Register.None, dst, PD, P_0F, false);
2113             emitByte(0xD6);
2114             emitOperandHelper(src, dst, 0);
2115         } else {
2116             // gpr version of movq
2117             prefixq(dst, src);
2118             emitByte(0x89);
2119             emitOperandHelper(src, dst, 0);
2120         }
2121     }
2122 
2123     public final void movsbl(Register dst, AMD64Address src) {
2124         prefix(src, dst);
2125         emitByte(0x0F);
2126         emitByte(0xBE);
2127         emitOperandHelper(dst, src, 0);
2128     }
2129 
2130     public final void movsbl(Register dst, Register src) {
2131         prefix(dst, false, src, true);
2132         emitByte(0x0F);


2453             case 2:
2454                 emitByte(0x66);
2455                 emitByte(0x90);
2456                 break;
2457             case 1:
2458                 emitByte(0x90);
2459                 break;
2460             default:
2461                 assert i == 0;
2462         }
2463     }
2464 
2465     public final void orl(Register dst, Register src) {
2466         OR.rmOp.emit(this, DWORD, dst, src);
2467     }
2468 
2469     public final void orl(Register dst, int imm32) {
2470         OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2471     }
2472 
2473     // Insn: VPACKUSWB xmm1, xmm2, xmm3/m128
2474     // -----
2475     // Insn: VPACKUSWB xmm1, xmm1, xmm2
2476 
2477     public final void packuswb(Register dst, Register src) {
2478         assert inRC(XMM, dst) && inRC(XMM, src);
2479         // Code: VEX.NDS.128.66.0F.WIG 67 /r
2480         simdPrefix(dst, dst, src, PD, P_0F, false);
2481         emitByte(0x67);
2482         emitModRM(dst, src);
2483     }
2484 
2485     public final void pop(Register dst) {
2486         prefix(dst);
2487         emitByte(0x58 + encode(dst));
2488     }
2489 
2490     public void popfq() {
2491         emitByte(0x9D);
2492     }
2493 
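         // Insn: PTEST xmm1, xmm2
         // Code: 66 0F 38 17 /r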
2494     public final void ptest(Register dst, Register src) {
2495         assert supports(CPUFeature.SSE4_1);
2496         assert inRC(XMM, dst) && inRC(XMM, src);
2497         simdPrefix(dst, Register.None, src, PD, P_0F38, false);
2498         emitByte(0x17);
2499         emitModRM(dst, src);
2500     }
2501 
2502     public final void pcmpeqb(Register dst, Register src) {
2503         assert supports(CPUFeature.SSE2);
2504         assert inRC(XMM, dst) && inRC(XMM, src);
2505         simdPrefix(dst, dst, src, PD, P_0F, false);
2506         emitByte(0x74);
2507         emitModRM(dst, src);
2508     }
2509 
2510     public final void pcmpeqw(Register dst, Register src) {
2511         assert supports(CPUFeature.SSE2);
2512         assert inRC(XMM, dst) && inRC(XMM, src);
2513         simdPrefix(dst, dst, src, PD, P_0F, false);
2514         emitByte(0x75);
2515         emitModRM(dst, src);
2516     }
2517 
2518     public final void pcmpeqd(Register dst, Register src) {
2519         assert supports(CPUFeature.SSE2);
2520         assert inRC(XMM, dst) && inRC(XMM, src);
2521         simdPrefix(dst, dst, src, PD, P_0F, false);
2522         emitByte(0x76);
2523         emitModRM(dst, src);
2524     }
2525 
2526     public final void pcmpestri(Register dst, AMD64Address src, int imm8) {
2527         assert supports(CPUFeature.SSE4_2);
2528         assert inRC(XMM, dst);
2529         simdPrefix(dst, Register.None, src, PD, P_0F3A, false);
2530         emitByte(0x61);
2531         emitOperandHelper(dst, src, 0);
2532         emitByte(imm8);
2533     }
2534 
2535     public final void pcmpestri(Register dst, Register src, int imm8) {
2536         assert supports(CPUFeature.SSE4_2);
2537         assert inRC(XMM, dst) && inRC(XMM, src);
2538         simdPrefix(dst, Register.None, src, PD, P_0F3A, false);
2539         emitByte(0x61);
2540         emitModRM(dst, src);
2541         emitByte(imm8);
2542     }
2543 
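         // Insn: PMOVMSKB r32, xmm
         // Code: 66 0F D7 /r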
2544     public final void pmovmskb(Register dst, Register src) {
2545         assert supports(CPUFeature.SSE2);
2546         assert inRC(CPU, dst) && inRC(XMM, src);
2547         simdPrefix(dst, Register.None, src, PD, P_0F, false);
2548         emitByte(0xD7);
2549         emitModRM(dst, src);
2550     }
2551 
2552     // Insn: VPMOVZXBW xmm1, xmm2/m64
2553 
2554     public final void pmovzxbw(Register dst, AMD64Address src) {
2555         assert supports(CPUFeature.SSE4_1);
2556         assert inRC(XMM, dst);

2557         simdPrefix(dst, Register.None, src, PD, P_0F38, false);
2558         emitByte(0x30);
2559         emitOperandHelper(dst, src, 0);
2560     }
2561 
2562     public final void pmovzxbw(Register dst, Register src) {
2563         assert supports(CPUFeature.SSE4_1);
2564         assert inRC(XMM, dst) && inRC(XMM, src);
2565         simdPrefix(dst, Register.None, src, PD, P_0F38, false);
2566         emitByte(0x30);
2567         emitModRM(dst, src);
2568     }
2569 
2570     public final void push(Register src) {
2571         prefix(src);
2572         emitByte(0x50 + encode(src));
2573     }
2574 
2575     public void pushfq() {
2576         emitByte(0x9c);
2577     }
2578 
2579     public final void paddd(Register dst, Register src) {
2580         assert inRC(XMM, dst) && inRC(XMM, src);
2581         simdPrefix(dst, dst, src, PD, P_0F, false);
2582         emitByte(0xFE);
2583         emitModRM(dst, src);
2584     }
2585 
2586     public final void paddq(Register dst, Register src) {
2587         assert inRC(XMM, dst) && inRC(XMM, src);
2588         simdPrefix(dst, dst, src, PD, P_0F, false);
2589         emitByte(0xD4);
2590         emitModRM(dst, src);
2591     }
2592 
2593     public final void pextrw(Register dst, Register src, int imm8) {
2594         assert inRC(CPU, dst) && inRC(XMM, src);
2595         simdPrefix(dst, Register.None, src, PD, P_0F, false);
2596         emitByte(0xC5);
2597         emitModRM(dst, src);
2598         emitByte(imm8);
2599     }
2600 
2601     public final void pinsrw(Register dst, Register src, int imm8) {
2602         assert inRC(XMM, dst) && inRC(CPU, src);
2603         simdPrefix(dst, dst, src, PD, P_0F, false);
2604         emitByte(0xC4);
2605         emitModRM(dst, src);
2606         emitByte(imm8);
2607     }
2608 
2609     public final void por(Register dst, Register src) {
2610         assert inRC(XMM, dst) && inRC(XMM, src);
2611         simdPrefix(dst, dst, src, PD, P_0F, false);
2612         emitByte(0xEB);
2613         emitModRM(dst, src);
2614     }
2615 
2616     public final void pand(Register dst, Register src) {
2617         assert inRC(XMM, dst) && inRC(XMM, src);
2618         simdPrefix(dst, dst, src, PD, P_0F, false);
2619         emitByte(0xDB);
2620         emitModRM(dst, src);
2621     }
2622 
2623     public final void pxor(Register dst, Register src) {
2624         assert inRC(XMM, dst) && inRC(XMM, src);
2625         simdPrefix(dst, dst, src, PD, P_0F, false);
2626         emitByte(0xEF);
2627         emitModRM(dst, src);
2628     }
2629 
2630     public final void pslld(Register dst, int imm8) {
2631         assert isUByte(imm8) : "invalid value";
2632         assert inRC(XMM, dst);
2633         // XMM6 is for /6 encoding: 66 0F 72 /6 ib
2634         simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false);
2635         emitByte(0x72);
2636         emitModRM(6, dst);
2637         emitByte(imm8 & 0xFF);
2638     }
2639 
2640     public final void psllq(Register dst, Register shift) {
2641         assert inRC(XMM, dst) && inRC(XMM, shift);
2642         simdPrefix(dst, dst, shift, PD, P_0F, false);
2643         emitByte(0xF3);
2644         emitModRM(dst, shift);
2645     }
2646 
2647     public final void psllq(Register dst, int imm8) {
2648         assert isUByte(imm8) : "invalid value";
2649         assert inRC(XMM, dst);
2650         // XMM6 is for /6 encoding: 66 0F 73 /6 ib
2651         simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false);
2652         emitByte(0x73);
2653         emitModRM(6, dst);
2654         emitByte(imm8);
2655     }
2656 
2657     public final void psrad(Register dst, int imm8) {
2658         assert isUByte(imm8) : "invalid value";
2659         assert inRC(XMM, dst);
2660         // XMM4 is for /4 encoding: 66 0F 72 /4 ib
2661         simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false);
2662         emitByte(0x72);
2663         emitModRM(4, dst);
2664         emitByte(imm8);
2665     }
2666 
2667     public final void psrld(Register dst, int imm8) {
2668         assert isUByte(imm8) : "invalid value";
2669         assert inRC(XMM, dst);
2670         // XMM2 is for /2 encoding: 66 0F 72 /2 ib
2671         simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false);
2672         emitByte(0x72);
2673         emitModRM(2, dst);
2674         emitByte(imm8);
2675     }
2676 
2677     public final void psrlq(Register dst, int imm8) {
2678         assert isUByte(imm8) : "invalid value";
2679         assert inRC(XMM, dst);
2680         // XMM2 is for /2 encoding: 66 0F 73 /2 ib
2681         simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false);
2682         emitByte(0x73);
2683         emitModRM(2, dst);
2684         emitByte(imm8);
2685     }
2686 
2687     public final void psrldq(Register dst, int imm8) {
2688         assert isUByte(imm8) : "invalid value";
2689         assert inRC(XMM, dst);
2690         simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false);
2691         emitByte(0x73);
2692         emitModRM(3, dst);
2693         emitByte(imm8);
2694     }
2695 
2696     public final void pshufb(Register dst, Register src) {
2697         assert supports(CPUFeature.SSSE3);
2698         assert inRC(XMM, dst) && inRC(XMM, src);
2699         simdPrefix(dst, dst, src, PD, P_0F38, false);
2700         emitByte(0x00);
2701         emitModRM(dst, src);
2702     }
2703 
2704     public final void pshuflw(Register dst, Register src, int imm8) {
2705         assert supports(CPUFeature.SSE2);
2706         assert isUByte(imm8) : "invalid value";
2707         assert inRC(XMM, dst) && inRC(XMM, src);
2708         simdPrefix(dst, Register.None, src, SD, P_0F, false);
2709         emitByte(0x70);
2710         emitModRM(dst, src);
2711         emitByte(imm8);
2712     }
2713 
2714     public final void pshufd(Register dst, Register src, int imm8) {
2715         assert isUByte(imm8) : "invalid value";
2716         assert inRC(XMM, dst) && inRC(XMM, src);
2717         simdPrefix(dst, Register.None, src, PD, P_0F, false);
2718         emitByte(0x70);
2719         emitModRM(dst, src);
2720         emitByte(imm8);
2721     }
2722 
2723     public final void psubd(Register dst, Register src) {
2724         assert inRC(XMM, dst) && inRC(XMM, src);
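             // Code: 66 0F FA /r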
2725         simdPrefix(dst, dst, src, PD, P_0F, false);
2726         emitByte(0xFA);
2727         emitModRM(dst, src);
2728     }
2729 
2730     public final void punpcklbw(Register dst, Register src) {
2731         assert supports(CPUFeature.SSE2);
2732         assert inRC(XMM, dst) && inRC(XMM, src);
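             // Code: 66 0F 60 /r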
2733         simdPrefix(dst, dst, src, PD, P_0F, false);
2734         emitByte(0x60);
2735         emitModRM(dst, src);
2736     }
2737 
2738     public final void rcpps(Register dst, Register src) {
2739         assert inRC(XMM, dst) && inRC(XMM, src);
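             // Code: 0F 53 /r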
2740         simdPrefix(dst, Register.None, src, PS, P_0F, false);
2741         emitByte(0x53);
2742         emitModRM(dst, src);
2743     }
2744 
2745     public final void ret(int imm16) {
2746         if (imm16 == 0) {
2747             emitByte(0xC3);
2748         } else {
2749             emitByte(0xC2);
2750             emitShort(imm16);
2751         }
2752     }
2753 
2754     public final void sarl(Register dst, int imm8) {
2755         prefix(dst);
2756         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2757         if (imm8 == 1) {
2758             emitByte(0xD1);
2759             emitModRM(7, dst);


2767     public final void shll(Register dst, int imm8) {
2768         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2769         prefix(dst);
2770         if (imm8 == 1) {
2771             emitByte(0xD1);
2772             emitModRM(4, dst);
2773         } else {
2774             emitByte(0xC1);
2775             emitModRM(4, dst);
2776             emitByte(imm8);
2777         }
2778     }
2779 
2780     public final void shll(Register dst) {
2781         // Multiply dst by 2, CL times.
2782         prefix(dst);
2783         emitByte(0xD3);
2784         emitModRM(4, dst);
2785     }
2786 
2787     // Insn: SHLX r32a, r/m32, r32b
2788 
2789     public final void shlxl(Register dst, Register src1, Register src2) {
2790         VexGeneralPurposeRMVOp.SHLX.emit(this, AVXSize.DWORD, dst, src1, src2);
2791     }
2792 
2793     public final void shrl(Register dst, int imm8) {
2794         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2795         prefix(dst);
2796         emitByte(0xC1);
2797         emitModRM(5, dst);
2798         emitByte(imm8);
2799     }
2800 
2801     public final void shrl(Register dst) {
2802         // Unsigned divide dst by 2, CL times.
2803         prefix(dst);
2804         emitByte(0xD3);
2805         emitModRM(5, dst);
2806     }
2807 
2808     public final void subl(AMD64Address dst, int imm32) {
2809         SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2810     }
2811 
2812     public final void subl(Register dst, int imm32) {


2839             prefix(dst);
2840             emitByte(0xF7);
2841             emitModRM(0, dst);
2842         }
2843         emitInt(imm32);
2844     }
2845 
2846     public final void testl(Register dst, Register src) {
2847         prefix(dst, src);
2848         emitByte(0x85);
2849         emitModRM(dst, src);
2850     }
2851 
2852     public final void testl(Register dst, AMD64Address src) {
2853         prefix(src, dst);
2854         emitByte(0x85);
2855         emitOperandHelper(dst, src, 0);
2856     }
2857 
2858     public final void unpckhpd(Register dst, Register src) {
2859         assert inRC(XMM, dst) && inRC(XMM, src);
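             // Code: 66 0F 15 /r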
2860         simdPrefix(dst, dst, src, PD, P_0F, false);
2861         emitByte(0x15);
2862         emitModRM(dst, src);
2863     }
2864 
2865     public final void unpcklpd(Register dst, Register src) {
2866         assert inRC(XMM, dst) && inRC(XMM, src);
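             // Code: 66 0F 14 /r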
2867         simdPrefix(dst, dst, src, PD, P_0F, false);
2868         emitByte(0x14);
2869         emitModRM(dst, src);
2870     }
2871 
2872     public final void xorl(Register dst, Register src) {
2873         XOR.rmOp.emit(this, DWORD, dst, src);
2874     }
2875 
2876     public final void xorpd(Register dst, Register src) {
2877         SSEOp.XOR.emit(this, PD, dst, src);
2878     }
2879 
2880     public final void xorps(Register dst, Register src) {
2881         SSEOp.XOR.emit(this, PS, dst, src);
2882     }
2883 
2884     protected final void decl(Register dst) {
2885         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2886         prefix(dst);


2957     public final void cmpq(Register dst, int imm32) {
2958         CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2959     }
2960 
2961     public final void cmpq(Register dst, Register src) {
2962         CMP.rmOp.emit(this, QWORD, dst, src);
2963     }
2964 
2965     public final void cmpq(Register dst, AMD64Address src) {
2966         CMP.rmOp.emit(this, QWORD, dst, src);
2967     }
2968 
2969     public final void cmpxchgq(Register reg, AMD64Address adr) {
2970         prefixq(adr, reg);
2971         emitByte(0x0F);
2972         emitByte(0xB1);
2973         emitOperandHelper(reg, adr, 0);
2974     }
2975 
2976     public final void cvtdq2pd(Register dst, Register src) {
2977         assert inRC(XMM, dst) && inRC(XMM, src);
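             // Code: F3 0F E6 /r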
2978         simdPrefix(dst, Register.None, src, SS, P_0F, false);
2979         emitByte(0xE6);
2980         emitModRM(dst, src);
2981     }
2982 
2983     public final void cvtsi2sdq(Register dst, Register src) {
2984         SSEOp.CVTSI2SD.emit(this, QWORD, dst, src);
2985     }
2986 
2987     public final void cvttsd2siq(Register dst, Register src) {
2988         SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src);
2989     }
2990 
2991     public final void cvttpd2dq(Register dst, Register src) {
2992         assert inRC(XMM, dst) && inRC(XMM, src);
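             // Code: 66 0F E6 /r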
2993         simdPrefix(dst, Register.None, src, PD, P_0F, false);
2994         emitByte(0xE6);
2995         emitModRM(dst, src);
2996     }
2997 
2998     public final void decq(Register dst) {
2999         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3000         prefixq(dst);
3001         emitByte(0xFF);
3002         emitModRM(1, dst);
3003     }
3004 
3005     public final void decq(AMD64Address dst) {
3006         DEC.emit(this, QWORD, dst);
3007     }
3008 
3009     public final void imulq(Register dst, Register src) {
3010         prefixq(dst, src);
3011         emitByte(0x0F);
3012         emitByte(0xAF);
3013         emitModRM(dst, src);
3014     }
3015 
3016     public final void incq(Register dst) {
3017         // Don't use this directly; use the macro assembler's incrementq() instead.
3018         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3019         prefixq(dst);


3040             codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
3041         }
3042     }
3043 
3044     public final void movslq(Register dst, int imm32) {
3045         prefixq(dst);
3046         emitByte(0xC7);
3047         emitModRM(0, dst);
3048         emitInt(imm32);
3049     }
3050 
3051     public final void movdq(Register dst, AMD64Address src) {
3052         AMD64RMOp.MOVQ.emit(this, QWORD, dst, src);
3053     }
3054 
3055     public final void movdq(AMD64Address dst, Register src) {
3056         AMD64MROp.MOVQ.emit(this, QWORD, dst, src);
3057     }
3058 
3059     public final void movdq(Register dst, Register src) {
3060         if (inRC(XMM, dst) && inRC(CPU, src)) {
3061             AMD64RMOp.MOVQ.emit(this, QWORD, dst, src);
3062         } else if (inRC(XMM, src) && inRC(CPU, dst)) {
3063             AMD64MROp.MOVQ.emit(this, QWORD, dst, src);
3064         } else {
3065             throw new InternalError("should not reach here");
3066         }
3067     }
3068 
3069     public final void movdl(Register dst, Register src) {
3070         if (inRC(XMM, dst) && inRC(CPU, src)) {
3071             AMD64RMOp.MOVD.emit(this, DWORD, dst, src);
3072         } else if (inRC(XMM, src) && inRC(CPU, dst)) {
3073             AMD64MROp.MOVD.emit(this, DWORD, dst, src);
3074         } else {
3075             throw new InternalError("should not reach here");
3076         }
3077     }
3078 
3079     public final void movdl(Register dst, AMD64Address src) {
3080         AMD64RMOp.MOVD.emit(this, DWORD, dst, src);
3081     }
3082 
3083     public final void movddup(Register dst, Register src) {
3084         assert supports(CPUFeature.SSE3);
3085         assert inRC(XMM, dst) && inRC(XMM, src);
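             // Code: F2 0F 12 /r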
3086         simdPrefix(dst, Register.None, src, SD, P_0F, false);
3087         emitByte(0x12);
3088         emitModRM(dst, src);
3089     }
3090 
3091     public final void movdqu(Register dst, AMD64Address src) {
3092         assert inRC(XMM, dst);
3093         simdPrefix(dst, Register.None, src, SS, P_0F, false);
3094         emitByte(0x6F);
3095         emitOperandHelper(dst, src, 0);
3096     }
3097 
3098     public final void movdqu(Register dst, Register src) {
3099         assert inRC(XMM, dst) && inRC(XMM, src);
3100         simdPrefix(dst, Register.None, src, SS, P_0F, false);
3101         emitByte(0x6F);
3102         emitModRM(dst, src);
3103     }
3104 
3105     // Insn: VMOVDQU xmm2/m128, xmm1
3106 
3107     public final void movdqu(AMD64Address dst, Register src) {
3108         assert inRC(XMM, src);
3109         // Code: VEX.128.F3.0F.WIG 7F /r
3110         simdPrefix(src, Register.None, dst, SS, P_0F, false);
3111         emitByte(0x7F);
3112         emitOperandHelper(src, dst, 0);
3113     }
3114 
3115     public final void movslq(AMD64Address dst, int imm32) {
3116         prefixq(dst);
3117         emitByte(0xC7);
3118         emitOperandHelper(0, dst, 4);
3119         emitInt(imm32);
3120     }
3121 
3122     public final void movslq(Register dst, AMD64Address src) {
3123         prefixq(src, dst);
3124         emitByte(0x63);
3125         emitOperandHelper(dst, src, 0);
3126     }
3127 
3128     public final void movslq(Register dst, Register src) {
3129         prefixq(dst, src);
3130         emitByte(0x63);
3131         emitModRM(dst, src);
3132     }
3133 
3134     public final void negq(Register dst) {


3275             // We only have to handle StoreLoad
3276             if ((barriers & STORE_LOAD) != 0) {
3277                 // All usable chips support "locked" instructions which suffice
3278                 // as barriers, and are much faster than the alternative of
3279                 // using the cpuid instruction. Here we use a locked add of [rsp], 0,
3280                 // which is conveniently a no-op except for clobbering the
3281                 // flags.
3282                 // Any change to this code may need to revisit other places in
3283                 // the code where this idiom is used, in particular the
3284                 // orderAccess code.
3285                 lock();
3286                 addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here
3287             }
3288         }
3289     }
3290 
3291     @Override
3292     protected final void patchJumpTarget(int branch, int branchTarget) {
3293         int op = getByte(branch);
3294         assert op == 0xE8 // call
3295                         || op == 0x00 // jump table entry

3296                         || op == 0xE9 // jmp
3297                         || op == 0xEB // short jmp
3298                         || (op & 0xF0) == 0x70 // short jcc
3299                         || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
3300         : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
3301 
3302         if (op == 0x00) {
3303             int offsetToJumpTableBase = getShort(branch + 1);
3304             int jumpTableBase = branch - offsetToJumpTableBase;
3305             int imm32 = branchTarget - jumpTableBase;
3306             emitInt(imm32, branch);
3307         } else if (op == 0xEB || (op & 0xF0) == 0x70) {
3308 
3309             // short offset operators (jmp and jcc)
3310             final int imm8 = branchTarget - (branch + 2);
3311             /*
3312              * Since a wrongly patched short branch can potentially produce code that works but
3313              * behaves very badly, we always fail with an exception here instead of relying on an assert.
3314              */
3315             if (!NumUtil.isByte(imm8)) {


3536         emitByte(0xae);
3537         emitByte(0xe8);
3538     }
3539 
3540     public final void vptest(Register dst, Register src) {
3541         VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
3542     }
3543 
3544     public final void vpxor(Register dst, Register nds, Register src) {
3545         VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3546     }
3547 
3548     public final void vpxor(Register dst, Register nds, AMD64Address src) {
3549         VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3550     }
3551 
3552     public final void vmovdqu(Register dst, AMD64Address src) {
3553         VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
3554     }
3555 
3556     public final void vmovdqu(AMD64Address dst, Register src) {
3557         assert inRC(XMM, src);
3558         VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
3559     }
3560 
3561     public final void vpmovzxbw(Register dst, AMD64Address src) {
3562         assert supports(CPUFeature.AVX2);
3563         VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
3564     }
3565 
3566     public final void vzeroupper() {
3567         emitVEX(L128, P_, M_0F, W0, 0, 0, true);
3568         emitByte(0x77);
3569     }
3570 
3571     // Insn: KORTESTD k1, k2
3572 
3573     // Sets ZF if the OR of the two masks is all zeros, and CF if it is all ones
3574     public final void kortestd(Register src1, Register src2) {
3575         assert supports(CPUFeature.AVX512BW);
3576         assert inRC(MASK, src1) && inRC(MASK, src2);
3577         // Code: VEX.L0.66.0F.W1 98 /r
3578         vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
3579         emitByte(0x98);
3580         emitModRM(src1, src2);
3581     }
3582 
3583     // Insn: KORTESTQ k1, k2
3584 
3585     // Sets ZF if the OR of the two masks is all zeros, and CF if it is all ones
3586     public final void kortestq(Register src1, Register src2) {
3587         assert supports(CPUFeature.AVX512BW);
3588         assert inRC(MASK, src1) && inRC(MASK, src2);
3589         // Code: VEX.L0.0F.W1 98 /r
3590         vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, true);
3591         emitByte(0x98);
3592         emitModRM(src1, src2);
3593     }
3594 
3595     public final void kmovd(Register dst, Register src) {
3596         assert supports(CPUFeature.AVX512BW);
3597         assert inRC(MASK, dst) || inRC(CPU, dst);
3598         assert inRC(MASK, src) || inRC(CPU, src);
3599         assert !(inRC(CPU, dst) && inRC(CPU, src));
3600 
3601         if (inRC(MASK, dst)) {
3602             if (inRC(MASK, src)) {
3603                 // kmovd(KRegister dst, KRegister src):
3604                 // Insn: KMOVD k1, k2/m32
3605                 // Code: VEX.L0.66.0F.W1 90 /r
3606                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, true);
3607                 emitByte(0x90);
3608                 emitModRM(dst, src);
3609             } else {
3610                 // kmovd(KRegister dst, Register src)
3611                 // Insn: KMOVD k1, r32
3612                 // Code: VEX.L0.F2.0F.W0 92 /r
3613                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
3614                 emitByte(0x92);
3615                 emitModRM(dst, src);
3616             }
3617         } else {
3618             if (inRC(MASK, src)) {
3619                 // kmovd(Register dst, KRegister src)
3620                 // Insn: KMOVD r32, k1
3621                 // Code: VEX.L0.F2.0F.W0 93 /r
3622                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
3623                 emitByte(0x93);
3624                 emitModRM(dst, src);
3625             } else {
3626                 throw GraalError.shouldNotReachHere();
3627             }
3628         }
3629     }
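         // Illustrative usage (register choices are arbitrary examples):
         //   kmovd(AMD64.k1, AMD64.rax);  // move a 32-bit mask from a general-purpose register into k1
         //   kmovd(AMD64.rcx, AMD64.k1);  // move the mask from k1 back into a general-purpose register
         //   kmovd(AMD64.k2, AMD64.k1);   // copy between two mask registers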
3630 
3631     public final void kmovq(Register dst, Register src) {
3632         assert supports(CPUFeature.AVX512BW);
3633         assert inRC(MASK, dst) || inRC(CPU, dst);
3634         assert inRC(MASK, src) || inRC(CPU, src);
3635         assert !(inRC(CPU, dst) && inRC(CPU, src));
3636 
3637         if (inRC(MASK, dst)) {
3638             if (inRC(MASK, src)) {
3639                 // kmovq(KRegister dst, KRegister src):
3640                 // Insn: KMOVQ k1, k2/m64
3641                 // Code: VEX.L0.0F.W1 90 /r
3642                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, true);
3643                 emitByte(0x90);
3644                 emitModRM(dst, src);
3645             } else {
3646                 // kmovq(KRegister dst, Register src)
3647                 // Insn: KMOVQ k1, r64
3648                 // Code: VEX.L0.F2.0F.W1 92 /r
3649                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
3650                 emitByte(0x92);
3651                 emitModRM(dst, src);
3652             }
3653         } else {
3654             if (inRC(MASK, src)) {
3655                 // kmovq(Register dst, KRegister src)
3656                 // Insn: KMOVQ r64, k1
3657                 // Code: VEX.L0.F2.0F.W1 93 /r
3658                 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
3659                 emitByte(0x93);
3660                 emitModRM(dst, src);
3661             } else {
3662                 throw GraalError.shouldNotReachHere();
3663             }
3664         }
3665     }
3666 
3667     // Insn: KTESTD k1, k2
3668 
3669     public final void ktestd(Register src1, Register src2) {
3670         assert supports(CPUFeature.AVX512BW);
3671         assert inRC(MASK, src1) && inRC(MASK, src2);
3672         // Code: VEX.L0.66.0F.W1 99 /r
3673         vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
3674         emitByte(0x99);
3675         emitModRM(src1, src2);
3676     }
3677 
3678     public final void evmovdqu64(Register dst, AMD64Address src) {
3679         assert supports(CPUFeature.AVX512F);
3680         assert inRC(XMM, dst);
3681         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
3682         emitByte(0x6F);
3683         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3684     }
3685 
3686     // Insn: VPMOVZXBW zmm1, m256
3687 
3688     public final void evpmovzxbw(Register dst, AMD64Address src) {
3689         assert supports(CPUFeature.AVX512BW);
3690         assert inRC(XMM, dst);
3691         // Code: EVEX.512.66.0F38.WIG 30 /r
3692         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
3693         emitByte(0x30);
3694         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3695     }
3696 
3697     public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
3698         assert supports(CPUFeature.AVX512BW);
3699         assert inRC(MASK, kdst) && inRC(XMM, nds);
3700         evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
3701         emitByte(0x74);
3702         emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3703     }
3704 
3705     // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512
3706     // -----
3707     // Insn: VMOVDQU16 zmm1, m512
3708 
3709     public final void evmovdqu16(Register dst, AMD64Address src) {
3710         assert supports(CPUFeature.AVX512BW);
3711         assert inRC(XMM, dst);
3712         // Code: EVEX.512.F2.0F.W1 6F /r
3713         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3714         emitByte(0x6F);
3715         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3716     }
3717 
3718     // Insn: VMOVDQU16 zmm1, k1:z, m512
3719 
3720     public final void evmovdqu16(Register dst, Register mask, AMD64Address src) {
3721         assert supports(CPUFeature.AVX512BW);
3722         assert inRC(XMM, dst) && inRC(MASK, mask);
3723         // Code: EVEX.512.F2.0F.W1 6F /r
3724         evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0);
3725         emitByte(0x6F);
3726         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3727     }
3728 
3729     // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1
3730     // -----
3731     // Insn: VMOVDQU16 m512, zmm1
3732 
3733     public final void evmovdqu16(AMD64Address dst, Register src) {
3734         assert supports(CPUFeature.AVX512BW);
3735         assert inRC(XMM, src);
3736         // Code: EVEX.512.F2.0F.W1 7F /r
3737         evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3738         emitByte(0x7F);
3739         emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3740     }
3741 
3742     // Insn: VMOVDQU16 m512, k1, zmm1
3743 
3744     public final void evmovdqu16(AMD64Address dst, Register mask, Register src) {
3745         assert supports(CPUFeature.AVX512BW);
3746         assert inRC(MASK, mask) && inRC(XMM, src);
3747         // Code: EVEX.512.F2.0F.W1 7F /r
3748         evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3749         emitByte(0x7F);
3750         emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3751     }
3752 
3753     // Insn: VPBROADCASTW zmm1 {k1}{z}, reg
3754     // -----
3755     // Insn: VPBROADCASTW zmm1, reg
3756 
3757     public final void evpbroadcastw(Register dst, Register src) {
3758         assert supports(CPUFeature.AVX512BW);
3759         assert inRC(XMM, dst) && inRC(CPU, src);
3760         // Code: EVEX.512.66.0F38.W0 7B /r
3761         evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0);
3762         emitByte(0x7B);
3763         emitModRM(dst, src);
3764     }
3765 
3766     // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
3767     // -----
3768     // Insn: VPCMPUW k1, zmm2, zmm3, imm8
3769 
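         // The vcc immediate selects the comparison predicate:
         // 0 = EQ, 1 = LT, 2 = LE, 4 = NEQ, 5 = NLT (>=), 6 = NLE (>)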
3770     public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) {
3771         assert supports(CPUFeature.AVX512BW);
3772         assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src);
3773         // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
3774         evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
3775         emitByte(0x3E);
3776         emitModRM(kdst, src);
3777         emitByte(vcc);
3778     }
3779 
3780     // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
3781     // -----
3782     // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8
3783 
3784     public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) {
3785         assert supports(CPUFeature.AVX512BW);
3786         assert inRC(MASK, kdst) && inRC(MASK, mask);
3787         assert inRC(XMM, nds) && inRC(XMM, src);
3788         // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
3789         evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
3790         emitByte(0x3E);
3791         emitModRM(kdst, src);
3792         emitByte(vcc);
3793     }
3794 
3795     // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2
3796     // -----
3797     // Insn: VPMOVWB m256, zmm2
3798 
3799     public final void evpmovwb(AMD64Address dst, Register src) {
3800         assert supports(CPUFeature.AVX512BW);
3801         assert inRC(XMM, src);
3802         // Code: EVEX.512.F3.0F38.W0 30 /r
3803         evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
3804         emitByte(0x30);
3805         emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3806     }
3807 
3808     // Insn: VPMOVWB m256, k1, zmm2
3809 
3810     public final void evpmovwb(AMD64Address dst, Register mask, Register src) {
3811         assert supports(CPUFeature.AVX512BW);
3812         assert inRC(MASK, mask) && inRC(XMM, src);
3813         // Code: EVEX.512.F3.0F38.W0 30 /r
3814         evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
3815         emitByte(0x30);
3816         emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3817     }
3818 
3819     // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
3820     // -----
3821     // Insn: VPMOVZXBW zmm1, k1, m256
3822 
3823     public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) {
3824         assert supports(CPUFeature.AVX512BW);
3825         assert inRC(MASK, mask) && inRC(XMM, dst);
3826         // Code: EVEX.512.66.0F38.WIG 30 /r
3827         evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
3828         emitByte(0x30);
3829         emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3830     }
3831 
3832 }