// ...

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.MASK;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;

// ...

    /**
     * VEX-encoded instructions with an operand order of RM, where the M operand must be a
     * register.
     */
    public static class VexRROp extends VexOp {
        // @formatter:off
        public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
        // @formatter:on

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            assert op != 0x1A && op != 0x5A;
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
        }
    }
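
    // Illustrative usage (not part of the original source): the RR form encodes both operands
    // as registers, e.g. emitting vmaskmovdqu xmm0, xmm1; the register choice is arbitrary and
    // asm stands for any AMD64Assembler instance:
    //
    //   VexRROp.VMASKMOVDQU.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1);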

    /**
     * VEX-encoded instructions with an operand order of RM.
     */
    public static class VexRMOp extends VexRROp {
        // @formatter:off
        public static final VexRMOp VCVTTSS2SI = new VexRMOp("VCVTTSS2SI", P_F3, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSS2SQ = new VexRMOp("VCVTTSS2SQ", P_F3, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SI = new VexRMOp("VCVTTSD2SI", P_F2, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SQ = new VexRMOp("VCVTTSD2SQ", P_F2, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTPS2PD = new VexRMOp("VCVTPS2PD", P_, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTPD2PS = new VexRMOp("VCVTPD2PS", P_66, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTDQ2PS = new VexRMOp("VCVTDQ2PS", P_, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPS2DQ = new VexRMOp("VCVTTPS2DQ", P_F3, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPD2DQ = new VexRMOp("VCVTTPD2DQ", P_66, M_0F, WIG, 0xE6);
        public static final VexRMOp VCVTDQ2PD = new VexRMOp("VCVTDQ2PD", P_F3, M_0F, WIG, 0xE6);
        // ...
        public static final VexRMOp VPMOVZXDQ = new VexRMOp("VPMOVZXDQ", P_66, M_0F38, WIG, 0x35);
        public static final VexRMOp VPTEST = new VexRMOp("VPTEST", P_66, M_0F38, WIG, 0x17);
        public static final VexRMOp VSQRTPD = new VexRMOp("VSQRTPD", P_66, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTPS = new VexRMOp("VSQRTPS", P_, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSD = new VexRMOp("VSQRTSD", P_F2, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSS = new VexRMOp("VSQRTSS", P_F3, M_0F, WIG, 0x51);
        public static final VexRMOp VUCOMISS = new VexRMOp("VUCOMISS", P_, M_0F, WIG, 0x2E);
        public static final VexRMOp VUCOMISD = new VexRMOp("VUCOMISD", P_66, M_0F, WIG, 0x2E);
        // @formatter:on

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }
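
    // Illustrative usage (not from the original source): the M operand may also come from
    // memory via the AMD64Address overload; register names are arbitrary. For example, a
    // truncating double-to-long conversion into a general-purpose register:
    //
    //   VexRMOp.VCVTTSD2SQ.emit(asm, AVXSize.XMM, AMD64.rax, AMD64.xmm0);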

    /**
     * VEX-encoded move instructions.
     * <p>
     * These instructions have two opcodes: {@code op} is the forward move instruction with an
     * operand order of RM, and {@code opReverse} is the reverse move instruction with an operand
     * order of MR.
     */
    public static final class VexMoveOp extends VexRMOp {
        // @formatter:off
        public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSS = new VexMoveOp("VMOVSS", P_F3, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSD = new VexMoveOp("VMOVSD", P_F2, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVD = new VexMoveOp("VMOVD", P_66, M_0F, W0, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        public static final VexMoveOp VMOVQ = new VexMoveOp("VMOVQ", P_66, M_0F, W1, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private final int opReverse;

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }

        public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
            asm.emitByte(opReverse);
            asm.emitModRM(src, dst);
        }
    }
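
    // Illustrative usage (not from the original source): loads pick the forward opcode and
    // stores the reverse one, so a round trip through memory looks like this (stack slot and
    // registers arbitrary):
    //
    //   VexMoveOp.VMOVUPD.emit(asm, AVXSize.YMM, AMD64.xmm0, new AMD64Address(AMD64.rsp, 16)); // load
    //   VexMoveOp.VMOVUPD.emit(asm, AVXSize.YMM, new AMD64Address(AMD64.rsp, 16), AMD64.xmm0); // store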

    public interface VexRRIOp {
        void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
    }

    /**
     * VEX-encoded instructions with an operand order of RMI.
     */
    public static final class VexRMIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexRMIOp VPERMQ = new VexRMIOp("VPERMQ", P_66, M_0F3A, W1, 0x00, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFD = new VexRMIOp("VPSHUFD", P_66, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 1);
            asm.emitByte(imm8);
        }
    }
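
    // Illustrative usage (not from the original source): the trailing byte is the shuffle
    // control, e.g. 0x1B (0b00_01_10_11) reverses the four dwords of xmm1 into xmm0:
    //
    //   VexRMIOp.VPSHUFD.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1, 0x1B);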

    /**
     * VEX-encoded instructions with an operand order of MRI.
     */
    public static final class VexMRIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
        public static final VexMRIOp VPEXTRB = new VexMRIOp("VPEXTRB", P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRW = new VexMRIOp("VPEXTRW", P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRD = new VexMRIOp("VPEXTRD", P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRQ = new VexMRIOp("VPEXTRQ", P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(src, dst);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(src, dst, 1);
            asm.emitByte(imm8);
        }
    }
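
    // Illustrative usage (not from the original source): in the MRI form the first Register
    // argument is the R/M destination, e.g. extracting the upper 128 bits of ymm1 into xmm0:
    //
    //   VexMRIOp.VEXTRACTF128.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, 1);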

    /**
     * VEX-encoded instructions with an operand order of RVMR.
     */
    public static class VexRVMROp extends VexOp {
        // @formatter:off
        public static final VexRVMROp VPBLENDVB = new VexRVMROp("VPBLENDVB", P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
        public static final VexRVMROp VBLENDVPS = new VexRVMROp("VBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
        public static final VexRVMROp VBLENDVPD = new VexRVMROp("VBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
        // @formatter:on

        protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(mask.encoding() << 4);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
            asm.emitByte(mask.encoding() << 4);
        }
    }
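
    // Illustrative usage (not from the original source): the fourth (mask) register travels in
    // the upper four bits of a trailing immediate byte, e.g. a byte-wise blend of xmm2 and xmm3
    // under the sign bits of xmm4:
    //
    //   VexRVMROp.VPBLENDVB.emit(asm, AVXSize.XMM, AMD64.xmm1, AMD64.xmm4, AMD64.xmm2, AMD64.xmm3);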

    /**
     * VEX-encoded instructions with an operand order of RVM.
     */
    public static class VexRVMOp extends VexOp {
        // @formatter:off
        public static final VexRVMOp VANDPS = new VexRVMOp("VANDPS", P_, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDPD = new VexRVMOp("VANDPD", P_66, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDNPS = new VexRVMOp("VANDNPS", P_, M_0F, WIG, 0x55);
        public static final VexRVMOp VANDNPD = new VexRVMOp("VANDNPD", P_66, M_0F, WIG, 0x55);
        public static final VexRVMOp VORPS = new VexRVMOp("VORPS", P_, M_0F, WIG, 0x56);
        public static final VexRVMOp VORPD = new VexRVMOp("VORPD", P_66, M_0F, WIG, 0x56);
        public static final VexRVMOp VXORPS = new VexRVMOp("VXORPS", P_, M_0F, WIG, 0x57);
        public static final VexRVMOp VXORPD = new VexRVMOp("VXORPD", P_66, M_0F, WIG, 0x57);
        public static final VexRVMOp VADDPS = new VexRVMOp("VADDPS", P_, M_0F, WIG, 0x58);
        // ...
        public static final VexRVMOp VPCMPEQB = new VexRVMOp("VPCMPEQB", P_66, M_0F, WIG, 0x74, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQW = new VexRVMOp("VPCMPEQW", P_66, M_0F, WIG, 0x75, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
        }
    }
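
    // Illustrative usage (not from the original source): RVM is the standard three-operand AVX
    // form; a common idiom is zeroing a register without depending on its previous contents:
    //
    //   VexRVMOp.VXORPD.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm0, AMD64.xmm0);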

    public static final class VexGeneralPurposeRVMOp extends VexOp {
        // @formatter:off
        public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        // @formatter:on

        private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
        }
    }
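
    // Illustrative usage (not from the original source): the operand size selects W0 or W1, so
    // AVXSize.QWORD gives the 64-bit form, e.g. rax = ~rbx & rcx:
    //
    //   VexGeneralPurposeRVMOp.ANDN.emit(asm, AVXSize.QWORD, AMD64.rax, AMD64.rbx, AMD64.rcx);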

    public static final class VexGeneralPurposeRMVOp extends VexOp {
        // @formatter:off
        public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
        // @formatter:on

        private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1);
            asm.emitByte(op);
            asm.emitModRM(dst, src1);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src1, 0);
        }
    }
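
    // Illustrative usage (not from the original source): in the RMV form the VEX.vvvv operand
    // comes last, so the shift count is the final register, e.g. rax = rbx << (rcx & 63):
    //
    //   VexGeneralPurposeRMVOp.SHLX.emit(asm, AVXSize.QWORD, AMD64.rax, AMD64.rbx, AMD64.rcx);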

    /**
     * VEX-encoded shift instructions with an operand order of either RVM or VMI.
     */
    public static final class VexShiftOp extends VexRVMOp implements VexRRIOp {
        // @formatter:off
        public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2);
        public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2);
        public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2);
        public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4);
        public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4);
        public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6);
        public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6);
        public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6);
        // @formatter:on

        private final int immOp;
        private final int r;

        private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) {
            super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2);
            this.immOp = immOp;
            this.r = r;
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, null, dst, src);
            asm.vexPrefix(null, dst, src, size, pp, mmmmm, w);
            asm.emitByte(immOp);
            asm.emitModRM(r, src);
            asm.emitByte(imm8);
        }
    }
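
    // Illustrative usage (not from the original source): the same opcode object covers both
    // forms, shifting by a register via the inherited RVM emit or by an immediate as below:
    //
    //   VexShiftOp.VPSLLD.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1, 7); // each dword of xmm1 shifted left by 7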

    public static final class VexMaskMoveOp extends VexOp {
        // @formatter:off
        public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E);
        public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F);
        public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2);
        public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2);
        // @formatter:on

        private final int opReverse;

        private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null);
            asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, mask, null);
            asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }
    }
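
    // Illustrative usage (not from the original source): only lanes whose mask element has the
    // sign bit set are transferred; masked-off load lanes are zeroed and masked-off store lanes
    // leave memory untouched. A masked YMM load (address arbitrary):
    //
    //   VexMaskMoveOp.VMASKMOVPS.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, new AMD64Address(AMD64.rsp, 0));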

    /**
     * VEX-encoded instructions with an operand order of RVMI.
     */
    public static final class VexRVMIOp extends VexOp {
        // @formatter:off
        public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6);
        public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6);
        public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY);
        // @formatter:on

        private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            assert (imm8 & 0xFF) == imm8;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            assert (imm8 & 0xFF) == imm8;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 1);
            asm.emitByte(imm8);
        }
    }
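
    // Illustrative usage (not from the original source): RVMI appends an immediate to the
    // three-operand form, e.g. building a ymm result from xmm1 with xmm2 as its upper half:
    //
    //   VexRVMIOp.VINSERTF128.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2, 1);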

    /**
     * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a
     * comparison operator.
     */
    public static final class VexFloatCompareOp extends VexOp {
        // @formatter:off
        public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2);
        public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
        public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2);
        public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
        // @formatter:on

        public enum Predicate {
            EQ_OQ(0x00),
            // ...
                        return LT_OQ;
                    case LE:
                        return LE_OQ;
                    case GT:
                        return GT_OQ;
                    case GE:
                        return GE_OQ;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            }
        }

        private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) {
            super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(p.imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 1);
            asm.emitByte(p.imm8);
        }
    }
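
    // Illustrative usage (not from the original source): the comparison operator is carried in
    // the immediate byte, e.g. an ordered, non-signalling equality test per float lane:
    //
    //   VexFloatCompareOp.VCMPPS.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2, Predicate.EQ_OQ);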

    public final void addl(AMD64Address dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, Register src) {
        ADD.rmOp.emit(this, DWORD, dst, src);
    }
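
    // Illustrative note (not from the original source): getMIOpcode selects the sign-extended
    // imm8 form when the constant fits in a byte, so these two calls emit different lengths:
    //
    //   addl(AMD64.rax, 16);   // 83 /0 ib, 3 bytes
    //   addl(AMD64.rax, 1000); // 81 /0 id, 6 bytes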

    public final void addpd(Register dst, Register src) {
        SSEOp.ADD.emit(this, PD, dst, src);
    }

    // ...

    public final void leal(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x8D);
        emitOperandHelper(dst, src, 0);
    }

    public final void leaq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x8D);
        emitOperandHelper(dst, src, 0);
    }

    public final void leave() {
        emitByte(0xC9);
    }

    public final void lock() {
        emitByte(0xF0);
    }

    public final void movapd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, PD, P_0F, false);
        emitByte(0x28);
        emitModRM(dst, src);
    }

    public final void movaps(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, PS, P_0F, false);
        emitByte(0x28);
        emitModRM(dst, src);
    }

    public final void movb(AMD64Address dst, int imm8) {
        prefix(dst);
        emitByte(0xC6);
        emitOperandHelper(0, dst, 1);
        emitByte(imm8);
    }

    public final void movb(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(CPU) : "must have byte register";
        prefixb(dst, src);
        emitByte(0x88);
        emitOperandHelper(src, dst, 0);
    }

    public final void movl(Register dst, int imm32) {
        movl(dst, imm32, false);
    }

    public final void movl(Register dst, int imm32, boolean annotateImm) {
        int insnPos = position();
        prefix(dst);
        emitByte(0xB8 + encode(dst));
        int immPos = position();
        emitInt(imm32);
        int nextInsnPos = position();
        if (annotateImm && codePatchingAnnotationConsumer != null) {
            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
        }
    }

    // ...

    public final void movl(AMD64Address dst, int imm32) {
        prefix(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 4);
        emitInt(imm32);
    }

    public final void movl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x89);
        emitOperandHelper(src, dst, 0);
    }

    /**
     * Newer CPUs require the use of movsd and movss to avoid a partial register stall when
     * loading from memory. On old Opterons, movlpd is used instead of movsd. The selection is
     * done in {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
     * {@link AMD64MacroAssembler#movflt(Register, Register)}.
     */
    public final void movlpd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x12);
        emitOperandHelper(dst, src, 0);
    }

    public final void movlhps(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, src, src, PS, P_0F, false);
        emitByte(0x16);
        emitModRM(dst, src);
    }

    public final void movq(Register dst, AMD64Address src) {
        movq(dst, src, false);
    }

    public final void movq(Register dst, AMD64Address src, boolean wide) {
        if (dst.getRegisterCategory().equals(XMM)) {
            simdPrefix(dst, Register.None, src, SS, P_0F, false);
            emitByte(0x7E);
            emitOperandHelper(dst, src, wide, 0);
        } else {
            // gpr version of movq
            prefixq(src, dst);
            emitByte(0x8B);
            emitOperandHelper(dst, src, wide, 0);
        }
    }

    public final void movq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x8B);
        emitModRM(dst, src);
    }

    public final void movq(AMD64Address dst, Register src) {
        if (src.getRegisterCategory().equals(XMM)) {
            simdPrefix(src, Register.None, dst, PD, P_0F, true);
            emitByte(0xD6);
            emitOperandHelper(src, dst, 0);
        } else {
            // gpr version of movq
            prefixq(dst, src);
            emitByte(0x89);
            emitOperandHelper(src, dst, 0);
        }
    }

    public final void movsbl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xBE);
        emitOperandHelper(dst, src, 0);
    }

    public final void movsbl(Register dst, Register src) {
        prefix(dst, false, src, true);
        emitByte(0x0F);
        emitByte(0xBE);
        emitModRM(dst, src);
    }

    // ...

            case 2:
                emitByte(0x66);
                emitByte(0x90);
                break;
            case 1:
                emitByte(0x90);
                break;
            default:
                assert i == 0;
        }
    }

    public final void orl(Register dst, Register src) {
        OR.rmOp.emit(this, DWORD, dst, src);
    }

    public final void orl(Register dst, int imm32) {
        OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void pop(Register dst) {
        prefix(dst);
        emitByte(0x58 + encode(dst));
    }

    public void popfq() {
        emitByte(0x9D);
    }

    public final void ptest(Register dst, Register src) {
        assert supports(CPUFeature.SSE4_1);
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, PD, P_0F38, false);
        emitByte(0x17);
        emitModRM(dst, src);
    }

    public final void pcmpeqb(Register dst, Register src) {
        assert supports(CPUFeature.SSE2);
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x74);
        emitModRM(dst, src);
    }

    public final void pcmpeqw(Register dst, Register src) {
        assert supports(CPUFeature.SSE2);
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x75);
        emitModRM(dst, src);
    }

    public final void pcmpestri(Register dst, AMD64Address src, int imm8) {
        assert supports(CPUFeature.SSE4_2);
        assert dst.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, PD, P_0F3A, false);
        emitByte(0x61);
        emitOperandHelper(dst, src, 0);
        emitByte(imm8);
    }

    public final void pcmpestri(Register dst, Register src, int imm8) {
        assert supports(CPUFeature.SSE4_2);
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, PD, P_0F3A, false);
        emitByte(0x61);
        emitModRM(dst, src);
        emitByte(imm8);
    }

    public final void pmovmskb(Register dst, Register src) {
        assert supports(CPUFeature.SSE2);
        assert dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, PD, P_0F, false);
        emitByte(0xD7);
        emitModRM(dst, src);
    }

    public final void pmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.SSE4_1); // PMOVZXBW is an SSE4.1 instruction
        assert dst.getRegisterCategory().equals(XMM);
        // XXX legacy_mode should be: _legacy_mode_bw
        simdPrefix(dst, Register.None, src, PD, P_0F38, false);
        emitByte(0x30);
        emitOperandHelper(dst, src, 0);
    }

    public final void push(Register src) {
        prefix(src);
        emitByte(0x50 + encode(src));
    }

    public void pushfq() {
        emitByte(0x9C);
    }

    public final void paddd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xFE);
        emitModRM(dst, src);
    }

    public final void paddq(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xD4);
        emitModRM(dst, src);
    }

    public final void pextrw(Register dst, Register src, int imm8) {
        assert dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, PD, P_0F, false);
        emitByte(0xC5);
        emitModRM(dst, src);
        emitByte(imm8);
    }

    public final void pinsrw(Register dst, Register src, int imm8) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xC4);
        emitModRM(dst, src);
        emitByte(imm8);
    }

    public final void por(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xEB);
        emitModRM(dst, src);
    }

    public final void pand(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xDB);
        emitModRM(dst, src);
    }

    public final void pxor(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xEF);
        emitModRM(dst, src);
    }

    public final void pslld(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(XMM);
        // XMM6 is for /6 encoding: 66 0F 72 /6 ib
        simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false);
        emitByte(0x72);
        emitModRM(6, dst);
        emitByte(imm8 & 0xFF);
    }

    public final void psllq(Register dst, Register shift) {
        assert dst.getRegisterCategory().equals(XMM) && shift.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, shift, PD, P_0F, false);
        emitByte(0xF3);
        emitModRM(dst, shift);
    }

    public final void psllq(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(XMM);
        // XMM6 is for /6 encoding: 66 0F 73 /6 ib
        simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false);
        emitByte(0x73);
        emitModRM(6, dst);
        emitByte(imm8);
    }

    public final void psrad(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(XMM);
        // XMM4 is for /4 encoding: 66 0F 72 /4 ib
        simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false);
        emitByte(0x72);
        emitModRM(4, dst);
        emitByte(imm8);
    }

    public final void psrld(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(XMM);
        // XMM2 is for /2 encoding: 66 0F 72 /2 ib
        simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false);
        emitByte(0x72);
        emitModRM(2, dst);
        emitByte(imm8);
    }

    public final void psrlq(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(XMM);
        // XMM2 is for /2 encoding: 66 0F 73 /2 ib
        simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false);
        emitByte(0x73);
        emitModRM(2, dst);
        emitByte(imm8);
    }

    public final void psrldq(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(XMM);
        // XMM3 is for /3 encoding: 66 0F 73 /3 ib
        simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false);
        emitByte(0x73);
        emitModRM(3, dst);
        emitByte(imm8);
    }

    public final void pshufb(Register dst, Register src) {
        assert supports(CPUFeature.SSSE3);
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F38, false);
        emitByte(0x00);
        emitModRM(dst, src);
    }

    public final void pshuflw(Register dst, Register src, int imm8) {
        assert supports(CPUFeature.SSE2);
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, SD, P_0F, false);
        emitByte(0x70);
        emitModRM(dst, src);
        emitByte(imm8);
    }

    public final void pshufd(Register dst, Register src, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, PD, P_0F, false);
        emitByte(0x70);
        emitModRM(dst, src);
        emitByte(imm8);
    }

    public final void psubd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0xFA);
        emitModRM(dst, src);
    }

    public final void rcpps(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, PS, P_0F, false);
        emitByte(0x53);
        emitModRM(dst, src);
    }

    public final void ret(int imm16) {
        if (imm16 == 0) {
            emitByte(0xC3);
        } else {
            emitByte(0xC2);
            emitShort(imm16);
        }
    }

    public final void sarl(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefix(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(7, dst);
        } else {
            emitByte(0xC1);
            emitModRM(7, dst);
            emitByte(imm8);
        }
    }
    public final void shll(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefix(dst);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitModRM(4, dst);
        } else {
            emitByte(0xC1);
            emitModRM(4, dst);
            emitByte(imm8);
        }
    }

    public final void shll(Register dst) {
        // Multiply dst by 2, CL times.
        prefix(dst);
        emitByte(0xD3);
        emitModRM(4, dst);
    }

    public final void shrl(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        prefix(dst);
        emitByte(0xC1);
        emitModRM(5, dst);
        emitByte(imm8);
    }

    public final void shrl(Register dst) {
        // Unsigned divide dst by 2, CL times.
        prefix(dst);
        emitByte(0xD3);
        emitModRM(5, dst);
    }

    public final void subl(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void subl(Register dst, int imm32) {
        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    // ...

    public final void testl(Register dst, int imm32) {
        // Not using emitArith because test does not support sign-extension of 8-bit operands;
        // the short EAX form is used when the register encoding is 0.
        if (dst.encoding() == 0) {
            emitByte(0xA9);
        } else {
            prefix(dst);
            emitByte(0xF7);
            emitModRM(0, dst);
        }
        emitInt(imm32);
    }

    public final void testl(Register dst, Register src) {
        prefix(dst, src);
        emitByte(0x85);
        emitModRM(dst, src);
    }

    public final void testl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x85);
        emitOperandHelper(dst, src, 0);
    }

    public final void unpckhpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x15);
        emitModRM(dst, src);
    }

    public final void unpcklpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, dst, src, PD, P_0F, false);
        emitByte(0x14);
        emitModRM(dst, src);
    }

    public final void xorl(Register dst, Register src) {
        XOR.rmOp.emit(this, DWORD, dst, src);
    }

    public final void xorpd(Register dst, Register src) {
        SSEOp.XOR.emit(this, PD, dst, src);
    }

    public final void xorps(Register dst, Register src) {
        SSEOp.XOR.emit(this, PS, dst, src);
    }

    protected final void decl(Register dst) {
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        prefix(dst);
        emitByte(0xFF);
        emitModRM(1, dst);
    }

    // ...

    public final void cmpq(Register dst, int imm32) {
        CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void cmpq(Register dst, Register src) {
        CMP.rmOp.emit(this, QWORD, dst, src);
    }

    public final void cmpq(Register dst, AMD64Address src) {
        CMP.rmOp.emit(this, QWORD, dst, src);
    }

    public final void cmpxchgq(Register reg, AMD64Address adr) {
        prefixq(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr, 0);
    }

    public final void cvtdq2pd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, SS, P_0F, false);
        emitByte(0xE6);
        emitModRM(dst, src);
    }

    public final void cvtsi2sdq(Register dst, Register src) {
        SSEOp.CVTSI2SD.emit(this, QWORD, dst, src);
    }

    public final void cvttsd2siq(Register dst, Register src) {
        SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src);
    }

    public final void cvttpd2dq(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, PD, P_0F, false);
        emitByte(0xE6);
        emitModRM(dst, src);
    }

    public final void decq(Register dst) {
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        prefixq(dst);
        emitByte(0xFF);
        emitModRM(1, dst);
    }

    public final void decq(AMD64Address dst) {
        DEC.emit(this, QWORD, dst);
    }

    public final void imulq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xAF);
        emitModRM(dst, src);
    }

    public final void incq(Register dst) {
        // Don't use it directly; use the macro assembler's incrementq() instead.
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        prefixq(dst);
        emitByte(0xFF);
        emitModRM(0, dst);
    }

    // ...

    public final void movq(Register dst, long imm64, boolean annotateImm) {
        int insnPos = position();
        prefixq(dst);
        emitByte(0xB8 + encode(dst));
        int immPos = position();
        emitLong(imm64);
        int nextInsnPos = position();
        if (annotateImm && codePatchingAnnotationConsumer != null) {
            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
        }
    }

    public final void movslq(Register dst, int imm32) {
        prefixq(dst);
        emitByte(0xC7);
        emitModRM(0, dst);
        emitInt(imm32);
    }

    public final void movdq(Register dst, AMD64Address src) {
        AMD64RMOp.MOVQ.emit(this, QWORD, dst, src);
    }

    public final void movdq(AMD64Address dst, Register src) {
        AMD64MROp.MOVQ.emit(this, QWORD, dst, src);
    }

    public final void movdq(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU)) {
            AMD64RMOp.MOVQ.emit(this, QWORD, dst, src);
        } else if (src.getRegisterCategory().equals(XMM) && dst.getRegisterCategory().equals(CPU)) {
            AMD64MROp.MOVQ.emit(this, QWORD, dst, src);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU)) {
            AMD64RMOp.MOVD.emit(this, DWORD, dst, src);
        } else if (src.getRegisterCategory().equals(XMM) && dst.getRegisterCategory().equals(CPU)) {
            AMD64MROp.MOVD.emit(this, DWORD, dst, src);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, AMD64Address src) {
        AMD64RMOp.MOVD.emit(this, DWORD, dst, src);
    }

    public final void movddup(Register dst, Register src) {
        assert supports(CPUFeature.SSE3);
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, SD, P_0F, false);
        emitByte(0x12);
        emitModRM(dst, src);
    }

    public final void movdqu(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, SS, P_0F, false);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void movdqu(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM);
        simdPrefix(dst, Register.None, src, SS, P_0F, false);
        emitByte(0x6F);
        emitModRM(dst, src);
    }

    public final void movslq(AMD64Address dst, int imm32) {
        prefixq(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 4);
        emitInt(imm32);
    }

    public final void movslq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x63);
        emitOperandHelper(dst, src, 0);
    }

    public final void movslq(Register dst, Register src) {
        prefixq(dst, src);
        emitByte(0x63);
        emitModRM(dst, src);
    }

    public final void negq(Register dst) {
        prefixq(dst);
        emitByte(0xF7);
        emitModRM(3, dst);
    }

    // ...

    public final void membar(int barriers) {
        if (target.isMP) {
            // We only have to handle StoreLoad.
            if ((barriers & STORE_LOAD) != 0) {
                // All usable chips support "locked" instructions which suffice as barriers,
                // and are much faster than the alternative of using the cpuid instruction.
                // We use here a locked add [rsp], 0, which is conveniently otherwise a no-op
                // except for blowing flags.
                // Any change to this code may need to revisit other places in the code where
                // this idiom is used, in particular the orderAccess code.
                lock();
                addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here
            }
        }
    }
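
    // Illustrative note (not from the original source): callers request a full fence with
    // membar(STORE_LOAD); the remaining barrier bits need no code on x86 because the TSO
    // memory model already orders LoadLoad, LoadStore and StoreStore.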

    @Override
    protected final void patchJumpTarget(int branch, int branchTarget) {
        int op = getByte(branch);
        assert op == 0xE8 // call
                        || op == 0x00 // jump table entry
                        || op == 0xE9 // jmp
                        || op == 0xEB // short jmp
                        || (op & 0xF0) == 0x70 // short jcc
                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
                        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;

        if (op == 0x00) {
            int offsetToJumpTableBase = getShort(branch + 1);
            int jumpTableBase = branch - offsetToJumpTableBase;
            int imm32 = branchTarget - jumpTableBase;
            emitInt(imm32, branch);
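            // Worked example (illustrative numbers): for a table entry at branch == 108 with
            // getShort(109) == 8, the table base is 100, so branchTarget == 160 stores
            // imm32 == 60; entries hold offsets relative to the jump table base.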
        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
            // Short-offset operators (jmp and jcc).
            final int imm8 = branchTarget - (branch + 2);
            /*
             * Since a wrongly patched short branch can potentially lead to working but badly
             * behaving code, we should always fail with an exception here instead of relying
             * on an assert.
             */
            if (!NumUtil.isByte(imm8)) {
                // ...

    public final void lfence() {
        emitByte(0x0F);
        emitByte(0xAE);
        emitByte(0xE8);
    }

    public final void vptest(Register dst, Register src) {
        VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpxor(Register dst, Register nds, Register src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vpxor(Register dst, Register nds, AMD64Address src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vmovdqu(Register dst, AMD64Address src) {
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpmovzxbw(Register dst, AMD64Address src) {
        VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vzeroupper() {
        emitVEX(L128, P_, M_0F, W0, 0, 0);
        emitByte(0x77);
    }

    // Sets ZF if the OR of the two mask registers is all zeros, and CF if it is all ones.
    public final void kortestq(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert src1.getRegisterCategory().equals(MASK) && src2.getRegisterCategory().equals(MASK);
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1);
        emitByte(0x98);
        emitModRM(src1, src2);
    }

    public final void kmovq(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert dst.getRegisterCategory().equals(MASK) || dst.getRegisterCategory().equals(CPU);
        assert src.getRegisterCategory().equals(MASK) || src.getRegisterCategory().equals(CPU);
        assert !(dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(CPU));

        if (dst.getRegisterCategory().equals(MASK)) {
            if (src.getRegisterCategory().equals(MASK)) {
                // kmovq(KRegister dst, KRegister src)
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovq(KRegister dst, Register src)
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (src.getRegisterCategory().equals(MASK)) {
                // kmovq(Register dst, KRegister src)
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }

    public final void evmovdqu64(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512F);
        assert dst.getRegisterCategory().equals(XMM);
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    public final void evpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert dst.getRegisterCategory().equals(XMM);
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert kdst.getRegisterCategory().equals(MASK) && nds.getRegisterCategory().equals(XMM);
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
        emitByte(0x74);
        emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
}
|
26
27 import static jdk.vm.ci.amd64.AMD64.CPU;
28 import static jdk.vm.ci.amd64.AMD64.MASK;
29 import static jdk.vm.ci.amd64.AMD64.XMM;
30 import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
31 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
32 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
33 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
34 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
35 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
36 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
37 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
38 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
39 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
40 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
41 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
42 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
43 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
51 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
52 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
53 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
54 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
55 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
56 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
57 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
58 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
59 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
60 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
61 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
62 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
63 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
64 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
65 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
66 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
1007
1008 /**
1009 * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
1010 */
1011 public static class VexRROp extends VexOp {
1012 // @formatter:off
1013 public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
1014 // @formatter:on
1015
1016 protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
1017 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1018 }
1019
1020 protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1021 super(opcode, pp, mmmmm, w, op, assertion);
1022 }
1023
1024 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1025 assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1026 assert op != 0x1A && op != 0x5A;
1027 asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1028 asm.emitByte(op);
1029 asm.emitModRM(dst, src);
1030 }
1031 }
1032
1033 /**
1034 * VEX-encoded instructions with an operand order of RM.
1035 */
1036 public static class VexRMOp extends VexRROp {
1037 // @formatter:off
1038 public static final VexRMOp VCVTTSS2SI = new VexRMOp("VCVTTSS2SI", P_F3, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
1039 public static final VexRMOp VCVTTSS2SQ = new VexRMOp("VCVTTSS2SQ", P_F3, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
1040 public static final VexRMOp VCVTTSD2SI = new VexRMOp("VCVTTSD2SI", P_F2, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
1041 public static final VexRMOp VCVTTSD2SQ = new VexRMOp("VCVTTSD2SQ", P_F2, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
1042 public static final VexRMOp VCVTPS2PD = new VexRMOp("VCVTPS2PD", P_, M_0F, WIG, 0x5A);
1043 public static final VexRMOp VCVTPD2PS = new VexRMOp("VCVTPD2PS", P_66, M_0F, WIG, 0x5A);
1044 public static final VexRMOp VCVTDQ2PS = new VexRMOp("VCVTDQ2PS", P_, M_0F, WIG, 0x5B);
1045 public static final VexRMOp VCVTTPS2DQ = new VexRMOp("VCVTTPS2DQ", P_F3, M_0F, WIG, 0x5B);
1046 public static final VexRMOp VCVTTPD2DQ = new VexRMOp("VCVTTPD2DQ", P_66, M_0F, WIG, 0xE6);
1047 public static final VexRMOp VCVTDQ2PD = new VexRMOp("VCVTDQ2PD", P_F3, M_0F, WIG, 0xE6);
1068 public static final VexRMOp VPMOVZXDQ = new VexRMOp("VPMOVZXDQ", P_66, M_0F38, WIG, 0x35);
1069 public static final VexRMOp VPTEST = new VexRMOp("VPTEST", P_66, M_0F38, WIG, 0x17);
1070 public static final VexRMOp VSQRTPD = new VexRMOp("VSQRTPD", P_66, M_0F, WIG, 0x51);
1071 public static final VexRMOp VSQRTPS = new VexRMOp("VSQRTPS", P_, M_0F, WIG, 0x51);
1072 public static final VexRMOp VSQRTSD = new VexRMOp("VSQRTSD", P_F2, M_0F, WIG, 0x51);
1073 public static final VexRMOp VSQRTSS = new VexRMOp("VSQRTSS", P_F3, M_0F, WIG, 0x51);
1074 public static final VexRMOp VUCOMISS = new VexRMOp("VUCOMISS", P_, M_0F, WIG, 0x2E);
1075 public static final VexRMOp VUCOMISD = new VexRMOp("VUCOMISD", P_66, M_0F, WIG, 0x2E);
1076 // @formatter:on
1077
1078 protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
1079 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1080 }
1081
1082 protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1083 super(opcode, pp, mmmmm, w, op, assertion);
1084 }
1085
1086 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1087 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1088 asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1089 asm.emitByte(op);
1090 asm.emitOperandHelper(dst, src, 0);
1091 }
1092 }
1093
1094 /**
1095 * VEX-encoded move instructions.
1096 * <p>
1097 * These instructions have two opcodes: op is the forward move instruction with an operand order
1098 * of RM, and opReverse is the reverse move instruction with an operand order of MR.
1099 */
1100 public static final class VexMoveOp extends VexRMOp {
1101 // @formatter:off
1102 public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
1103 public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
1104 public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_, M_0F, WIG, 0x28, 0x29);
1105 public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
1106 public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_, M_0F, WIG, 0x10, 0x11);
1107 public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
1108 public static final VexMoveOp VMOVSS = new VexMoveOp("VMOVSS", P_F3, M_0F, WIG, 0x10, 0x11);
1109 public static final VexMoveOp VMOVSD = new VexMoveOp("VMOVSD", P_F2, M_0F, WIG, 0x10, 0x11);
1110 public static final VexMoveOp VMOVD = new VexMoveOp("VMOVD", P_66, M_0F, W0, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
1111 public static final VexMoveOp VMOVQ = new VexMoveOp("VMOVQ", P_66, M_0F, W1, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
1112 // @formatter:on
1113
1114 private final int opReverse;
1115
1116 private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1117 this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
1118 }
1119
1120 private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1121 super(opcode, pp, mmmmm, w, op, assertion);
1122 this.opReverse = opReverse;
1123 }
1124
1125 public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
1126 assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1127 asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1128 asm.emitByte(opReverse);
1129 asm.emitOperandHelper(src, dst, 0);
1130 }
1131
1132 public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1133 assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1134 asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1135 asm.emitByte(opReverse);
1136 asm.emitModRM(src, dst);
1137 }
1138 }
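
// Illustrative sketch (added for exposition; not part of the original source):
// how the two opcodes of a VexMoveOp are selected. A load uses the forward RM
// opcode (op), a store the reverse MR opcode (opReverse), so a single constant
// covers both directions. The register and address arguments are assumed.
private static void moveDirectionSketch(AMD64Assembler asm, Register reg, AMD64Address mem) {
    VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, reg, mem); // load: forward opcode 0x6F
    VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, mem, reg); // store: reverse opcode 0x7F
}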
1139
1140 public interface VexRRIOp {
1141 void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
1142 }
1143
1144 /**
1145 * VEX-encoded instructions with an operand order of RMI.
1146 */
1147 public static final class VexRMIOp extends VexOp implements VexRRIOp {
1148 // @formatter:off
1149 public static final VexRMIOp VPERMQ = new VexRMIOp("VPERMQ", P_66, M_0F3A, W1, 0x00, VEXOpAssertion.AVX2_256ONLY);
1150 public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
1151 public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
1152 public static final VexRMIOp VPSHUFD = new VexRMIOp("VPSHUFD", P_66, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
1153 // @formatter:on
1154
1155 private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1156 super(opcode, pp, mmmmm, w, op, assertion);
1157 }
1158
1159 @Override
1160 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1161 assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1162 asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1163 asm.emitByte(op);
1164 asm.emitModRM(dst, src);
1165 asm.emitByte(imm8);
1166 }
1167
1168 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
1169 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1170 asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
1171 asm.emitByte(op);
1172 asm.emitOperandHelper(dst, src, 1);
1173 asm.emitByte(imm8);
1174 }
1175 }
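
// Illustrative sketch (added for exposition; not part of the original source):
// for VPSHUFD the imm8 holds four 2-bit selectors, one per destination dword.
// 0x1B = 0b00_01_10_11 picks source dwords 3, 2, 1, 0 and therefore reverses
// the four elements.
private static void reverseDwordsSketch(AMD64Assembler asm, Register dst, Register src) {
    VexRMIOp.VPSHUFD.emit(asm, AVXSize.XMM, dst, src, 0x1B);
}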
1176
1177 /**
1178 * VEX-encoded instructions with an operand order of MRI.
1179 */
1180 public static final class VexMRIOp extends VexOp implements VexRRIOp {
1181 // @formatter:off
1182 public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
1183 public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
1184 public static final VexMRIOp VPEXTRB = new VexMRIOp("VPEXTRB", P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
1185 public static final VexMRIOp VPEXTRW = new VexMRIOp("VPEXTRW", P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
1186 public static final VexMRIOp VPEXTRD = new VexMRIOp("VPEXTRD", P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
1187 public static final VexMRIOp VPEXTRQ = new VexMRIOp("VPEXTRQ", P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
1188 // @formatter:on
1189
1190 private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1191 super(opcode, pp, mmmmm, w, op, assertion);
1192 }
1193
1194 @Override
1195 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1196 assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1197 asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1198 asm.emitByte(op);
1199 asm.emitModRM(src, dst);
1200 asm.emitByte(imm8);
1201 }
1202
1203 public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
1204 assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1205 asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
1206 asm.emitByte(op);
1207 asm.emitOperandHelper(src, dst, 1);
1208 asm.emitByte(imm8);
1209 }
1210 }
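
// Illustrative sketch (added for exposition; not part of the original source):
// for the 128-bit extract instructions the imm8 names the lane: 0 extracts the
// low half of the 256-bit source, 1 the high half.
private static void extractHighLaneSketch(AMD64Assembler asm, Register xmmDst, Register ymmSrc) {
    VexMRIOp.VEXTRACTF128.emit(asm, AVXSize.YMM, xmmDst, ymmSrc, 1);
}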
1211
1212 /**
1213 * VEX-encoded instructions with an operand order of RVMR.
1214 */
1215 public static class VexRVMROp extends VexOp {
1216 // @formatter:off
1217 public static final VexRVMROp VPBLENDVB = new VexRVMROp("VPBLENDVB", P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
1218 public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
1219 public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
1220 // @formatter:on
1221
1222 protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1223 super(opcode, pp, mmmmm, w, op, assertion);
1224 }
1225
1226 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
1227 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
1228 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1229 asm.emitByte(op);
1230 asm.emitModRM(dst, src2);
1231 asm.emitByte(mask.encoding() << 4);
1232 }
1233
1234 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
1235 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
1236 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1237 asm.emitByte(op);
1238 asm.emitOperandHelper(dst, src2, 0);
1239 asm.emitByte(mask.encoding() << 4);
1240 }
1241 }
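
// Illustrative sketch (added for exposition; not part of the original source):
// the fourth (mask) register of the variable blends is carried in the high
// nibble of a trailing immediate byte, which is why emit() appends
// mask.encoding() << 4 after the ModRM byte. For VPBLENDVB each destination
// byte is taken from src2 where the mask byte's MSB is set, else from src1.
private static void byteSelectSketch(AMD64Assembler asm, Register dst, Register mask, Register src1, Register src2) {
    VexRVMROp.VPBLENDVB.emit(asm, AVXSize.XMM, dst, mask, src1, src2);
}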
1242
1243 /**
1244 * VEX-encoded instructions with an operand order of RVM.
1245 */
1246 public static class VexRVMOp extends VexOp {
1247 // @formatter:off
1248 public static final VexRVMOp VANDPS = new VexRVMOp("VANDPS", P_, M_0F, WIG, 0x54);
1249 public static final VexRVMOp VANDPD = new VexRVMOp("VANDPD", P_66, M_0F, WIG, 0x54);
1250 public static final VexRVMOp VANDNPS = new VexRVMOp("VANDNPS", P_, M_0F, WIG, 0x55);
1251 public static final VexRVMOp VANDNPD = new VexRVMOp("VANDNPD", P_66, M_0F, WIG, 0x55);
1252 public static final VexRVMOp VORPS = new VexRVMOp("VORPS", P_, M_0F, WIG, 0x56);
1253 public static final VexRVMOp VORPD = new VexRVMOp("VORPD", P_66, M_0F, WIG, 0x56);
1254 public static final VexRVMOp VXORPS = new VexRVMOp("VXORPS", P_, M_0F, WIG, 0x57);
1255 public static final VexRVMOp VXORPD = new VexRVMOp("VXORPD", P_66, M_0F, WIG, 0x57);
1256 public static final VexRVMOp VADDPS = new VexRVMOp("VADDPS", P_, M_0F, WIG, 0x58);
1304 public static final VexRVMOp VPCMPEQB = new VexRVMOp("VPCMPEQB", P_66, M_0F, WIG, 0x74, VEXOpAssertion.AVX1_2);
1305 public static final VexRVMOp VPCMPEQW = new VexRVMOp("VPCMPEQW", P_66, M_0F, WIG, 0x75, VEXOpAssertion.AVX1_2);
1306 public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2);
1307 public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
1308 public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2);
1309 public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2);
1310 public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2);
1311 public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
1312 // @formatter:on
1313
1314 private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
1315 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1316 }
1317
1318 protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1319 super(opcode, pp, mmmmm, w, op, assertion);
1320 }
1321
1322 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1323 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1324 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1325 asm.emitByte(op);
1326 asm.emitModRM(dst, src2);
1327 }
1328
1329 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1330 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1331 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1332 asm.emitByte(op);
1333 asm.emitOperandHelper(dst, src2, 0);
1334 }
1335 }
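
// Illustrative sketch (added for exposition; not part of the original source):
// the classic SIMD zeroing idiom in RVM form. XORing a register with itself
// yields zero without a memory load and is recognized by hardware as
// dependency-breaking (VPXOR is used the same way by vpxor() later in this file).
private static void zeroVectorSketch(AMD64Assembler asm, Register dst) {
    VexRVMOp.VPXOR.emit(asm, AVXSize.YMM, dst, dst, dst);
}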
1336
1337 public static final class VexGeneralPurposeRVMOp extends VexRVMOp {
1338 // @formatter:off
1339 public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1);
1340 public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2);
1341 public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1342 public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1343 // @formatter:on
1344
1345 private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1346 super(opcode, pp, mmmmm, w, op, assertion);
1347 }
1348
1349 @Override
1350 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1351 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null);
1352 assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1353 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1354 asm.emitByte(op);
1355 asm.emitModRM(dst, src2);
1356 }
1357
1358 @Override
1359 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1360 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null);
1361 assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1362 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1363 asm.emitByte(op);
1364 asm.emitOperandHelper(dst, src2, 0);
1365 }
1366 }
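
// Illustrative sketch (added for exposition; not part of the original source):
// ANDN computes dst = ~src1 & src2; the DWORD/QWORD distinction above is
// carried in the VEX.W bit rather than in a separate opcode.
private static void andnSketch(AMD64Assembler asm, Register dst, Register src1, Register src2) {
    VexGeneralPurposeRVMOp.ANDN.emit(asm, AVXSize.QWORD, dst, src1, src2);
}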
1367
1368 public static final class VexGeneralPurposeRMVOp extends VexOp {
1369 // @formatter:off
1370 public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1);
1371 public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1372 public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1373 public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1374 public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1375 // @formatter:on
1376
1377 private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1378 super(opcode, pp, mmmmm, w, op, assertion);
1379 }
1380
1381 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1382 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null);
1383 assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1384 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1385 asm.emitByte(op);
1386 asm.emitModRM(dst, src1);
1387 }
1388
1389 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) {
1390 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null);
1391 assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1392 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1393 asm.emitByte(op);
1394 asm.emitOperandHelper(dst, src1, 0);
1395 }
1396 }
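
// Illustrative sketch (added for exposition; not part of the original source):
// in the RMV order the shift amount travels in the VEX.vvvv field, so SHLX
// computes dst = value << (amount & 63) for QWORD (& 31 for DWORD) without
// touching the flags, unlike the legacy SHL.
private static void flaglessShiftSketch(AMD64Assembler asm, Register dst, Register value, Register amount) {
    VexGeneralPurposeRMVOp.SHLX.emit(asm, AVXSize.QWORD, dst, value, amount);
}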
1397
1398 public static final class VexGeneralPurposeRMOp extends VexRMOp {
1399 // @formatter:off
1400 public static final VexGeneralPurposeRMOp BLSI = new VexGeneralPurposeRMOp("BLSI", P_, M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1);
1401 public static final VexGeneralPurposeRMOp BLSMSK = new VexGeneralPurposeRMOp("BLSMSK", P_, M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1);
1402 public static final VexGeneralPurposeRMOp BLSR = new VexGeneralPurposeRMOp("BLSR", P_, M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1);
1403 // @formatter:on
1404 private final int ext;
1405
1406 private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) {
1407 super(opcode, pp, mmmmm, w, op, assertion);
1408 this.ext = ext;
1409 }
1410
1411 @Override
1412 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1413 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1414 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1415 asm.emitByte(op);
1416 asm.emitModRM(ext, src);
1417 }
1418
1419 @Override
1420 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1421 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1422 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
1423 asm.emitByte(op);
1424 asm.emitOperandHelper(ext, src, 0);
1425 }
1426 }
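
// Illustrative sketch (added for exposition; not part of the original source):
// the BLS* group shares opcode 0xF3 and is distinguished by the ext field,
// which emit() places in the ModRM reg position while the destination travels
// in VEX.vvvv:
//   BLSI    dst = src & -src        (isolate lowest set bit)
//   BLSR    dst = src & (src - 1)   (clear lowest set bit)
//   BLSMSK  dst = src ^ (src - 1)   (mask up to lowest set bit)
private static void lowestSetBitSketch(AMD64Assembler asm, Register dst, Register src) {
    VexGeneralPurposeRMOp.BLSI.emit(asm, AVXSize.QWORD, dst, src);
}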
1427
1428 /**
1429 * VEX-encoded shift instructions with an operand order of either RVM or VMI.
1430 */
1431 public static final class VexShiftOp extends VexRVMOp implements VexRRIOp {
1432 // @formatter:off
1433 public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2);
1434 public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2);
1435 public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2);
1436 public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4);
1437 public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4);
1438 public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6);
1439 public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6);
1440 public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6);
1441 // @formatter:on
1442
1443 private final int immOp;
1444 private final int r;
1445
1446 private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) {
1447 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2);
1448 this.immOp = immOp;
1449 this.r = r;
1450 }
1451
1452 @Override
1453 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1454 assert assertion.check((AMD64) asm.target.arch, size, null, dst, src);
1455 asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, false);
1456 asm.emitByte(immOp);
1457 asm.emitModRM(r, src);
1458 asm.emitByte(imm8);
1459 }
1460 }
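
// Illustrative sketch (added for exposition; not part of the original source):
// the same shift exists in two encodings. The inherited RVM form takes the
// count from an XMM register; the VMI form above takes an immediate, moving
// the destination into vvvv and using r as the opcode extension.
private static void shiftFormsSketch(AMD64Assembler asm, Register dst, Register src, Register xmmCount) {
    VexShiftOp.VPSLLD.emit(asm, AVXSize.XMM, dst, src, xmmCount); // RVM: variable count
    VexShiftOp.VPSLLD.emit(asm, AVXSize.XMM, dst, src, 7);        // VMI: immediate count
}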
1461
1462 public static final class VexMaskMoveOp extends VexOp {
1463 // @formatter:off
1464 public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E);
1465 public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F);
1466 public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1467 public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1468 // @formatter:on
1469
1470 private final int opReverse;
1471
1472 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1473 this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
1474 }
1475
1476 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1477 super(opcode, pp, mmmmm, w, op, assertion);
1478 this.opReverse = opReverse;
1479 }
1480
1481 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) {
1482 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null);
1483 asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, false);
1484 asm.emitByte(op);
1485 asm.emitOperandHelper(dst, src, 0);
1486 }
1487
1488 public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) {
1489 assert assertion.check((AMD64) asm.target.arch, size, src, mask, null);
1490 asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, false);
1491 asm.emitByte(opReverse);
1492 asm.emitOperandHelper(src, dst, 0);
1493 }
1494 }
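
// Illustrative sketch (added for exposition; not part of the original source):
// conditional vector moves. On a load, elements whose mask MSB is clear are
// zeroed; on a store they leave memory untouched, and masked-off elements are
// not meant to fault, which makes these usable for partial-vector tails.
private static void maskedCopySketch(AMD64Assembler asm, Register data, Register mask, AMD64Address mem) {
    VexMaskMoveOp.VMASKMOVPS.emit(asm, AVXSize.YMM, data, mask, mem); // masked load
    VexMaskMoveOp.VMASKMOVPS.emit(asm, AVXSize.YMM, mem, mask, data); // masked store
}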
1495
1496 /**
1497 * VEX-encoded instructions with an operand order of RVMI.
1498 */
1499 public static final class VexRVMIOp extends VexOp {
1500 // @formatter:off
1501 public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6);
1502 public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6);
1503 public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY);
1504 public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY);
1505 // @formatter:on
1506
1507 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) {
1508 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1509 }
1510
1511 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1512 super(opcode, pp, mmmmm, w, op, assertion);
1513 }
1514
1515 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) {
1516 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1517 assert (imm8 & 0xFF) == imm8;
1518 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1519 asm.emitByte(op);
1520 asm.emitModRM(dst, src2);
1521 asm.emitByte(imm8);
1522 }
1523
1524 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) {
1525 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1526 assert (imm8 & 0xFF) == imm8;
1527 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1528 asm.emitByte(op);
1529 asm.emitOperandHelper(dst, src2, 1);
1530 asm.emitByte(imm8);
1531 }
1532 }
1533
1534 /**
1535 * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a
1536 * comparison operator.
1537 */
1538 public static final class VexFloatCompareOp extends VexOp {
1539 // @formatter:off
1540 public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2);
1541 public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
1542 public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2);
1543 public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
1544 // @formatter:on
1545
1546 public enum Predicate {
1547 EQ_OQ(0x00),
1611 return LT_OQ;
1612 case LE:
1613 return LE_OQ;
1614 case GT:
1615 return GT_OQ;
1616 case GE:
1617 return GE_OQ;
1618 default:
1619 throw GraalError.shouldNotReachHere();
1620 }
1621 }
1622 }
1623 }
1624
1625 private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) {
1626 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1627 }
1628
1629 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) {
1630 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1631 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1632 asm.emitByte(op);
1633 asm.emitModRM(dst, src2);
1634 asm.emitByte(p.imm8);
1635 }
1636
1637 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) {
1638 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1639 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
1640 asm.emitByte(op);
1641 asm.emitOperandHelper(dst, src2, 1);
1642 asm.emitByte(p.imm8);
1643 }
1644 }
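
// Illustrative sketch (added for exposition; not part of the original source):
// each element of dst becomes all ones where the predicate holds and all zeros
// otherwise, producing a mask directly usable with VANDPS or the blend ops.
private static void lessThanMaskSketch(AMD64Assembler asm, Register dst, Register a, Register b) {
    VexFloatCompareOp.VCMPPS.emit(asm, AVXSize.YMM, dst, a, b, VexFloatCompareOp.Predicate.LT_OQ);
}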
1645
1646 public final void addl(AMD64Address dst, int imm32) {
1647 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1648 }
1649
1650 public final void addl(Register dst, int imm32) {
1651 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1652 }
1653
1654 public final void addl(Register dst, Register src) {
1655 ADD.rmOp.emit(this, DWORD, dst, src);
1656 }
1657
1658 public final void addpd(Register dst, Register src) {
1659 SSEOp.ADD.emit(this, PD, dst, src);
1959 prefix(src, dst);
1960 emitByte(0x8D);
1961 emitOperandHelper(dst, src, 0);
1962 }
1963
1964 public final void leaq(Register dst, AMD64Address src) {
1965 prefixq(src, dst);
1966 emitByte(0x8D);
1967 emitOperandHelper(dst, src, 0);
1968 }
1969
1970 public final void leave() {
1971 emitByte(0xC9);
1972 }
1973
1974 public final void lock() {
1975 emitByte(0xF0);
1976 }
1977
1978 public final void movapd(Register dst, Register src) {
1979 assert inRC(XMM, dst) && inRC(XMM, src);
1980 simdPrefix(dst, Register.None, src, PD, P_0F, false);
1981 emitByte(0x28);
1982 emitModRM(dst, src);
1983 }
1984
1985 public final void movaps(Register dst, Register src) {
1986 assert inRC(XMM, dst) && inRC(XMM, src);
1987 simdPrefix(dst, Register.None, src, PS, P_0F, false);
1988 emitByte(0x28);
1989 emitModRM(dst, src);
1990 }
1991
1992 public final void movb(AMD64Address dst, int imm8) {
1993 prefix(dst);
1994 emitByte(0xC6);
1995 emitOperandHelper(0, dst, 1);
1996 emitByte(imm8);
1997 }
1998
1999 public final void movb(AMD64Address dst, Register src) {
2000 assert inRC(CPU, src) : "must have byte register";
2001 prefixb(dst, src);
2002 emitByte(0x88);
2003 emitOperandHelper(src, dst, 0);
2004 }
2005
2006 public final void movl(Register dst, int imm32) {
2007 movl(dst, imm32, false);
2008 }
2009
2010 public final void movl(Register dst, int imm32, boolean annotateImm) {
2011 int insnPos = position();
2012 prefix(dst);
2013 emitByte(0xB8 + encode(dst));
2014 int immPos = position();
2015 emitInt(imm32);
2016 int nextInsnPos = position();
2017 if (annotateImm && codePatchingAnnotationConsumer != null) {
2018 codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
2019 }
2020 }
2043 public final void movl(AMD64Address dst, int imm32) {
2044 prefix(dst);
2045 emitByte(0xC7);
2046 emitOperandHelper(0, dst, 4);
2047 emitInt(imm32);
2048 }
2049
2050 public final void movl(AMD64Address dst, Register src) {
2051 prefix(dst, src);
2052 emitByte(0x89);
2053 emitOperandHelper(src, dst, 0);
2054 }
2055
2056 /**
2057 * Newer CPUs require movsd and movss to avoid a partial register stall when loading from
2058 * memory, whereas old Opterons should use movlpd instead of movsd. The selection is done in
2059 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
2060 * {@link AMD64MacroAssembler#movflt(Register, Register)}.
2061 */
2062 public final void movlpd(Register dst, AMD64Address src) {
2063 assert inRC(XMM, dst);
2064 simdPrefix(dst, dst, src, PD, P_0F, false);
2065 emitByte(0x12);
2066 emitOperandHelper(dst, src, 0);
2067 }
2068
2069 public final void movlhps(Register dst, Register src) {
2070 assert inRC(XMM, dst) && inRC(XMM, src);
2071 simdPrefix(dst, src, src, PS, P_0F, false);
2072 emitByte(0x16);
2073 emitModRM(dst, src);
2074 }
2075
2076 public final void movq(Register dst, AMD64Address src) {
2077 movq(dst, src, false);
2078 }
2079
2080 public final void movq(Register dst, AMD64Address src, boolean force4BytesDisplacement) {
2081 if (inRC(XMM, dst)) {
2082 // Insn: MOVQ xmm, r/m64
2083 // Code: F3 0F 7E /r
2084 // An alternative encoding would be 66 REX.W 0F 6E /r. We prefer the REX.W-free
2085 // form because it can then be emitted with the shorter two-byte VEX prefix,
2086 // which is unavailable when REX.W is required.
2087 simdPrefix(dst, Register.None, src, SS, P_0F, false);
2088 emitByte(0x7E);
2089 emitOperandHelper(dst, src, force4BytesDisplacement, 0);
2090 } else {
2091 // gpr version of movq
2092 prefixq(src, dst);
2093 emitByte(0x8B);
2094 emitOperandHelper(dst, src, force4BytesDisplacement, 0);
2095 }
2096 }
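
// Encoding note (added for exposition; not part of the original source),
// assuming dst = xmm0 and src = [rax]: the form above encodes as F3 0F 7E 00,
// while the alternative 66 REX.W 0F 6E /r form would be 66 48 0F 6E 00, one
// byte longer because of the mandatory REX.W.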
2097
2098 public final void movq(Register dst, Register src) {
2099 assert inRC(CPU, dst) && inRC(CPU, src);
2100 prefixq(dst, src);
2101 emitByte(0x8B);
2102 emitModRM(dst, src);
2103 }
2104
2105 public final void movq(AMD64Address dst, Register src) {
2106 if (inRC(XMM, src)) {
2107 // Insn: MOVQ r/m64, xmm
2108 // Code: 66 0F D6 /r
2109 // An alternative encoding would be 66 REX.W 0F 7E /r. We prefer the REX.W-free
2110 // form because it can then be emitted with the shorter two-byte VEX prefix,
2111 // which is unavailable when REX.W is required.
2112 simdPrefix(src, Register.None, dst, PD, P_0F, false);
2113 emitByte(0xD6);
2114 emitOperandHelper(src, dst, 0);
2115 } else {
2116 // gpr version of movq
2117 prefixq(dst, src);
2118 emitByte(0x89);
2119 emitOperandHelper(src, dst, 0);
2120 }
2121 }
2122
2123 public final void movsbl(Register dst, AMD64Address src) {
2124 prefix(src, dst);
2125 emitByte(0x0F);
2126 emitByte(0xBE);
2127 emitOperandHelper(dst, src, 0);
2128 }
2129
2130 public final void movsbl(Register dst, Register src) {
2131 prefix(dst, false, src, true);
2132 emitByte(0x0F);
2453 case 2:
2454 emitByte(0x66);
2455 emitByte(0x90);
2456 break;
2457 case 1:
2458 emitByte(0x90);
2459 break;
2460 default:
2461 assert i == 0;
2462 }
2463 }
2464
2465 public final void orl(Register dst, Register src) {
2466 OR.rmOp.emit(this, DWORD, dst, src);
2467 }
2468
2469 public final void orl(Register dst, int imm32) {
2470 OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2471 }
2472
2473 // Insn: VPACKUSWB xmm1, xmm2, xmm3/m128
2474 // -----
2475 // Insn: VPACKUSWB xmm1, xmm1, xmm2
2476
2477 public final void packuswb(Register dst, Register src) {
2478 assert inRC(XMM, dst) && inRC(XMM, src);
2479 // Code: VEX.NDS.128.66.0F.WIG 67 /r
2480 simdPrefix(dst, dst, src, PD, P_0F, false);
2481 emitByte(0x67);
2482 emitModRM(dst, src);
2483 }
2484
2485 public final void pop(Register dst) {
2486 prefix(dst);
2487 emitByte(0x58 + encode(dst));
2488 }
2489
2490 public void popfq() {
2491 emitByte(0x9D);
2492 }
2493
2494 public final void ptest(Register dst, Register src) {
2495 assert supports(CPUFeature.SSE4_1);
2496 assert inRC(XMM, dst) && inRC(XMM, src);
2497 simdPrefix(dst, Register.None, src, PD, P_0F38, false);
2498 emitByte(0x17);
2499 emitModRM(dst, src);
2500 }
2501
2502 public final void pcmpeqb(Register dst, Register src) {
2503 assert supports(CPUFeature.SSE2);
2504 assert inRC(XMM, dst) && inRC(XMM, src);
2505 simdPrefix(dst, dst, src, PD, P_0F, false);
2506 emitByte(0x74);
2507 emitModRM(dst, src);
2508 }
2509
2510 public final void pcmpeqw(Register dst, Register src) {
2511 assert supports(CPUFeature.SSE2);
2512 assert inRC(XMM, dst) && inRC(XMM, src);
2513 simdPrefix(dst, dst, src, PD, P_0F, false);
2514 emitByte(0x75);
2515 emitModRM(dst, src);
2516 }
2517
2518 public final void pcmpeqd(Register dst, Register src) {
2519 assert supports(CPUFeature.SSE2);
2520 assert inRC(XMM, dst) && inRC(XMM, src);
2521 simdPrefix(dst, dst, src, PD, P_0F, false);
2522 emitByte(0x76);
2523 emitModRM(dst, src);
2524 }
2525
2526 public final void pcmpestri(Register dst, AMD64Address src, int imm8) {
2527 assert supports(CPUFeature.SSE4_2);
2528 assert inRC(XMM, dst);
2529 simdPrefix(dst, Register.None, src, PD, P_0F3A, false);
2530 emitByte(0x61);
2531 emitOperandHelper(dst, src, 0);
2532 emitByte(imm8);
2533 }
2534
2535 public final void pcmpestri(Register dst, Register src, int imm8) {
2536 assert supports(CPUFeature.SSE4_2);
2537 assert inRC(XMM, dst) && inRC(XMM, src);
2538 simdPrefix(dst, Register.None, src, PD, P_0F3A, false);
2539 emitByte(0x61);
2540 emitModRM(dst, src);
2541 emitByte(imm8);
2542 }
2543
2544 public final void pmovmskb(Register dst, Register src) {
2545 assert supports(CPUFeature.SSE2);
2546 assert inRC(CPU, dst) && inRC(XMM, src);
2547 simdPrefix(dst, Register.None, src, PD, P_0F, false);
2548 emitByte(0xD7);
2549 emitModRM(dst, src);
2550 }
2551
2552 // Insn: VPMOVZXBW xmm1, xmm2/m64
2553
2554 public final void pmovzxbw(Register dst, AMD64Address src) {
2555 assert supports(CPUFeature.SSE4_1);
2556 assert inRC(XMM, dst);
2557 simdPrefix(dst, Register.None, src, PD, P_0F38, false);
2558 emitByte(0x30);
2559 emitOperandHelper(dst, src, 0);
2560 }
2561
2562 public final void pmovzxbw(Register dst, Register src) {
2563 assert supports(CPUFeature.SSE4_1);
2564 assert inRC(XMM, dst) && inRC(XMM, src);
2565 simdPrefix(dst, Register.None, src, PD, P_0F38, false);
2566 emitByte(0x30);
2567 emitModRM(dst, src);
2568 }
2569
2570 public final void push(Register src) {
2571 prefix(src);
2572 emitByte(0x50 + encode(src));
2573 }
2574
2575 public void pushfq() {
2576 emitByte(0x9C);
2577 }
2578
2579 public final void paddd(Register dst, Register src) {
2580 assert inRC(XMM, dst) && inRC(XMM, src);
2581 simdPrefix(dst, dst, src, PD, P_0F, false);
2582 emitByte(0xFE);
2583 emitModRM(dst, src);
2584 }
2585
2586 public final void paddq(Register dst, Register src) {
2587 assert inRC(XMM, dst) && inRC(XMM, src);
2588 simdPrefix(dst, dst, src, PD, P_0F, false);
2589 emitByte(0xD4);
2590 emitModRM(dst, src);
2591 }
2592
2593 public final void pextrw(Register dst, Register src, int imm8) {
2594 assert inRC(CPU, dst) && inRC(XMM, src);
2595 simdPrefix(dst, Register.None, src, PD, P_0F, false);
2596 emitByte(0xC5);
2597 emitModRM(dst, src);
2598 emitByte(imm8);
2599 }
2600
2601 public final void pinsrw(Register dst, Register src, int imm8) {
2602 assert inRC(XMM, dst) && inRC(CPU, src);
2603 simdPrefix(dst, dst, src, PD, P_0F, false);
2604 emitByte(0xC4);
2605 emitModRM(dst, src);
2606 emitByte(imm8);
2607 }
2608
2609 public final void por(Register dst, Register src) {
2610 assert inRC(XMM, dst) && inRC(XMM, src);
2611 simdPrefix(dst, dst, src, PD, P_0F, false);
2612 emitByte(0xEB);
2613 emitModRM(dst, src);
2614 }
2615
2616 public final void pand(Register dst, Register src) {
2617 assert inRC(XMM, dst) && inRC(XMM, src);
2618 simdPrefix(dst, dst, src, PD, P_0F, false);
2619 emitByte(0xDB);
2620 emitModRM(dst, src);
2621 }
2622
2623 public final void pxor(Register dst, Register src) {
2624 assert inRC(XMM, dst) && inRC(XMM, src);
2625 simdPrefix(dst, dst, src, PD, P_0F, false);
2626 emitByte(0xEF);
2627 emitModRM(dst, src);
2628 }
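
// Illustrative sketch (added for exposition; not part of the original source):
// the canonical XMM zeroing idiom; XORing a register with itself yields zero
// and breaks the dependency on the register's previous value.
private void zeroXmmSketch(Register dst) {
    pxor(dst, dst); // dst = dst ^ dst = 0
}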
2629
2630 public final void pslld(Register dst, int imm8) {
2631 assert isUByte(imm8) : "invalid value";
2632 assert inRC(XMM, dst);
2633 // XMM6 is for /6 encoding: 66 0F 72 /6 ib
2634 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false);
2635 emitByte(0x72);
2636 emitModRM(6, dst);
2637 emitByte(imm8 & 0xFF);
2638 }
2639
2640 public final void psllq(Register dst, Register shift) {
2641 assert inRC(XMM, dst) && inRC(XMM, shift);
2642 simdPrefix(dst, dst, shift, PD, P_0F, false);
2643 emitByte(0xF3);
2644 emitModRM(dst, shift);
2645 }
2646
2647 public final void psllq(Register dst, int imm8) {
2648 assert isUByte(imm8) : "invalid value";
2649 assert inRC(XMM, dst);
2650 // XMM6 is for /6 encoding: 66 0F 73 /6 ib
2651 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false);
2652 emitByte(0x73);
2653 emitModRM(6, dst);
2654 emitByte(imm8);
2655 }
2656
2657 public final void psrad(Register dst, int imm8) {
2658 assert isUByte(imm8) : "invalid value";
2659 assert inRC(XMM, dst);
2660 // XMM4 is for /4 encoding: 66 0F 72 /4 ib
2661 simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false);
2662 emitByte(0x72);
2663 emitModRM(4, dst);
2664 emitByte(imm8);
2665 }
2666
2667 public final void psrld(Register dst, int imm8) {
2668 assert isUByte(imm8) : "invalid value";
2669 assert inRC(XMM, dst);
2670 // XMM2 is for /2 encoding: 66 0F 72 /2 ib
2671 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false);
2672 emitByte(0x72);
2673 emitModRM(2, dst);
2674 emitByte(imm8);
2675 }
2676
2677 public final void psrlq(Register dst, int imm8) {
2678 assert isUByte(imm8) : "invalid value";
2679 assert inRC(XMM, dst);
2680 // XMM2 is for /2 encoding: 66 0F 73 /2 ib
2681 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false);
2682 emitByte(0x73);
2683 emitModRM(2, dst);
2684 emitByte(imm8);
2685 }
2686
2687 public final void psrldq(Register dst, int imm8) {
2688 assert isUByte(imm8) : "invalid value";
2689 assert inRC(XMM, dst);
2690 simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false);
2691 emitByte(0x73);
2692 emitModRM(3, dst);
2693 emitByte(imm8);
2694 }
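
// Illustrative sketch (added for exposition; not part of the original source):
// unlike psrlq, psrldq shifts the full 128-bit register right by imm8 bytes;
// shifting by 8 moves the high quadword into the low quadword.
private void highToLowQwordSketch(Register dst) {
    psrldq(dst, 8);
}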
2695
2696 public final void pshufb(Register dst, Register src) {
2697 assert supports(CPUFeature.SSSE3);
2698 assert inRC(XMM, dst) && inRC(XMM, src);
2699 simdPrefix(dst, dst, src, PD, P_0F38, false);
2700 emitByte(0x00);
2701 emitModRM(dst, src);
2702 }
2703
2704 public final void pshuflw(Register dst, Register src, int imm8) {
2705 assert supports(CPUFeature.SSE2);
2706 assert isUByte(imm8) : "invalid value";
2707 assert inRC(XMM, dst) && inRC(XMM, src);
2708 simdPrefix(dst, Register.None, src, SD, P_0F, false);
2709 emitByte(0x70);
2710 emitModRM(dst, src);
2711 emitByte(imm8);
2712 }
2713
2714 public final void pshufd(Register dst, Register src, int imm8) {
2715 assert isUByte(imm8) : "invalid value";
2716 assert inRC(XMM, dst) && inRC(XMM, src);
2717 simdPrefix(dst, Register.None, src, PD, P_0F, false);
2718 emitByte(0x70);
2719 emitModRM(dst, src);
2720 emitByte(imm8);
2721 }
2722
2723 public final void psubd(Register dst, Register src) {
2724 assert inRC(XMM, dst) && inRC(XMM, src);
2725 simdPrefix(dst, dst, src, PD, P_0F, false);
2726 emitByte(0xFA);
2727 emitModRM(dst, src);
2728 }
2729
2730 public final void punpcklbw(Register dst, Register src) {
2731 assert supports(CPUFeature.SSE2);
2732 assert inRC(XMM, dst) && inRC(XMM, src);
2733 simdPrefix(dst, dst, src, PD, P_0F, false);
2734 emitByte(0x60);
2735 emitModRM(dst, src);
2736 }
2737
2738 public final void rcpps(Register dst, Register src) {
2739 assert inRC(XMM, dst) && inRC(XMM, src);
2740 simdPrefix(dst, Register.None, src, PS, P_0F, false);
2741 emitByte(0x53);
2742 emitModRM(dst, src);
2743 }
2744
2745 public final void ret(int imm16) {
2746 if (imm16 == 0) {
2747 emitByte(0xC3);
2748 } else {
2749 emitByte(0xC2);
2750 emitShort(imm16);
2751 }
2752 }
2753
2754 public final void sarl(Register dst, int imm8) {
2755 prefix(dst);
2756 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2757 if (imm8 == 1) {
2758 emitByte(0xD1);
2759 emitModRM(7, dst);
2767 public final void shll(Register dst, int imm8) {
2768 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2769 prefix(dst);
2770 if (imm8 == 1) {
2771 emitByte(0xD1);
2772 emitModRM(4, dst);
2773 } else {
2774 emitByte(0xC1);
2775 emitModRM(4, dst);
2776 emitByte(imm8);
2777 }
2778 }
2779
2780 public final void shll(Register dst) {
2781 // Multiply dst by 2, CL times.
2782 prefix(dst);
2783 emitByte(0xD3);
2784 emitModRM(4, dst);
2785 }
2786
2787 // Insn: SHLX r32a, r/m32, r32b
2788
2789 public final void shlxl(Register dst, Register src1, Register src2) {
2790 VexGeneralPurposeRMVOp.SHLX.emit(this, AVXSize.DWORD, dst, src1, src2);
2791 }
2792
2793 public final void shrl(Register dst, int imm8) {
2794 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2795 prefix(dst);
2796 emitByte(0xC1);
2797 emitModRM(5, dst);
2798 emitByte(imm8);
2799 }
2800
2801 public final void shrl(Register dst) {
2802 // Unsigned divide dst by 2, CL times.
2803 prefix(dst);
2804 emitByte(0xD3);
2805 emitModRM(5, dst);
2806 }
2807
2808 public final void subl(AMD64Address dst, int imm32) {
2809 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2810 }
2811
2812 public final void subl(Register dst, int imm32) {
2839 prefix(dst);
2840 emitByte(0xF7);
2841 emitModRM(0, dst);
2842 }
2843 emitInt(imm32);
2844 }
2845
2846 public final void testl(Register dst, Register src) {
2847 prefix(dst, src);
2848 emitByte(0x85);
2849 emitModRM(dst, src);
2850 }
2851
2852 public final void testl(Register dst, AMD64Address src) {
2853 prefix(src, dst);
2854 emitByte(0x85);
2855 emitOperandHelper(dst, src, 0);
2856 }
2857
2858 public final void unpckhpd(Register dst, Register src) {
2859 assert inRC(XMM, dst) && inRC(XMM, src);
2860 simdPrefix(dst, dst, src, PD, P_0F, false);
2861 emitByte(0x15);
2862 emitModRM(dst, src);
2863 }
2864
2865 public final void unpcklpd(Register dst, Register src) {
2866 assert inRC(XMM, dst) && inRC(XMM, src);
2867 simdPrefix(dst, dst, src, PD, P_0F, false);
2868 emitByte(0x14);
2869 emitModRM(dst, src);
2870 }
2871
2872 public final void xorl(Register dst, Register src) {
2873 XOR.rmOp.emit(this, DWORD, dst, src);
2874 }
2875
2876 public final void xorpd(Register dst, Register src) {
2877 SSEOp.XOR.emit(this, PD, dst, src);
2878 }
2879
2880 public final void xorps(Register dst, Register src) {
2881 SSEOp.XOR.emit(this, PS, dst, src);
2882 }
2883
2884 protected final void decl(Register dst) {
2885 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2886 prefix(dst);
2957 public final void cmpq(Register dst, int imm32) {
2958 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2959 }
2960
2961 public final void cmpq(Register dst, Register src) {
2962 CMP.rmOp.emit(this, QWORD, dst, src);
2963 }
2964
2965 public final void cmpq(Register dst, AMD64Address src) {
2966 CMP.rmOp.emit(this, QWORD, dst, src);
2967 }
2968
2969 public final void cmpxchgq(Register reg, AMD64Address adr) {
2970 prefixq(adr, reg);
2971 emitByte(0x0F);
2972 emitByte(0xB1);
2973 emitOperandHelper(reg, adr, 0);
2974 }
2975
2976 public final void cvtdq2pd(Register dst, Register src) {
2977 assert inRC(XMM, dst) && inRC(XMM, src);
2978 simdPrefix(dst, Register.None, src, SS, P_0F, false);
2979 emitByte(0xE6);
2980 emitModRM(dst, src);
2981 }
2982
2983 public final void cvtsi2sdq(Register dst, Register src) {
2984 SSEOp.CVTSI2SD.emit(this, QWORD, dst, src);
2985 }
2986
2987 public final void cvttsd2siq(Register dst, Register src) {
2988 SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src);
2989 }
2990
2991 public final void cvttpd2dq(Register dst, Register src) {
2992 assert inRC(XMM, dst) && inRC(XMM, src);
2993 simdPrefix(dst, Register.None, src, PD, P_0F, false);
2994 emitByte(0xE6);
2995 emitModRM(dst, src);
2996 }
2997
2998 public final void decq(Register dst) {
2999 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3000 prefixq(dst);
3001 emitByte(0xFF);
3002 emitModRM(1, dst);
3003 }
3004
3005 public final void decq(AMD64Address dst) {
3006 DEC.emit(this, QWORD, dst);
3007 }
3008
3009 public final void imulq(Register dst, Register src) {
3010 prefixq(dst, src);
3011 emitByte(0x0F);
3012 emitByte(0xAF);
3013 emitModRM(dst, src);
3014 }
3015
3016 public final void incq(Register dst) {
3017 // Don't use it directly; use the macro assembler's incrementq() instead.
3018 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3019 prefixq(dst);
3040 codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
3041 }
3042 }
3043
3044 public final void movslq(Register dst, int imm32) {
3045 prefixq(dst);
3046 emitByte(0xC7);
3047 emitModRM(0, dst);
3048 emitInt(imm32);
3049 }
3050
3051 public final void movdq(Register dst, AMD64Address src) {
3052 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src);
3053 }
3054
3055 public final void movdq(AMD64Address dst, Register src) {
3056 AMD64MROp.MOVQ.emit(this, QWORD, dst, src);
3057 }
3058
3059 public final void movdq(Register dst, Register src) {
3060 if (inRC(XMM, dst) && inRC(CPU, src)) {
3061 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src);
3062 } else if (inRC(XMM, src) && inRC(CPU, dst)) {
3063 AMD64MROp.MOVQ.emit(this, QWORD, dst, src);
3064 } else {
3065 throw new InternalError("should not reach here");
3066 }
3067 }
3068
3069 public final void movdl(Register dst, Register src) {
3070 if (inRC(XMM, dst) && inRC(CPU, src)) {
3071 AMD64RMOp.MOVD.emit(this, DWORD, dst, src);
3072 } else if (inRC(XMM, src) && inRC(CPU, dst)) {
3073 AMD64MROp.MOVD.emit(this, DWORD, dst, src);
3074 } else {
3075 throw new InternalError("should not reach here");
3076 }
3077 }
3078
3079 public final void movdl(Register dst, AMD64Address src) {
3080 AMD64RMOp.MOVD.emit(this, DWORD, dst, src);
3081 }
3082
3083 public final void movddup(Register dst, Register src) {
3084 assert supports(CPUFeature.SSE3);
3085 assert inRC(XMM, dst) && inRC(XMM, src);
3086 simdPrefix(dst, Register.None, src, SD, P_0F, false);
3087 emitByte(0x12);
3088 emitModRM(dst, src);
3089 }
3090
3091 public final void movdqu(Register dst, AMD64Address src) {
3092 assert inRC(XMM, dst);
3093 simdPrefix(dst, Register.None, src, SS, P_0F, false);
3094 emitByte(0x6F);
3095 emitOperandHelper(dst, src, 0);
3096 }
3097
3098 public final void movdqu(Register dst, Register src) {
3099 assert inRC(XMM, dst) && inRC(XMM, src);
3100 simdPrefix(dst, Register.None, src, SS, P_0F, false);
3101 emitByte(0x6F);
3102 emitModRM(dst, src);
3103 }
3104
3105 // Insn: VMOVDQU xmm2/m128, xmm1
3106
3107 public final void movdqu(AMD64Address dst, Register src) {
3108 assert inRC(XMM, src);
3109 // Code: VEX.128.F3.0F.WIG 7F /r
3110 simdPrefix(src, Register.None, dst, SS, P_0F, false);
3111 emitByte(0x7F);
3112 emitOperandHelper(src, dst, 0);
3113 }
3114
3115 public final void movslq(AMD64Address dst, int imm32) {
3116 prefixq(dst);
3117 emitByte(0xC7);
3118 emitOperandHelper(0, dst, 4);
3119 emitInt(imm32);
3120 }
3121
3122 public final void movslq(Register dst, AMD64Address src) {
3123 prefixq(src, dst);
3124 emitByte(0x63);
3125 emitOperandHelper(dst, src, 0);
3126 }
3127
3128 public final void movslq(Register dst, Register src) {
3129 prefixq(dst, src);
3130 emitByte(0x63);
3131 emitModRM(dst, src);
3132 }
3133
3134 public final void negq(Register dst) {
3275 // We only have to handle StoreLoad
3276 if ((barriers & STORE_LOAD) != 0) {
3277 // All usable chips support "locked" instructions, which suffice
3278 // as barriers and are much faster than the alternative of using
3279 // the cpuid instruction. We therefore emit a locked add of 0 to
3280 // [rsp], which is conveniently a no-op apart from clobbering the
3281 // flags.
3282 // Any change to this code may need to revisit other places in
3283 // the code where this idiom is used, in particular the
3284 // orderAccess code.
3285 lock();
3286 addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here
3287 }
3288 }
3289 }
3290
3291 @Override
3292 protected final void patchJumpTarget(int branch, int branchTarget) {
3293 int op = getByte(branch);
3294 assert op == 0xE8 // call
3295 || op == 0x00 // jump table entry
3296 || op == 0xE9 // jmp
3297 || op == 0xEB // short jmp
3298 || (op & 0xF0) == 0x70 // short jcc
3299 || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
3300 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
3301
3302 if (op == 0x00) {
3303 int offsetToJumpTableBase = getShort(branch + 1);
3304 int jumpTableBase = branch - offsetToJumpTableBase;
3305 int imm32 = branchTarget - jumpTableBase;
3306 emitInt(imm32, branch);
3307 } else if (op == 0xEB || (op & 0xF0) == 0x70) {
3308
3309 // short offset operators (jmp and jcc)
3310 final int imm8 = branchTarget - (branch + 2);
3311 /*
3312 * Since a wrongly patched short branch can produce code that appears to work but
3313 * misbehaves, we always fail with an exception here instead of relying on an assert.
3314 */
3315 if (!NumUtil.isByte(imm8)) {
3536 emitByte(0xAE);
3537 emitByte(0xE8);
3538 }
3539
3540 public final void vptest(Register dst, Register src) {
3541 VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
3542 }
3543
3544 public final void vpxor(Register dst, Register nds, Register src) {
3545 VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3546 }
3547
3548 public final void vpxor(Register dst, Register nds, AMD64Address src) {
3549 VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3550 }
3551
3552 public final void vmovdqu(Register dst, AMD64Address src) {
3553 VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
3554 }
3555
3556 public final void vmovdqu(AMD64Address dst, Register src) {
3557 assert inRC(XMM, src);
3558 VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
3559 }
3560
3561 public final void vpmovzxbw(Register dst, AMD64Address src) {
3562 assert supports(CPUFeature.AVX2);
3563 VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
3564 }
3565
3566 public final void vzeroupper() {
3567 emitVEX(L128, P_, M_0F, W0, 0, 0, true);
3568 emitByte(0x77);
3569 }
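
// Note (added for exposition; not part of the original source): VZEROUPPER
// zeroes bits 255:128 of all YMM registers. Emitting it before executing
// SSE-encoded code avoids the AVX-SSE transition penalty on processors that
// track dirty upper halves.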
3570
3571 // Insn: KORTESTD k1, k2
3572
3573 // Sets ZF if the OR of the two mask operands is all zeros, and CF if it is all ones
3574 public final void kortestd(Register src1, Register src2) {
3575 assert supports(CPUFeature.AVX512BW);
3576 assert inRC(MASK, src1) && inRC(MASK, src2);
3577 // Code: VEX.L0.66.0F.W1 98 /r
3578 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
3579 emitByte(0x98);
3580 emitModRM(src1, src2);
3581 }
3582
3583 // Insn: KORTESTQ k1, k2
3584
3585 // Sets ZF if the OR of the two mask operands is all zeros, and CF if it is all ones
3586 public final void kortestq(Register src1, Register src2) {
3587 assert supports(CPUFeature.AVX512BW);
3588 assert inRC(MASK, src1) && inRC(MASK, src2);
3589 // Code: VEX.L0.0F.W1 98 /r
3590 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, true);
3591 emitByte(0x98);
3592 emitModRM(src1, src2);
3593 }
3594
3595 public final void kmovd(Register dst, Register src) {
3596 assert supports(CPUFeature.AVX512BW);
3597 assert inRC(MASK, dst) || inRC(CPU, dst);
3598 assert inRC(MASK, src) || inRC(CPU, src);
3599 assert !(inRC(CPU, dst) && inRC(CPU, src));
3600
3601 if (inRC(MASK, dst)) {
3602 if (inRC(MASK, src)) {
3603 // kmovd(KRegister dst, KRegister src):
3604 // Insn: KMOVD k1, k2/m32
3605 // Code: VEX.L0.66.0F.W1 90 /r
3606 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, true);
3607 emitByte(0x90);
3608 emitModRM(dst, src);
3609 } else {
3610 // kmovd(KRegister dst, Register src)
3611 // Insn: KMOVD k1, r32
3612 // Code: VEX.L0.F2.0F.W0 92 /r
3613 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
3614 emitByte(0x92);
3615 emitModRM(dst, src);
3616 }
3617 } else {
3618 if (inRC(MASK, src)) {
3619 // kmovd(Register dst, KRegister src)
3620 // Insn: KMOVD r32, k1
3621 // Code: VEX.L0.F2.0F.W0 93 /r
3622 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
3623 emitByte(0x93);
3624 emitModRM(dst, src);
3625 } else {
3626 throw GraalError.shouldNotReachHere();
3627 }
3628 }
3629 }
3630
3631 public final void kmovq(Register dst, Register src) {
3632 assert supports(CPUFeature.AVX512BW);
3633 assert inRC(MASK, dst) || inRC(CPU, dst);
3634 assert inRC(MASK, src) || inRC(CPU, src);
3635 assert !(inRC(CPU, dst) && inRC(CPU, src));
3636
3637 if (inRC(MASK, dst)) {
3638 if (inRC(MASK, src)) {
3639 // kmovq(KRegister dst, KRegister src):
3640 // Insn: KMOVQ k1, k2/m64
3641 // Code: VEX.L0.0F.W1 90 /r
3642 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, true);
3643 emitByte(0x90);
3644 emitModRM(dst, src);
3645 } else {
3646 // kmovq(KRegister dst, Register src)
3647 // Insn: KMOVQ k1, r64
3648 // Code: VEX.L0.F2.0F.W1 92 /r
3649 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
3650 emitByte(0x92);
3651 emitModRM(dst, src);
3652 }
3653 } else {
3654 if (inRC(MASK, src)) {
3655 // kmovq(Register dst, KRegister src)
3656 // Insn: KMOVQ r64, k1
3657 // Code: VEX.L0.F2.0F.W1 93 /r
3658 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
3659 emitByte(0x93);
3660 emitModRM(dst, src);
3661 } else {
3662 throw GraalError.shouldNotReachHere();
3663 }
3664 }
3665 }
3666
3667 // Insn: KTESTD k1, k2
3668
3669 public final void ktestd(Register src1, Register src2) {
3670 assert supports(CPUFeature.AVX512BW);
3671 assert inRC(MASK, src1) && inRC(MASK, src2);
3672 // Code: VEX.L0.66.0F.W1 99 /r
3673 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
3674 emitByte(0x99);
3675 emitModRM(src1, src2);
3676 }
3677
3678 public final void evmovdqu64(Register dst, AMD64Address src) {
3679 assert supports(CPUFeature.AVX512F);
3680 assert inRC(XMM, dst);
3681 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
3682 emitByte(0x6F);
3683 emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3684 }
3685
3686 // Insn: VPMOVZXBW zmm1, m256
3687
3688 public final void evpmovzxbw(Register dst, AMD64Address src) {
3689 assert supports(CPUFeature.AVX512BW);
3690 assert inRC(XMM, dst);
3691 // Code: EVEX.512.66.0F38.WIG 30 /r
3692 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
3693 emitByte(0x30);
3694 emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3695 }
3696
3697 public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
3698 assert supports(CPUFeature.AVX512BW);
3699 assert inRC(MASK, kdst) && inRC(XMM, nds);
3700 evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
3701 emitByte(0x74);
3702 emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3703 }
3704
3705 // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512
3706 // -----
3707 // Insn: VMOVDQU16 zmm1, m512
3708
3709 public final void evmovdqu16(Register dst, AMD64Address src) {
3710 assert supports(CPUFeature.AVX512BW);
3711 assert inRC(XMM, dst);
3712 // Code: EVEX.512.F2.0F.W1 6F /r
3713 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3714 emitByte(0x6F);
3715 emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3716 }
3717
3718 // Insn: VMOVDQU16 zmm1, k1:z, m512
3719
3720 public final void evmovdqu16(Register dst, Register mask, AMD64Address src) {
3721 assert supports(CPUFeature.AVX512BW);
3722 assert inRC(XMM, dst) && inRC(MASK, mask);
3723 // Code: EVEX.512.F2.0F.W1 6F /r
3724 evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0);
3725 emitByte(0x6F);
3726 emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3727 }
3728
3729 // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1
3730 // -----
3731 // Insn: VMOVDQU16 m512, zmm1
3732
3733 public final void evmovdqu16(AMD64Address dst, Register src) {
3734 assert supports(CPUFeature.AVX512BW);
3735 assert inRC(XMM, src);
3736 // Code: EVEX.512.F2.0F.W1 7F /r
3737 evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3738 emitByte(0x7F);
3739 emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3740 }
3741
3742 // Insn: VMOVDQU16 m512, k1, zmm1
3743
3744 public final void evmovdqu16(AMD64Address dst, Register mask, Register src) {
3745 assert supports(CPUFeature.AVX512BW);
3746 assert inRC(MASK, mask) && inRC(XMM, src);
3747 // Code: EVEX.512.F2.0F.W1 7F /r
3748 evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3749 emitByte(0x7F);
3750 emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3751 }
3752
3753 // Insn: VPBROADCASTW zmm1 {k1}{z}, reg
3754 // -----
3755 // Insn: VPBROADCASTW zmm1, reg
3756
3757 public final void evpbroadcastw(Register dst, Register src) {
3758 assert supports(CPUFeature.AVX512BW);
3759 assert inRC(XMM, dst) && inRC(CPU, src);
3760 // Code: EVEX.512.66.0F38.W0 7B /r
3761 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0);
3762 emitByte(0x7B);
3763 emitModRM(dst, src);
3764 }
3765
3766 // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
3767 // -----
3768 // Insn: VPCMPUW k1, zmm2, zmm3, imm8
3769
3770 public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) {
3771 assert supports(CPUFeature.AVX512BW);
3772 assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src);
3773 // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
3774 evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
3775 emitByte(0x3E);
3776 emitModRM(kdst, src);
3777 emitByte(vcc);
3778 }
3779
3780 // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
3781 // -----
3782 // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8
3783
3784 public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) {
3785 assert supports(CPUFeature.AVX512BW);
3786 assert inRC(MASK, kdst) && inRC(MASK, mask);
3787 assert inRC(XMM, nds) && inRC(XMM, src);
3788 // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
3789 evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
3790 emitByte(0x3E);
3791 emitModRM(kdst, src);
3792 emitByte(vcc);
3793 }
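
// Illustrative sketch (added for exposition; not part of the original source):
// the trailing byte (vcc) selects the unsigned predicate: 0 = EQ, 1 = LT,
// 2 = LE, 4 = NEQ, 5 = NLT, 6 = NLE. Predicate 6 therefore sets a mask bit for
// every word where nds is unsigned-greater than src.
private void unsignedGreaterWordsSketch(Register kdst, Register nds, Register src) {
    evpcmpuw(kdst, nds, src, 6);
}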
3794
3795 // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2
3796 // -----
3797 // Insn: VPMOVWB m256, zmm2
3798
3799 public final void evpmovwb(AMD64Address dst, Register src) {
3800 assert supports(CPUFeature.AVX512BW);
3801 assert inRC(XMM, src);
3802 // Code: EVEX.512.F3.0F38.W0 30 /r
3803 evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
3804 emitByte(0x30);
3805 emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3806 }
3807
3808 // Insn: VPMOVWB m256, k1, zmm2
3809
3810 public final void evpmovwb(AMD64Address dst, Register mask, Register src) {
3811 assert supports(CPUFeature.AVX512BW);
3812 assert inRC(MASK, mask) && inRC(XMM, src);
3813 // Code: EVEX.512.F3.0F38.W0 30 /r
3814 evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
3815 emitByte(0x30);
3816 emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3817 }
3818
3819 // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
3820 // -----
3821 // Insn: VPMOVZXBW zmm1, k1, m256
3822
3823 public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) {
3824 assert supports(CPUFeature.AVX512BW);
3825 assert inRC(MASK, mask) && inRC(XMM, dst);
3826 // Code: EVEX.512.66.0F38.WIG 30 /r
3827 evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
3828 emitByte(0x30);
3829 emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3830 }
3831
3832 }