 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */


package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.MASK;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIntelNops;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import java.util.EnumSet;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
        }
    }

    private enum VEXOpAssertion {
        AVX1(CPUFeature.AVX, CPUFeature.AVX),
        AVX1_2(CPUFeature.AVX, CPUFeature.AVX2),
        AVX2(CPUFeature.AVX2, CPUFeature.AVX2),
        AVX1_128ONLY(CPUFeature.AVX, null),
        AVX1_256ONLY(null, CPUFeature.AVX),
        AVX2_256ONLY(null, CPUFeature.AVX2),
        XMM_CPU(CPUFeature.AVX, null, XMM, null, CPU, null),
        XMM_XMM_CPU(CPUFeature.AVX, null, XMM, XMM, CPU, null),
        CPU_XMM(CPUFeature.AVX, null, CPU, null, XMM, null),
        AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, CPU, null, XMM, null),
        BMI1(CPUFeature.BMI1, null, CPU, CPU, CPU, null),
        BMI2(CPUFeature.BMI2, null, CPU, CPU, CPU, null),
        FMA(CPUFeature.FMA, null, XMM, XMM, XMM, null);

        private final CPUFeature l128feature;
        private final CPUFeature l256feature;

        private final RegisterCategory rCategory;
        private final RegisterCategory vCategory;
        private final RegisterCategory mCategory;
        private final RegisterCategory imm8Category;

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature) {
            this(l128feature, l256feature, XMM, XMM, XMM, XMM);
        }

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory, RegisterCategory imm8Category) {
            this.l128feature = l128feature;
            this.l256feature = l256feature;
            this.rCategory = rCategory;
            this.vCategory = vCategory;
            this.mCategory = mCategory;
            this.imm8Category = imm8Category;
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
            return check(arch, getLFlag(size), r, v, m, null);
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
            return check(arch, getLFlag(size), r, v, m, imm8);
        }

        public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
            switch (l) {
                case L128:
                    assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128 bit instruction";
                    break;
                case L256:
                    assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256 bit instruction";
                    break;
            }
            if (r != null) {
                assert r.getRegisterCategory().equals(rCategory);
            }
            if (v != null) {
                assert v.getRegisterCategory().equals(vCategory);
            }
            if (m != null) {
                assert m.getRegisterCategory().equals(mCategory);
            }
            if (imm8 != null) {
                assert imm8.getRegisterCategory().equals(imm8Category);
            }
            return true;
        }

        public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
            switch (avxSize) {
                case XMM:
                    return l128feature != null && features.contains(l128feature);
                case YMM:
                    return l256feature != null && features.contains(l256feature);
                default:
                    throw GraalError.shouldNotReachHere();
            }
        }
    }
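
    // Illustrative note (added commentary, not original code): XMM_CPU, for
    // example, requires the ModRM.reg operand (r) to be an XMM register and
    // the ModRM.rm operand (m) to be a general-purpose register, which is the
    // shape of instructions such as VMOVD xmm, r32 declared below.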

    /**
     * Base class for VEX-encoded instructions.
     */
    public static class VexOp {
        protected final int pp;
        protected final int mmmmm;
        protected final int w;
        protected final int op;

        private final String opcode;
        protected final VEXOpAssertion assertion;

        protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            this.pp = pp;
            this.mmmmm = mmmmm;
            this.w = w;
            this.op = op;
            this.opcode = opcode;
            this.assertion = assertion;
        }

        public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
            return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size);
        }

        @Override
        public String toString() {
            return opcode;
        }
    }
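
    // A brief orientation on the fields above (added commentary): pp selects
    // the implied SIMD prefix (P_ = none, P_66 = 0x66, P_F3 = 0xF3, P_F2 = 0xF2),
    // mmmmm selects the opcode map (M_0F, M_0F38, M_0F3A), and w is the
    // REX.W-equivalent bit (W0, W1, or WIG when the instruction ignores it).
    // vexPrefix packs these fields into the two- or three-byte VEX prefix that
    // precedes the opcode byte.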

    /**
     * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
     */
    public static class VexRROp extends VexOp {
        // @formatter:off
        public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
        // @formatter:on

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            assert op != 0x1A && op != 0x5A;
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RM.
     */
    public static class VexRMOp extends VexRROp {
        // @formatter:off
        public static final VexRMOp VCVTTSS2SI = new VexRMOp("VCVTTSS2SI", P_F3, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSS2SQ = new VexRMOp("VCVTTSS2SQ", P_F3, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SI = new VexRMOp("VCVTTSD2SI", P_F2, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SQ = new VexRMOp("VCVTTSD2SQ", P_F2, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTPS2PD = new VexRMOp("VCVTPS2PD", P_, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTPD2PS = new VexRMOp("VCVTPD2PS", P_66, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTDQ2PS = new VexRMOp("VCVTDQ2PS", P_, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPS2DQ = new VexRMOp("VCVTTPS2DQ", P_F3, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPD2DQ = new VexRMOp("VCVTTPD2DQ", P_66, M_0F, WIG, 0xE6);
        public static final VexRMOp VCVTDQ2PD = new VexRMOp("VCVTDQ2PD", P_F3, M_0F, WIG, 0xE6);
        public static final VexRMOp VPMOVSXBQ = new VexRMOp("VPMOVSXBQ", P_66, M_0F38, WIG, 0x22);
        public static final VexRMOp VPMOVSXWD = new VexRMOp("VPMOVSXWD", P_66, M_0F38, WIG, 0x23);
        public static final VexRMOp VPMOVSXWQ = new VexRMOp("VPMOVSXWQ", P_66, M_0F38, WIG, 0x24);
        public static final VexRMOp VPMOVSXDQ = new VexRMOp("VPMOVSXDQ", P_66, M_0F38, WIG, 0x25);
        public static final VexRMOp VPMOVZXBW = new VexRMOp("VPMOVZXBW", P_66, M_0F38, WIG, 0x30);
        public static final VexRMOp VPMOVZXBD = new VexRMOp("VPMOVZXBD", P_66, M_0F38, WIG, 0x31);
        public static final VexRMOp VPMOVZXBQ = new VexRMOp("VPMOVZXBQ", P_66, M_0F38, WIG, 0x32);
        public static final VexRMOp VPMOVZXWD = new VexRMOp("VPMOVZXWD", P_66, M_0F38, WIG, 0x33);
        public static final VexRMOp VPMOVZXWQ = new VexRMOp("VPMOVZXWQ", P_66, M_0F38, WIG, 0x34);
        public static final VexRMOp VPMOVZXDQ = new VexRMOp("VPMOVZXDQ", P_66, M_0F38, WIG, 0x35);
        public static final VexRMOp VPTEST = new VexRMOp("VPTEST", P_66, M_0F38, WIG, 0x17);
        public static final VexRMOp VSQRTPD = new VexRMOp("VSQRTPD", P_66, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTPS = new VexRMOp("VSQRTPS", P_, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSD = new VexRMOp("VSQRTSD", P_F2, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSS = new VexRMOp("VSQRTSS", P_F3, M_0F, WIG, 0x51);
        public static final VexRMOp VUCOMISS = new VexRMOp("VUCOMISS", P_, M_0F, WIG, 0x2E);
        public static final VexRMOp VUCOMISD = new VexRMOp("VUCOMISD", P_66, M_0F, WIG, 0x2E);
        // @formatter:on

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }

    /**
     * VEX-encoded move instructions.
     * <p>
     * These instructions have two opcodes: op is the forward move instruction with an operand order
     * of RM, and opReverse is the reverse move instruction with an operand order of MR.
     */
    public static final class VexMoveOp extends VexRMOp {
        // @formatter:off
        public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSS = new VexMoveOp("VMOVSS", P_F3, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSD = new VexMoveOp("VMOVSD", P_F2, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVD = new VexMoveOp("VMOVD", P_66, M_0F, W0, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        public static final VexMoveOp VMOVQ = new VexMoveOp("VMOVQ", P_66, M_0F, W1, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private final int opReverse;

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }

        public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitModRM(src, dst);
        }
    }
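
    // Usage sketch (illustrative only; assumes an AMD64Assembler asm and
    // registers such as jdk.vm.ci.amd64.AMD64.xmm0 and AMD64.rsi):
    //   VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, xmm0, new AMD64Address(rsi)); // load, forward opcode 0x6F
    //   VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, new AMD64Address(rsi), xmm0); // store, opReverse 0x7F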

    public interface VexRRIOp {
        void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
    }

    /**
     * VEX-encoded instructions with an operand order of RMI.
     */
    public static final class VexRMIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexRMIOp VPERMQ = new VexRMIOp("VPERMQ", P_66, M_0F3A, W1, 0x00, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFD = new VexRMIOp("VPSHUFD", P_66, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 1);
            asm.emitByte(imm8);
        }
    }
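
    // Example (illustrative): reversing the four dwords of an XMM register can
    // be emitted as VPSHUFD with control byte 0x1B (0b00_01_10_11):
    //   VexRMIOp.VPSHUFD.emit(asm, AVXSize.XMM, xmm0, xmm1, 0x1B);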

    /**
     * VEX-encoded instructions with an operand order of MRI.
     */
    public static final class VexMRIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
        public static final VexMRIOp VPEXTRB = new VexMRIOp("VPEXTRB", P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRW = new VexMRIOp("VPEXTRW", P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRD = new VexMRIOp("VPEXTRD", P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRQ = new VexMRIOp("VPEXTRQ", P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(src, dst);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(src, dst, 1);
            asm.emitByte(imm8);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RVMR.
     */
    public static class VexRVMROp extends VexOp {
        // @formatter:off
        public static final VexRVMROp VPBLENDVB = new VexRVMROp("VPBLENDVB", P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
        public static final VexRVMROp VBLENDVPS = new VexRVMROp("VBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
        public static final VexRVMROp VBLENDVPD = new VexRVMROp("VBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
        // @formatter:on

        protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(mask.encoding() << 4);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
            asm.emitByte(mask.encoding() << 4);
        }
    }
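
    // Note (added commentary): the fourth, register operand of these blend
    // instructions is encoded in the high nibble of a trailing immediate byte,
    // which is what emitByte(mask.encoding() << 4) above produces.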

    /**
     * VEX-encoded instructions with an operand order of RVM.
     */
    public static class VexRVMOp extends VexOp {
        // @formatter:off
        public static final VexRVMOp VANDPS = new VexRVMOp("VANDPS", P_, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDPD = new VexRVMOp("VANDPD", P_66, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDNPS = new VexRVMOp("VANDNPS", P_, M_0F, WIG, 0x55);
        public static final VexRVMOp VANDNPD = new VexRVMOp("VANDNPD", P_66, M_0F, WIG, 0x55);
        public static final VexRVMOp VORPS = new VexRVMOp("VORPS", P_, M_0F, WIG, 0x56);
        public static final VexRVMOp VORPD = new VexRVMOp("VORPD", P_66, M_0F, WIG, 0x56);
        public static final VexRVMOp VXORPS = new VexRVMOp("VXORPS", P_, M_0F, WIG, 0x57);
        public static final VexRVMOp VXORPD = new VexRVMOp("VXORPD", P_66, M_0F, WIG, 0x57);
        public static final VexRVMOp VADDPS = new VexRVMOp("VADDPS", P_, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDPD = new VexRVMOp("VADDPD", P_66, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSS = new VexRVMOp("VADDSS", P_F3, M_0F, WIG, 0x58);
        public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VFMADD231SS = new VexRVMOp("VFMADD231SS", P_66, M_0F38, W0, 0xB9, VEXOpAssertion.FMA);
        public static final VexRVMOp VFMADD231SD = new VexRVMOp("VFMADD231SD", P_66, M_0F38, W1, 0xB9, VEXOpAssertion.FMA);
        // @formatter:on

        private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
        }
    }

    public static final class VexGeneralPurposeRVMOp extends VexRVMOp {
        // @formatter:off
        public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        // @formatter:on

        private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
        }
    }
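
    // Example (illustrative; rax/rbx/rcx from jdk.vm.ci.amd64.AMD64):
    //   VexGeneralPurposeRVMOp.ANDN.emit(asm, AVXSize.QWORD, rax, rbx, rcx); // rax = ~rbx & rcx
    // Note that the operand size, not the constructor's w flag, selects W0
    // (32-bit) or W1 (64-bit) in the prefix.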

    public static final class VexGeneralPurposeRMVOp extends VexOp {
        // @formatter:off
        public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
        // @formatter:on

        private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src1);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src1, 0);
        }
    }
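
    // Note (added commentary): for these BMI instructions the variable operand
    // (shift count or control word) travels in VEX.vvvv, which is why emit
    // swaps src1 and src2 when building the prefix; e.g. SHLX computes
    // dst = src1 << src2 with src2 taken from vvvv.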

    public static final class VexGeneralPurposeRMOp extends VexRMOp {
        // @formatter:off
        public static final VexGeneralPurposeRMOp BLSI = new VexGeneralPurposeRMOp("BLSI", P_, M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRMOp BLSMSK = new VexGeneralPurposeRMOp("BLSMSK", P_, M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRMOp BLSR = new VexGeneralPurposeRMOp("BLSR", P_, M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1);
        // @formatter:on
        private final int ext;

        private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.ext = ext;
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitModRM(ext, src);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false);
            asm.emitByte(op);
            asm.emitOperandHelper(ext, src, 0);
        }
    }

    /**
     * VEX-encoded shift instructions with an operand order of either RVM or VMI.
     */
    public static final class VexShiftOp extends VexRVMOp implements VexRRIOp {
        // @formatter:off
        public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2);
        public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2);
        public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2);
        public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4);
        public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4);
        public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6);
        public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6);
        public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6);
        // @formatter:on

        private final int immOp;
        private final int r;

        private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) {
            super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2);
            this.immOp = immOp;
            this.r = r;
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, null, dst, src);
            asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, false);
            asm.emitByte(immOp);
            asm.emitModRM(r, src);
            asm.emitByte(imm8);
        }
    }
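
    // Example (illustrative): shifting every dword right by 4 with the
    // immediate form uses the secondary opcode plus the /r extension field:
    //   VexShiftOp.VPSRLD.emit(asm, AVXSize.XMM, xmm0, xmm1, 4); // emits 0x72 /2, imm8 = 4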

    public static final class VexMaskMoveOp extends VexOp {
        // @formatter:off
        public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E);
        public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F);
        public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2);
        public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2);
        // @formatter:on

        private final int opReverse;

        private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null);
            asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, mask, null);
            asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RVMI.
     */
    public static final class VexRVMIOp extends VexOp {
        // @formatter:off
        public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6);
        public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6);
        public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY);
        // @formatter:on

        private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            assert (imm8 & 0xFF) == imm8;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            assert (imm8 & 0xFF) == imm8;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 1);
            asm.emitByte(imm8);
        }
    }
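
    // Example (illustrative): inserting xmm1 into the upper 128-bit lane of a
    // YMM destination (imm8 selects the lane):
    //   VexRVMIOp.VINSERTF128.emit(asm, AVXSize.YMM, xmm0, xmm0, xmm1, 1);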

    /**
     * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a
     * comparison operator.
     */
    public static final class VexFloatCompareOp extends VexOp {
        // @formatter:off
        public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2);
        public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
        public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2);
        public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
        // @formatter:on

        public enum Predicate {
            EQ_OQ(0x00),
            LT_OS(0x01),
            LE_OS(0x02),
                            return LT_OQ;
                        case LE:
                            return LE_OQ;
                        case GT:
                            return GT_OQ;
                        case GE:
                            return GE_OQ;
                        default:
                            throw GraalError.shouldNotReachHere();
                    }
                }
            }
        }

        private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) {
            super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(p.imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 1);
            asm.emitByte(p.imm8);
        }
    }
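
    // Example (illustrative): an ordered less-than comparison that yields an
    // all-ones/all-zeros mask per element:
    //   VexFloatCompareOp.VCMPPS.emit(asm, AVXSize.YMM, xmm0, xmm1, xmm2, Predicate.LT_OS);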

    public final void addl(AMD64Address dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, Register src) {
        ADD.rmOp.emit(this, DWORD, dst, src);
    }

    public final void addpd(Register dst, Register src) {
        SSEOp.ADD.emit(this, PD, dst, src);
    }
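
    // Note (added commentary): getMIOpcode(DWORD, isByte(imm32)) above selects
    // between the sign-extended one-byte immediate encoding (0x83 /0) and the
    // full four-byte immediate encoding (0x81 /0), so small constants yield
    // shorter code.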

    public void lfence() {
        emitByte(0x0f);
        emitByte(0xae);
        emitByte(0xe8);
    }

    public final void vptest(Register dst, Register src) {
        VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpxor(Register dst, Register nds, Register src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vpxor(Register dst, Register nds, AMD64Address src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vmovdqu(Register dst, AMD64Address src) {
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vmovdqu(AMD64Address dst, Register src) {
        assert inRC(XMM, src);
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX2);
        VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vzeroupper() {
        emitVEX(L128, P_, M_0F, W0, 0, 0, true);
        emitByte(0x77);
    }

    // Insn: KORTESTD k1, k2

    // This instruction sets ZF and CF based on the OR of the two operands
    public final void kortestd(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.66.0F.W1 98 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
        emitByte(0x98);
        emitModRM(src1, src2);
    }

    // Insn: KORTESTQ k1, k2

    // This instruction sets ZF and CF based on the OR of the two operands
    public final void kortestq(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.0F.W1 98 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, true);
        emitByte(0x98);
        emitModRM(src1, src2);
    }

    public final void kmovd(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, dst) || inRC(CPU, dst);
        assert inRC(MASK, src) || inRC(CPU, src);
        assert !(inRC(CPU, dst) && inRC(CPU, src));

        if (inRC(MASK, dst)) {
            if (inRC(MASK, src)) {
                // kmovd(KRegister dst, KRegister src):
                // Insn: KMOVD k1, k2/m32
                // Code: VEX.L0.66.0F.W1 90 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, true);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovd(KRegister dst, Register src)
                // Insn: KMOVD k1, r32
                // Code: VEX.L0.F2.0F.W0 92 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (inRC(MASK, src)) {
                // kmovd(Register dst, KRegister src)
                // Insn: KMOVD r32, k1
                // Code: VEX.L0.F2.0F.W0 93 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }

    public final void kmovq(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, dst) || inRC(CPU, dst);
        assert inRC(MASK, src) || inRC(CPU, src);
        assert !(inRC(CPU, dst) && inRC(CPU, src));

        if (inRC(MASK, dst)) {
            if (inRC(MASK, src)) {
                // kmovq(KRegister dst, KRegister src):
                // Insn: KMOVQ k1, k2/m64
                // Code: VEX.L0.0F.W1 90 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, true);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovq(KRegister dst, Register src)
                // Insn: KMOVQ k1, r64
                // Code: VEX.L0.F2.0F.W1 92 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (inRC(MASK, src)) {
                // kmovq(Register dst, KRegister src)
                // Insn: KMOVQ r64, k1
                // Code: VEX.L0.F2.0F.W1 93 /r
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }

    // Insn: KTESTD k1, k2

    public final void ktestd(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, src1) && inRC(MASK, src2);
        // Code: VEX.L0.66.0F.W1 99 /r
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true);
        emitByte(0x99);
        emitModRM(src1, src2);
    }

    public final void evmovdqu64(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512F);
        assert inRC(XMM, dst);
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVZXBW zmm1, m256

    public final void evpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst);
        // Code: EVEX.512.66.0F38.WIG 30 /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(XMM, nds);
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
        emitByte(0x74);
        emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512
    // -----
    // Insn: VMOVDQU16 zmm1, m512

    public final void evmovdqu16(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst);
        // Code: EVEX.512.F2.0F.W1 6F /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm1, k1:z, m512

    public final void evmovdqu16(Register dst, Register mask, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst) && inRC(MASK, mask);
        // Code: EVEX.512.F2.0F.W1 6F /r
        evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1
    // -----
    // Insn: VMOVDQU16 m512, zmm1

    public final void evmovdqu16(AMD64Address dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, src);
        // Code: EVEX.512.F2.0F.W1 7F /r
        evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x7F);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VMOVDQU16 m512, k1, zmm1

    public final void evmovdqu16(AMD64Address dst, Register mask, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, src);
        // Code: EVEX.512.F2.0F.W1 7F /r
        evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
        emitByte(0x7F);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPBROADCASTW zmm1 {k1}{z}, reg
    // -----
    // Insn: VPBROADCASTW zmm1, reg

    public final void evpbroadcastw(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, dst) && inRC(CPU, src);
        // Code: EVEX.512.66.0F38.W0 7B /r
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0);
        emitByte(0x7B);
        emitModRM(dst, src);
    }

    // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
    // -----
    // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8

    public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, kdst) && inRC(MASK, mask);
        assert inRC(XMM, nds) && inRC(XMM, src);
        // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
        evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
        emitByte(0x3E);
        emitModRM(kdst, src);
        emitByte(vcc);
    }

    // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2
    // -----
    // Insn: VPMOVWB m256, zmm2

    public final void evpmovwb(AMD64Address dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(XMM, src);
        // Code: EVEX.512.F3.0F38.W0 30 /r
        evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVWB m256, k1, zmm2

    public final void evpmovwb(AMD64Address dst, Register mask, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, src);
        // Code: EVEX.512.F3.0F38.W0 30 /r
        evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
    // -----
    // Insn: VPMOVZXBW zmm1, k1, m256

    public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert inRC(MASK, mask) && inRC(XMM, dst);
        // Code: EVEX.512.66.0F38.WIG 30 /r
        evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
}

package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.MASK;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512BW;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512CD;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512DQ;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512F;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512VL;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIntelNops;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L512;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import java.util.EnumSet;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
    private enum EVEXFeatureAssertion {
        AVX512F_ALL(EnumSet.of(AVX512F), EnumSet.of(AVX512F), EnumSet.of(AVX512F)),
        AVX512F_128ONLY(EnumSet.of(AVX512F), null, null),
        AVX512F_VL(EnumSet.of(AVX512F, AVX512VL), EnumSet.of(AVX512F, AVX512VL), EnumSet.of(AVX512F)),
        AVX512CD_VL(EnumSet.of(AVX512F, AVX512CD, AVX512VL), EnumSet.of(AVX512F, AVX512CD, AVX512VL), EnumSet.of(AVX512F, AVX512CD)),
        AVX512DQ_VL(EnumSet.of(AVX512F, AVX512DQ, AVX512VL), EnumSet.of(AVX512F, AVX512DQ, AVX512VL), EnumSet.of(AVX512F, AVX512DQ)),
        AVX512BW_VL(EnumSet.of(AVX512F, AVX512BW, AVX512VL), EnumSet.of(AVX512F, AVX512BW, AVX512VL), EnumSet.of(AVX512F, AVX512BW));

        private final EnumSet<CPUFeature> l128features;
        private final EnumSet<CPUFeature> l256features;
        private final EnumSet<CPUFeature> l512features;

        EVEXFeatureAssertion(EnumSet<CPUFeature> l128features, EnumSet<CPUFeature> l256features, EnumSet<CPUFeature> l512features) {
            this.l128features = l128features;
            this.l256features = l256features;
            this.l512features = l512features;
        }

        public boolean check(AMD64 arch, int l) {
            switch (l) {
                case L128:
                    assert l128features != null && arch.getFeatures().containsAll(l128features) : "emitting illegal 128 bit instruction";
                    break;
                case L256:
                    assert l256features != null && arch.getFeatures().containsAll(l256features) : "emitting illegal 256 bit instruction";
                    break;
                case L512:
                    assert l512features != null && arch.getFeatures().containsAll(l512features) : "emitting illegal 512 bit instruction";
                    break;
            }
            return true;
        }

        public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
            switch (avxSize) {
                case XMM:
                    return l128features != null && features.containsAll(l128features);
                case YMM:
                    return l256features != null && features.containsAll(l256features);
                case ZMM:
                    return l512features != null && features.containsAll(l512features);
                default:
                    throw GraalError.shouldNotReachHere();
            }
        }
    }
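
    // Illustrative reading (added commentary): AVX512BW_VL, for instance,
    // requires AVX512F+AVX512BW+AVX512VL for 128- and 256-bit operands but
    // only AVX512F+AVX512BW for 512-bit ZMM operands, mirroring how the VL
    // extension gates the narrower vector lengths.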
950
951 private enum VEXOpAssertion {
952 AVX1(CPUFeature.AVX, CPUFeature.AVX, null),
953 AVX1_2(CPUFeature.AVX, CPUFeature.AVX2, null),
954 AVX2(CPUFeature.AVX2, CPUFeature.AVX2, null),
955 AVX1_128ONLY(CPUFeature.AVX, null, null),
956 AVX1_256ONLY(null, CPUFeature.AVX, null),
957 AVX2_256ONLY(null, CPUFeature.AVX2, null),
958 XMM_CPU(CPUFeature.AVX, null, null, XMM, null, CPU, null),
959 XMM_XMM_CPU(CPUFeature.AVX, null, null, XMM, XMM, CPU, null),
960 CPU_XMM(CPUFeature.AVX, null, null, CPU, null, XMM, null),
961 AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, null, CPU, null, XMM, null),
962 BMI1(CPUFeature.BMI1, null, null, CPU, CPU, CPU, null),
963 BMI2(CPUFeature.BMI2, null, null, CPU, CPU, CPU, null),
964 FMA(CPUFeature.FMA, null, null, XMM, XMM, XMM, null),
965
966 XMM_CPU_AVX512F_128ONLY(CPUFeature.AVX, null, EVEXFeatureAssertion.AVX512F_128ONLY, XMM, null, CPU, null),
967 AVX1_AVX512F_ALL(CPUFeature.AVX, CPUFeature.AVX, EVEXFeatureAssertion.AVX512F_ALL),
968 AVX1_AVX512F_VL(CPUFeature.AVX, CPUFeature.AVX, EVEXFeatureAssertion.AVX512F_VL);
969
970 private final CPUFeature l128feature;
971 private final CPUFeature l256feature;
972 private final EVEXFeatureAssertion l512features;
973
974 private final RegisterCategory rCategory;
975 private final RegisterCategory vCategory;
976 private final RegisterCategory mCategory;
977 private final RegisterCategory imm8Category;
978
979 VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, EVEXFeatureAssertion l512features) {
980 this(l128feature, l256feature, l512features, XMM, XMM, XMM, XMM);
981 }
982
983 VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, EVEXFeatureAssertion l512features, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory,
984 RegisterCategory imm8Category) {
985 this.l128feature = l128feature;
986 this.l256feature = l256feature;
987 this.l512features = l512features;
988 this.rCategory = rCategory;
989 this.vCategory = vCategory;
990 this.mCategory = mCategory;
991 this.imm8Category = imm8Category;
992 }
993
994 public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
995 return check(arch, getLFlag(size), r, v, m, null);
996 }
997
998 public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
999 return check(arch, getLFlag(size), r, v, m, imm8);
1000 }
1001
1002 public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
1003 if (isAVX512Register(r) || isAVX512Register(v) || isAVX512Register(m) || l == L512) {
1004 assert l512features != null && l512features.check(arch, l);
1005 } else if (l == L128) {
1006 assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128-bit instruction";
1007 } else if (l == L256) {
1008 assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256-bit instruction";
1009 }
1010 if (r != null) {
1011 assert r.getRegisterCategory().equals(rCategory);
1012 }
1013 if (v != null) {
1014 assert v.getRegisterCategory().equals(vCategory);
1015 }
1016 if (m != null) {
1017 assert m.getRegisterCategory().equals(mCategory);
1018 }
1019 if (imm8 != null) {
1020 assert imm8.getRegisterCategory().equals(imm8Category);
1021 }
1022 return true;
1023 }
1024
1025 public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize, boolean useZMMRegisters) {
1026 if (useZMMRegisters || avxSize == AVXSize.ZMM) {
1027 return l512features != null && l512features.supports(features, avxSize);
1028 } else if (avxSize == AVXSize.XMM) {
1029 return l128feature != null && features.contains(l128feature);
1030 } else if (avxSize == AVXSize.YMM) {
1031 return l256feature != null && features.contains(l256feature);
1032 }
1033 throw GraalError.shouldNotReachHere();
1034 }
1035 }
1036
1037 /**
1038 * Base class for VEX-encoded instructions.
1039 */
1040 public static class VexOp {
1041 protected final int pp;
1042 protected final int mmmmm;
1043 protected final int w;
1044 protected final int op;
1045
1046 private final String opcode;
1047 protected final VEXOpAssertion assertion;
1048
1049 protected final EVEXTuple evexTuple;
1050 protected final int wEvex;
1051
1052 protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
1053 this.pp = pp;
1054 this.mmmmm = mmmmm;
1055 this.w = w;
1056 this.op = op;
1057 this.opcode = opcode;
1058 this.assertion = assertion;
1059 this.evexTuple = evexTuple;
1060 this.wEvex = wEvex;
1061 }
1062
1063 protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1064 this(opcode, pp, mmmmm, w, op, assertion, EVEXTuple.INVALID, WIG);
1065 }
1066
1067 public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
1068 return isSupported(vasm, size, false);
1069 }
1070
1071 public final boolean isSupported(AMD64Assembler vasm, AVXSize size, boolean useZMMRegisters) {
1072 return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size, useZMMRegisters);
1073 }
1074
1075 @Override
1076 public String toString() {
1077 return opcode;
1078 }
1079
1080 protected final int getDisp8Scale(boolean useEvex, AVXSize size) {
1081 return useEvex ? evexTuple.getDisp8ScalingFactor(size) : DEFAULT_DISP8_SCALE;
1082 }
1083
1084 }
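
// Editorial usage sketch (not part of the original source). Concrete ops are
// exposed as constants on the subclasses below; in the sketches that follow,
// `asm` stands for an AMD64Assembler instance and register constants such as
// AMD64.xmm0 come from jdk.vm.ci.amd64.AMD64.
//
//     if (VexRMOp.VPTEST.isSupported(asm, AVXSize.YMM)) {
//         VexRMOp.VPTEST.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1); // VPTEST ymm0, ymm1
//     }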
1085
1086 /**
1087 * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
1088 */
1089 public static class VexRROp extends VexOp {
1090 // @formatter:off
1091 public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY, EVEXTuple.INVALID, WIG);
1092 // @formatter:on
1093
1094 protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
1095 super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
1096 }
1097
1098 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1099 assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1100 assert op != 0x1A && op != 0x5A; // 0x1A (VBROADCASTF128) and 0x5A (VBROADCASTI128) allow only memory source operands
1101 asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
1102 asm.emitByte(op);
1103 asm.emitModRM(dst, src);
1104 }
1105 }
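
// Sketch: VMASKMOVDQU requires both operands to be XMM registers, so only the
// register-register form is emitted.
//
//     VexRROp.VMASKMOVDQU.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1); // VMASKMOVDQU xmm0, xmm1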
1106
1107 /**
1108 * VEX-encoded instructions with an operand order of RM.
1109 */
1110 public static class VexRMOp extends VexRROp {
1111 // @formatter:off
1112 public static final VexRMOp VCVTTSS2SI = new VexRMOp("VCVTTSS2SI", P_F3, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
1113 public static final VexRMOp VCVTTSS2SQ = new VexRMOp("VCVTTSS2SQ", P_F3, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
1114 public static final VexRMOp VCVTTSD2SI = new VexRMOp("VCVTTSD2SI", P_F2, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
1115 public static final VexRMOp VCVTTSD2SQ = new VexRMOp("VCVTTSD2SQ", P_F2, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
1116 public static final VexRMOp VCVTPS2PD = new VexRMOp("VCVTPS2PD", P_, M_0F, WIG, 0x5A);
1117 public static final VexRMOp VCVTPD2PS = new VexRMOp("VCVTPD2PS", P_66, M_0F, WIG, 0x5A);
1118 public static final VexRMOp VCVTDQ2PS = new VexRMOp("VCVTDQ2PS", P_, M_0F, WIG, 0x5B);
1119 public static final VexRMOp VCVTTPS2DQ = new VexRMOp("VCVTTPS2DQ", P_F3, M_0F, WIG, 0x5B);
1120 public static final VexRMOp VCVTTPD2DQ = new VexRMOp("VCVTTPD2DQ", P_66, M_0F, WIG, 0xE6);
1121 public static final VexRMOp VCVTDQ2PD = new VexRMOp("VCVTDQ2PD", P_F3, M_0F, WIG, 0xE6);
1133 public static final VexRMOp VPMOVSXBQ = new VexRMOp("VPMOVSXBQ", P_66, M_0F38, WIG, 0x22);
1134 public static final VexRMOp VPMOVSXWD = new VexRMOp("VPMOVSXWD", P_66, M_0F38, WIG, 0x23);
1135 public static final VexRMOp VPMOVSXWQ = new VexRMOp("VPMOVSXWQ", P_66, M_0F38, WIG, 0x24);
1136 public static final VexRMOp VPMOVSXDQ = new VexRMOp("VPMOVSXDQ", P_66, M_0F38, WIG, 0x25);
1137 public static final VexRMOp VPMOVZXBW = new VexRMOp("VPMOVZXBW", P_66, M_0F38, WIG, 0x30);
1138 public static final VexRMOp VPMOVZXBD = new VexRMOp("VPMOVZXBD", P_66, M_0F38, WIG, 0x31);
1139 public static final VexRMOp VPMOVZXBQ = new VexRMOp("VPMOVZXBQ", P_66, M_0F38, WIG, 0x32);
1140 public static final VexRMOp VPMOVZXWD = new VexRMOp("VPMOVZXWD", P_66, M_0F38, WIG, 0x33);
1141 public static final VexRMOp VPMOVZXWQ = new VexRMOp("VPMOVZXWQ", P_66, M_0F38, WIG, 0x34);
1142 public static final VexRMOp VPMOVZXDQ = new VexRMOp("VPMOVZXDQ", P_66, M_0F38, WIG, 0x35);
1143 public static final VexRMOp VPTEST = new VexRMOp("VPTEST", P_66, M_0F38, WIG, 0x17);
1144 public static final VexRMOp VSQRTPD = new VexRMOp("VSQRTPD", P_66, M_0F, WIG, 0x51);
1145 public static final VexRMOp VSQRTPS = new VexRMOp("VSQRTPS", P_, M_0F, WIG, 0x51);
1146 public static final VexRMOp VSQRTSD = new VexRMOp("VSQRTSD", P_F2, M_0F, WIG, 0x51);
1147 public static final VexRMOp VSQRTSS = new VexRMOp("VSQRTSS", P_F3, M_0F, WIG, 0x51);
1148 public static final VexRMOp VUCOMISS = new VexRMOp("VUCOMISS", P_, M_0F, WIG, 0x2E);
1149 public static final VexRMOp VUCOMISD = new VexRMOp("VUCOMISD", P_66, M_0F, WIG, 0x2E);
1150 // @formatter:on
1151
1152 protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
1153 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1, EVEXTuple.INVALID, WIG);
1154 }
1155
1156 protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1157 this(opcode, pp, mmmmm, w, op, assertion, EVEXTuple.INVALID, WIG);
1158 }
1159
1160 protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
1161 super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
1162 }
1163
1164 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1165 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1166 boolean useEvex = asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
1167 asm.emitByte(op);
1168 asm.emitOperandHelper(dst, src, 0, getDisp8Scale(useEvex, size));
1169 }
1170 }
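
// Sketch: RM ops accept a register source (inherited from VexRROp) or a memory
// source via the overload above.
//
//     VexRMOp.VCVTDQ2PS.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1);                  // VCVTDQ2PS ymm0, ymm1
//     VexRMOp.VCVTDQ2PS.emit(asm, AVXSize.YMM, AMD64.xmm0, new AMD64Address(AMD64.rsi)); // VCVTDQ2PS ymm0, [rsi]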
1171
1172 /**
1173 * VEX-encoded move instructions.
1174 * <p>
1175 * These instructions have two opcodes: op is the forward move instruction with an operand order
1176 * of RM, and opReverse is the reverse move instruction with an operand order of MR.
1177 */
1178 public static final class VexMoveOp extends VexRMOp {
1179 // @formatter:off
1180 public static final VexMoveOp VMOVDQA32 = new VexMoveOp("VMOVDQA32", P_66, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W0);
1181 public static final VexMoveOp VMOVDQA64 = new VexMoveOp("VMOVDQA64", P_66, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W1);
1182 public static final VexMoveOp VMOVDQU32 = new VexMoveOp("VMOVDQU32", P_F3, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W0);
1183 public static final VexMoveOp VMOVDQU64 = new VexMoveOp("VMOVDQU64", P_F3, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W1);
1184 public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_, M_0F, WIG, 0x28, 0x29, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W0);
1185 public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W1);
1186 public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W0);
1187 public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W1);
1188 public static final VexMoveOp VMOVSS = new VexMoveOp("VMOVSS", P_F3, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_ALL, EVEXTuple.T1S_32BIT, W0);
1189 public static final VexMoveOp VMOVSD = new VexMoveOp("VMOVSD", P_F2, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_ALL, EVEXTuple.T1S_64BIT, W1);
1190 public static final VexMoveOp VMOVD = new VexMoveOp("VMOVD", P_66, M_0F, W0, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU_AVX512F_128ONLY, EVEXTuple.T1F_32BIT, W0);
1191 public static final VexMoveOp VMOVQ = new VexMoveOp("VMOVQ", P_66, M_0F, W1, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU_AVX512F_128ONLY, EVEXTuple.T1S_64BIT, W1);
1192 // @formatter:on
1193
1194 private final int opReverse;
1195
1196 private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1197 this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1, EVEXTuple.INVALID, WIG);
1198 }
1199
1200 private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1201 this(opcode, pp, mmmmm, w, op, opReverse, assertion, EVEXTuple.INVALID, WIG);
1202 }
1203
1204 private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
1205 super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
1206 this.opReverse = opReverse;
1207 }
1208
1209 public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
1210 assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1211 boolean useEvex = asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
1212 asm.emitByte(opReverse);
1213 asm.emitOperandHelper(src, dst, 0, getDisp8Scale(useEvex, size));
1214 }
1215
1216 public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1217 assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1218 asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
1219 asm.emitByte(opReverse);
1220 asm.emitModRM(src, dst);
1221 }
1222 }
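
// Sketch: the operand types select between the forward (load) and reverse
// (store) opcodes of the same move op.
//
//     VexMoveOp.VMOVUPS.emit(asm, AVXSize.YMM, AMD64.xmm0, new AMD64Address(AMD64.rsi)); // load: VMOVUPS ymm0, [rsi]
//     VexMoveOp.VMOVUPS.emit(asm, AVXSize.YMM, new AMD64Address(AMD64.rdi), AMD64.xmm0); // store: VMOVUPS [rdi], ymm0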
1223
1224 public interface VexRRIOp {
1225 void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
1226 }
1227
1228 /**
1229 * VEX-encoded instructions with an operand order of RMI.
1230 */
1231 public static final class VexRMIOp extends VexOp implements VexRRIOp {
1232 // @formatter:off
1233 public static final VexRMIOp VPERMQ = new VexRMIOp("VPERMQ", P_66, M_0F3A, W1, 0x00, VEXOpAssertion.AVX2_256ONLY);
1234 public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
1235 public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
1236 public static final VexRMIOp VPSHUFD = new VexRMIOp("VPSHUFD", P_66, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
1237 // @formatter:on
1238
1239 private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1240 super(opcode, pp, mmmmm, w, op, assertion);
1241 }
1242
1243 @Override
1244 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1245 assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
1246 asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
1247 asm.emitByte(op);
1248 asm.emitModRM(dst, src);
1249 asm.emitByte(imm8);
1250 }
1251
1252 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
1253 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1254 boolean useEvex = asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
1255 asm.emitByte(op);
1256 asm.emitOperandHelper(dst, src, 1, getDisp8Scale(useEvex, size));
1257 asm.emitByte(imm8);
1258 }
1259 }
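
// Sketch: RMI ops append an immediate control byte after the ModRM-encoded
// operands.
//
//     VexRMIOp.VPSHUFD.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, 0x1B); // VPSHUFD ymm0, ymm1, 0x1B (reverse dwords in each lane)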
1260
1261 /**
1262 * VEX-encoded instructions with an operand order of MRI.
1263 */
1264 public static final class VexMRIOp extends VexOp implements VexRRIOp {
1265 // @formatter:off
1266 public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
1267 public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
1268 public static final VexMRIOp VPEXTRB = new VexMRIOp("VPEXTRB", P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
1269 public static final VexMRIOp VPEXTRW = new VexMRIOp("VPEXTRW", P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
1270 public static final VexMRIOp VPEXTRD = new VexMRIOp("VPEXTRD", P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
1271 public static final VexMRIOp VPEXTRQ = new VexMRIOp("VPEXTRQ", P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
1272 // @formatter:on
1273
1274 private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1275 super(opcode, pp, mmmmm, w, op, assertion);
1276 }
1277
1278 @Override
1279 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1280 assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
1281 asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
1282 asm.emitByte(op);
1283 asm.emitModRM(src, dst);
1284 asm.emitByte(imm8);
1285 }
1286
1287 public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
1288 assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
1289 boolean useEvex = asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
1290 asm.emitByte(op);
1291 asm.emitOperandHelper(src, dst, 1, getDisp8Scale(useEvex, size));
1292 asm.emitByte(imm8);
1293 }
1294 }
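
// Sketch: MRI ops put the destination in the ModRM r/m field, e.g. extracting
// the upper 128-bit half of a YMM register:
//
//     VexMRIOp.VEXTRACTI128.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, 1); // VEXTRACTI128 xmm0, ymm1, 1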
1295
1296 /**
1297 * VEX-encoded instructions with an operand order of RVMR.
1298 */
1299 public static class VexRVMROp extends VexOp {
1300 // @formatter:off
1301 public static final VexRVMROp VPBLENDVB = new VexRVMROp("VPBLENDVB", P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
1302 public static final VexRVMROp VBLENDVPS = new VexRVMROp("VBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
1303 public static final VexRVMROp VBLENDVPD = new VexRVMROp("VBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
1304 // @formatter:on
1305
1306 protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1307 super(opcode, pp, mmmmm, w, op, assertion);
1308 }
1309
1310 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
1311 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
1312 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1313 asm.emitByte(op);
1314 asm.emitModRM(dst, src2);
1315 asm.emitByte(mask.encoding() << 4);
1316 }
1317
1318 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
1319 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
1320 boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1321 asm.emitByte(op);
1322 asm.emitOperandHelper(dst, src2, 0, getDisp8Scale(useEvex, size));
1323 asm.emitByte(mask.encoding() << 4);
1324 }
1325 }
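
// Sketch: the fourth (mask) register is encoded in the high nibble of a
// trailing immediate byte, as done by emitByte(mask.encoding() << 4) above.
//
//     VexRVMROp.VBLENDVPS.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm3, AMD64.xmm1, AMD64.xmm2); // VBLENDVPS ymm0, ymm1, ymm2, ymm3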
1326
1327 /**
1328 * VEX-encoded instructions with an operand order of RVM.
1329 */
1330 public static class VexRVMOp extends VexOp {
1331 // @formatter:off
1332 public static final VexRVMOp VANDPS = new VexRVMOp("VANDPS", P_, M_0F, WIG, 0x54);
1333 public static final VexRVMOp VANDPD = new VexRVMOp("VANDPD", P_66, M_0F, WIG, 0x54);
1334 public static final VexRVMOp VANDNPS = new VexRVMOp("VANDNPS", P_, M_0F, WIG, 0x55);
1335 public static final VexRVMOp VANDNPD = new VexRVMOp("VANDNPD", P_66, M_0F, WIG, 0x55);
1336 public static final VexRVMOp VORPS = new VexRVMOp("VORPS", P_, M_0F, WIG, 0x56);
1337 public static final VexRVMOp VORPD = new VexRVMOp("VORPD", P_66, M_0F, WIG, 0x56);
1338 public static final VexRVMOp VXORPS = new VexRVMOp("VXORPS", P_, M_0F, WIG, 0x57);
1339 public static final VexRVMOp VXORPD = new VexRVMOp("VXORPD", P_66, M_0F, WIG, 0x57);
1340 public static final VexRVMOp VADDPS = new VexRVMOp("VADDPS", P_, M_0F, WIG, 0x58);
1341 public static final VexRVMOp VADDPD = new VexRVMOp("VADDPD", P_66, M_0F, WIG, 0x58);
1342 public static final VexRVMOp VADDSS = new VexRVMOp("VADDSS", P_F3, M_0F, WIG, 0x58);
1390 public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2);
1391 public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
1392 public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2);
1393 public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2);
1394 public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2);
1395 public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
1396 public static final VexRVMOp VFMADD231SS = new VexRVMOp("VFMADD231SS", P_66, M_0F38, W0, 0xB9, VEXOpAssertion.FMA);
1397 public static final VexRVMOp VFMADD231SD = new VexRVMOp("VFMADD231SD", P_66, M_0F38, W1, 0xB9, VEXOpAssertion.FMA);
1398 // @formatter:on
1399
1400 private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
1401 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1402 }
1403
1404 protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1405 super(opcode, pp, mmmmm, w, op, assertion);
1406 }
1407
1408 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1409 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1410 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1411 asm.emitByte(op);
1412 asm.emitModRM(dst, src2);
1413 }
1414
1415 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1416 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1417 boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1418 asm.emitByte(op);
1419 asm.emitOperandHelper(dst, src2, 0, getDisp8Scale(useEvex, size));
1420 }
1421 }
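
// Sketch: RVM is the common three-operand form, dst = src1 op src2.
//
//     VexRVMOp.VADDPD.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2); // VADDPD ymm0, ymm1, ymm2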
1422
1423 public static final class VexGeneralPurposeRVMOp extends VexRVMOp {
1424 // @formatter:off
1425 public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1);
1426 public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2);
1427 public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1428 public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1429 // @formatter:on
1430
1431 private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1432 super(opcode, pp, mmmmm, w, op, assertion);
1433 }
1434
1435 @Override
1436 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1437 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null);
1438 assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1439 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1440 asm.emitByte(op);
1441 asm.emitModRM(dst, src2);
1442 }
1443
1444 @Override
1445 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
1446 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null);
1447 assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1448 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1449 asm.emitByte(op);
1450 asm.emitOperandHelper(dst, src2, 0);
1451 }
1452 }
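
// Sketch: the general-purpose BMI ops ignore the declared W field and derive
// VEX.W from the requested operand size (DWORD -> W0, QWORD -> W1).
//
//     VexGeneralPurposeRVMOp.ANDN.emit(asm, AVXSize.QWORD, AMD64.rax, AMD64.rbx, AMD64.rcx); // ANDN rax, rbx, rcx (rax = ~rbx & rcx)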
1453
1454 public static final class VexGeneralPurposeRMVOp extends VexOp {
1455 // @formatter:off
1456 public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1);
1457 public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
1458 public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1459 public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1460 public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2);
1461 // @formatter:on
1462
1463 private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1464 super(opcode, pp, mmmmm, w, op, assertion);
1465 }
1466
1467 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
1468 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null);
1469 assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1470 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1471 asm.emitByte(op);
1472 asm.emitModRM(dst, src1);
1473 }
1474
1475 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) {
1476 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null);
1477 assert size == AVXSize.DWORD || size == AVXSize.QWORD;
1478 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1479 asm.emitByte(op);
1480 asm.emitOperandHelper(dst, src1, 0);
1481 }
1482 }
1483
1484 public static final class VexGeneralPurposeRMOp extends VexRMOp {
1485 // @formatter:off
1486 public static final VexGeneralPurposeRMOp BLSI = new VexGeneralPurposeRMOp("BLSI", P_, M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1);
1487 public static final VexGeneralPurposeRMOp BLSMSK = new VexGeneralPurposeRMOp("BLSMSK", P_, M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1);
1488 public static final VexGeneralPurposeRMOp BLSR = new VexGeneralPurposeRMOp("BLSR", P_, M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1);
1489 // @formatter:on
1490 private final int ext;
1491
1492 private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) {
1493 super(opcode, pp, mmmmm, w, op, assertion);
1494 this.ext = ext;
1495 }
1496
1497 @Override
1498 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
1499 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1500 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1501 asm.emitByte(op);
1502 asm.emitModRM(ext, src);
1503 }
1504
1505 @Override
1506 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
1507 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
1508 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false);
1509 asm.emitByte(op);
1510 asm.emitOperandHelper(ext, src, 0);
1511 }
1512 }
1513
1514 /**
1515 * VEX-encoded shift instructions with an operand order of either RVM or VMI.
1516 */
1517 public static final class VexShiftOp extends VexRVMOp implements VexRRIOp {
1518 // @formatter:off
1519 public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2);
1520 public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2);
1521 public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2);
1522 public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4);
1523 public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4);
1524 public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6);
1525 public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6);
1526 public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6);
1527 // @formatter:on
1528
1529 private final int immOp;
1530 private final int r;
1531
1532 private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) {
1533 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2);
1534 this.immOp = immOp;
1535 this.r = r;
1536 }
1537
1538 @Override
1539 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
1540 assert assertion.check((AMD64) asm.target.arch, size, null, dst, src);
1541 asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, wEvex, false);
1542 asm.emitByte(immOp);
1543 asm.emitModRM(r, src);
1544 asm.emitByte(imm8);
1545 }
1546 }
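
// Sketch: one shift op covers both encodings: a shift count in an XMM register
// (RVM form) or an immediate count (VMI form).
//
//     VexShiftOp.VPSLLD.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2); // VPSLLD ymm0, ymm1, xmm2
//     VexShiftOp.VPSLLD.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, 4);          // VPSLLD ymm0, ymm1, 4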
1547
1548 public static final class VexMaskMoveOp extends VexOp {
1549 // @formatter:off
1550 public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E);
1551 public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F);
1552 public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1553 public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2);
1554 // @formatter:on
1555
1556 private final int opReverse;
1557
1558 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
1559 this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
1560 }
1561
1562 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
1563 super(opcode, pp, mmmmm, w, op, assertion);
1564 this.opReverse = opReverse;
1565 }
1566
1567 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) {
1568 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null);
1569 asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, wEvex, false);
1570 asm.emitByte(op);
1571 asm.emitOperandHelper(dst, src, 0);
1572 }
1573
1574 public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) {
1575 assert assertion.check((AMD64) asm.target.arch, size, src, mask, null);
1576 boolean useEvex = asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, wEvex, false);
1577 asm.emitByte(opReverse);
1578 asm.emitOperandHelper(src, dst, 0, getDisp8Scale(useEvex, size));
1579 }
1580 }
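
// Sketch: a masked load, where lanes whose mask sign bit is clear are zeroed.
//
//     VexMaskMoveOp.VMASKMOVPS.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, new AMD64Address(AMD64.rsi)); // VMASKMOVPS ymm0, ymm1, [rsi]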
1581
1582 /**
1583 * VEX-encoded instructions with an operand order of RVMI.
1584 */
1585 public static final class VexRVMIOp extends VexOp {
1586 // @formatter:off
1587 public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6);
1588 public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6);
1589 public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY);
1590 public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY);
1591 // @formatter:on
1592
1593 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) {
1594 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1595 }
1596
1597 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
1598 super(opcode, pp, mmmmm, w, op, assertion);
1599 }
1600
1601 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) {
1602 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1603 assert (imm8 & 0xFF) == imm8;
1604 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1605 asm.emitByte(op);
1606 asm.emitModRM(dst, src2);
1607 asm.emitByte(imm8);
1608 }
1609
1610 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) {
1611 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1612 assert (imm8 & 0xFF) == imm8;
1613 boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1614 asm.emitByte(op);
1615 asm.emitOperandHelper(dst, src2, 1, getDisp8Scale(useEvex, size));
1616 asm.emitByte(imm8);
1617 }
1618 }
1619
1620 /**
1621 * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a
1622 * comparison operator.
1623 */
1624 public static final class VexFloatCompareOp extends VexOp {
1625 // @formatter:off
1626 public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2);
1627 public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
1628 public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2);
1629 public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
1630 // @formatter:on
1631
1632 public enum Predicate {
1633 EQ_OQ(0x00),
1634 LT_OS(0x01),
1635 LE_OS(0x02),
1697 return LT_OQ;
1698 case LE:
1699 return LE_OQ;
1700 case GT:
1701 return GT_OQ;
1702 case GE:
1703 return GE_OQ;
1704 default:
1705 throw GraalError.shouldNotReachHere();
1706 }
1707 }
1708 }
1709 }
1710
1711 private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) {
1712 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1713 }
1714
1715 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) {
1716 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1717 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1718 asm.emitByte(op);
1719 asm.emitModRM(dst, src2);
1720 asm.emitByte(p.imm8);
1721 }
1722
1723 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) {
1724 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1725 boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1726 asm.emitByte(op);
1727 asm.emitOperandHelper(dst, src2, 1, getDisp8Scale(useEvex, size));
1728 asm.emitByte(p.imm8);
1729 }
1730 }
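
// Sketch: the comparison predicate is emitted as the trailing immediate byte.
//
//     VexFloatCompareOp.VCMPPS.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2, VexFloatCompareOp.Predicate.EQ_OQ); // VCMPPS ymm0, ymm1, ymm2, 0x00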
1731
1732 public final void addl(AMD64Address dst, int imm32) {
1733 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1734 }
1735
1736 public final void addl(Register dst, int imm32) {
1737 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1738 }
1739
1740 public final void addl(Register dst, Register src) {
1741 ADD.rmOp.emit(this, DWORD, dst, src);
1742 }
1743
1744 public final void addpd(Register dst, Register src) {
1745 SSEOp.ADD.emit(this, PD, dst, src);
1746 }
1747
3814
3815 public void lfence() {
3816 emitByte(0x0f); // 0F AE E8 encodes LFENCE
3817 emitByte(0xae);
3818 emitByte(0xe8);
3819 }
3820
3821 public final void vptest(Register dst, Register src) {
3822 VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
3823 }
3824
3825 public final void vpxor(Register dst, Register nds, Register src) {
3826 VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3827 }
3828
3829 public final void vpxor(Register dst, Register nds, AMD64Address src) {
3830 VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
3831 }
3832
3833 public final void vmovdqu(Register dst, AMD64Address src) {
3834 VexMoveOp.VMOVDQU32.emit(this, AVXSize.YMM, dst, src);
3835 }
3836
3837 public final void vmovdqu(AMD64Address dst, Register src) {
3838 assert inRC(XMM, src);
3839 VexMoveOp.VMOVDQU32.emit(this, AVXSize.YMM, dst, src);
3840 }
3841
3842 public final void vpmovzxbw(Register dst, AMD64Address src) {
3843 assert supports(CPUFeature.AVX2);
3844 VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
3845 }
3846
3847 public final void vzeroupper() {
3848 emitVEX(L128, P_, M_0F, W0, 0, 0, true);
3849 emitByte(0x77);
3850 }
3851
3852 // Insn: KORTESTD k1, k2
3853
3854 // This instruction sets ZF if the OR of the two masks is all zeros, and CF if it is all ones
3855 public final void kortestd(Register src1, Register src2) {
3856 assert supports(CPUFeature.AVX512BW);
3857 assert inRC(MASK, src1) && inRC(MASK, src2);
3858 // Code: VEX.L0.66.0F.W1 98 /r
3859 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, W1, true);
3860 emitByte(0x98);
3861 emitModRM(src1, src2);
3862 }
3863
3864 // Insn: KORTESTQ k1, k2
3865
3866 // This instruction sets ZF if the OR of the two masks is all zeros, and CF if it is all ones
3867 public final void kortestq(Register src1, Register src2) {
3868 assert supports(CPUFeature.AVX512BW);
3869 assert inRC(MASK, src1) && inRC(MASK, src2);
3870 // Code: VEX.L0.0F.W1 98 /r
3871 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, W1, true);
3872 emitByte(0x98);
3873 emitModRM(src1, src2);
3874 }
3875
3876 public final void kmovd(Register dst, Register src) {
3877 assert supports(CPUFeature.AVX512BW);
3878 assert inRC(MASK, dst) || inRC(CPU, dst);
3879 assert inRC(MASK, src) || inRC(CPU, src);
3880 assert !(inRC(CPU, dst) && inRC(CPU, src));
3881
3882 if (inRC(MASK, dst)) {
3883 if (inRC(MASK, src)) {
3884 // kmovd(KRegister dst, KRegister src):
3885 // Insn: KMOVD k1, k2/m32
3886 // Code: VEX.L0.66.0F.W1 90 /r
3887 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, W1, true);
3888 emitByte(0x90);
3889 emitModRM(dst, src);
3890 } else {
3891 // kmovd(KRegister dst, Register src)
3892 // Insn: KMOVD k1, r32
3893 // Code: VEX.L0.F2.0F.W0 92 /r
3894 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, W0, true);
3895 emitByte(0x92);
3896 emitModRM(dst, src);
3897 }
3898 } else {
3899 if (inRC(MASK, src)) {
3900 // kmovd(Register dst, KRegister src)
3901 // Insn: KMOVD r32, k1
3902 // Code: VEX.L0.F2.0F.W0 93 /r
3903 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, W0, true);
3904 emitByte(0x93);
3905 emitModRM(dst, src);
3906 } else {
3907 throw GraalError.shouldNotReachHere();
3908 }
3909 }
3910 }
3911
3912 public final void kmovq(Register dst, Register src) {
3913 assert supports(CPUFeature.AVX512BW);
3914 assert inRC(MASK, dst) || inRC(CPU, dst);
3915 assert inRC(MASK, src) || inRC(CPU, src);
3916 assert !(inRC(CPU, dst) && inRC(CPU, src));
3917
3918 if (inRC(MASK, dst)) {
3919 if (inRC(MASK, src)) {
3920 // kmovq(KRegister dst, KRegister src):
3921 // Insn: KMOVQ k1, k2/m64
3922 // Code: VEX.L0.0F.W1 90 /r
3923 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, W1, true);
3924 emitByte(0x90);
3925 emitModRM(dst, src);
3926 } else {
3927 // kmovq(KRegister dst, Register src)
3928 // Insn: KMOVQ k1, r64
3929 // Code: VEX.L0.F2.0F.W1 92 /r
3930 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, W1, true);
3931 emitByte(0x92);
3932 emitModRM(dst, src);
3933 }
3934 } else {
3935 if (inRC(MASK, src)) {
3936 // kmovq(Register dst, KRegister src)
3937 // Insn: KMOVQ r64, k1
3938 // Code: VEX.L0.F2.0F.W1 93 /r
3939 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, W1, true);
3940 emitByte(0x93);
3941 emitModRM(dst, src);
3942 } else {
3943 throw GraalError.shouldNotReachHere();
3944 }
3945 }
3946 }
3947
3948 // Insn: KTESTD k1, k2
3949
3950 public final void ktestd(Register src1, Register src2) {
3951 assert supports(CPUFeature.AVX512BW);
3952 assert inRC(MASK, src1) && inRC(MASK, src2);
3953 // Code: VEX.L0.66.0F.W1 99 /r
3954 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, W1, true);
3955 emitByte(0x99);
3956 emitModRM(src1, src2);
3957 }
3958
3959 public final void evmovdqu64(Register dst, AMD64Address src) {
3960 assert supports(CPUFeature.AVX512F);
3961 assert inRC(XMM, dst);
3962 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
3963 emitByte(0x6F);
3964 emitOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3965 }
3966
3967 // Insn: VPMOVZXBW zmm1, m256
3968
3969 public final void evpmovzxbw(Register dst, AMD64Address src) {
3970 assert supports(CPUFeature.AVX512BW);
3971 assert inRC(XMM, dst);
3972 // Code: EVEX.512.66.0F38.WIG 30 /r
3973 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
3974 emitByte(0x30);
3975 emitOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
3976 }
3977
3978 public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
3979 assert supports(CPUFeature.AVX512BW);
3980 assert inRC(MASK, kdst) && inRC(XMM, nds);
3981 evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
3982 emitByte(0x74);
3983 emitOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3984 }
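
// Sketch of the usual AVX-512 idiom built from these primitives: compare 64
// bytes at once, then test the resulting mask (assuming mask registers such as
// AMD64.k1 are available from jdk.vm.ci.amd64.AMD64):
//
//     asm.evpcmpeqb(AMD64.k1, AMD64.xmm0, new AMD64Address(AMD64.rsi)); // k1[i] = (zmm0[i] == [rsi + i])
//     asm.kortestq(AMD64.k1, AMD64.k1);                                 // ZF set iff no byte matched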
3985
3986 // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512
3987 // -----
3988 // Insn: VMOVDQU16 zmm1, m512
3989
3990 public final void evmovdqu16(Register dst, AMD64Address src) {
3991 assert supports(CPUFeature.AVX512BW);
3992 assert inRC(XMM, dst);
3993 // Code: EVEX.512.F2.0F.W1 6F /r
3994 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
3995 emitByte(0x6F);
3996 emitOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
3997 }
3998
3999 // Insn: VMOVDQU16 zmm1, k1:z, m512
4000
4001 public final void evmovdqu16(Register dst, Register mask, AMD64Address src) {
4002 assert supports(CPUFeature.AVX512BW);
4003 assert inRC(XMM, dst) && inRC(MASK, mask);
4004 // Code: EVEX.512.F2.0F.W1 6F /r
4005 evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0);
4006 emitByte(0x6F);
4007 emitOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
4008 }
4009
4010 // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1
4011 // -----
4012 // Insn: VMOVDQU16 m512, zmm1
4013
4014 public final void evmovdqu16(AMD64Address dst, Register src) {
4015 assert supports(CPUFeature.AVX512BW);
4016 assert inRC(XMM, src);
4017 // Code: EVEX.512.F2.0F.W1 7F /r
4018 evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
4019 emitByte(0x7F);
4020 emitOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
4021 }
4022
4023 // Insn: VMOVDQU16 m512, k1, zmm1
4024
4025 public final void evmovdqu16(AMD64Address dst, Register mask, Register src) {
4026 assert supports(CPUFeature.AVX512BW);
4027 assert inRC(MASK, mask) && inRC(XMM, src);
4028 // Code: EVEX.512.F2.0F.W1 7F /r
4029 evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0);
4030 emitByte(0x7F);
4031 emitOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
4032 }
4033
4034 // Insn: VPBROADCASTW zmm1 {k1}{z}, reg
4035 // -----
4036 // Insn: VPBROADCASTW zmm1, reg
4037
4038 public final void evpbroadcastw(Register dst, Register src) {
4039 assert supports(CPUFeature.AVX512BW);
4040 assert inRC(XMM, dst) && inRC(CPU, src);
4041 // Code: EVEX.512.66.0F38.W0 7B /r
4042 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0);
4043 emitByte(0x7B);
4044 emitModRM(dst, src);
4045 }
4046
4047 // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8
4048 // -----
4049 // Insn: VPCMPUW k1, zmm2, zmm3, imm8
4050
4051 public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) {
4052 assert supports(CPUFeature.AVX512BW);
4053 assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src);
4054 // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
4055 evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
4056 emitByte(0x3E);
4057 emitModRM(kdst, src);
4058 emitByte(vcc);
4059 }
4060
4061 // Insn: VPCMPUW k1 {k2}, zmm2, zmm3, imm8
4062 // -----
4063 // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8
4064
4065 public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) {
4066 assert supports(CPUFeature.AVX512BW);
4067 assert inRC(MASK, kdst) && inRC(MASK, mask);
4068 assert inRC(XMM, nds) && inRC(XMM, src);
4069 // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib
4070 evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0);
4071 emitByte(0x3E);
4072 emitModRM(kdst, src);
4073 emitByte(vcc);
4074 }
4075
4076 // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2
4077 // -----
4078 // Insn: VPMOVWB m256, zmm2
4079
4080 public final void evpmovwb(AMD64Address dst, Register src) {
4081 assert supports(CPUFeature.AVX512BW);
4082 assert inRC(XMM, src);
4083 // Code: EVEX.512.F3.0F38.W0 30 /r
4084 evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
4085 emitByte(0x30);
4086 emitOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
4087 }
4088
4089 // Insn: VPMOVWB m256, k1, zmm2
4090
4091 public final void evpmovwb(AMD64Address dst, Register mask, Register src) {
4092 assert supports(CPUFeature.AVX512BW);
4093 assert inRC(MASK, mask) && inRC(XMM, src);
4094 // Code: EVEX.512.F3.0F38.W0 30 /r
4095 evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0);
4096 emitByte(0x30);
4097 emitOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
4098 }
4099
4100 // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
4101 // -----
4102 // Insn: VPMOVZXBW zmm1, k1, m256
4103
4104 public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) {
4105 assert supports(CPUFeature.AVX512BW);
4106 assert inRC(MASK, mask) && inRC(XMM, dst);
4107 // Code: EVEX.512.66.0F38.WIG 30 /r
4108 evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
4109 emitByte(0x30);
4110 emitOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
4111 }
4112
4113 }
|