import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
import static org.graalvm.compiler.core.common.NumUtil.isByte;

import org.graalvm.compiler.asm.Assembler;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.meta.PlatformKind;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public abstract class AMD64BaseAssembler extends Assembler {

    private final SIMDEncoder simdEncoder;

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64BaseAssembler(TargetDescription target) {
        super(target);

        if (supports(CPUFeature.AVX)) {
            simdEncoder = new VEXEncoderImpl();
        } else {
            simdEncoder = new SSEEncoderImpl();

    // ...

    /**
     * Annotation that stores additional information about the immediate operand, e.g., of a call
     * instruction, that needs patching.
     */
    protected static class ImmediateOperandAnnotation extends OperandDataAnnotation {
        ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    protected void annotatePatchingImmediate(int operandOffset, int operandSize) {
        if (codePatchingAnnotationConsumer != null) {
            int pos = position();
            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + operandOffset, operandSize, pos + operandOffset + operandSize));
        }
    }
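
    // Illustrative (hypothetical) usage: an emitter of a call whose patchable
    // 32-bit operand starts one byte after the current position could record
    // it as follows before emitting the bytes:
    //
    //     annotatePatchingImmediate(1, 4); // operand begins at position() + 1
    //     emitByte(0xE8);                  // call opcode
    //     emitInt(0);                      // placeholder, patched later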

    public final boolean supports(CPUFeature feature) {
        return ((AMD64) target.arch).getFeatures().contains(feature);
    }

    protected static boolean inRC(RegisterCategory rc, Register r) {
        return r.getRegisterCategory().equals(rc);
    }

    protected static int encode(Register r) {
        assert r.encoding >= 0 && (inRC(XMM, r) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding;
        return r.encoding & 0x7;
    }
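
    // For example, encode(AMD64.r9) returns 1 (9 & 0x7); the high bit of the
    // encoding is conveyed separately through a REX, VEX or EVEX prefix bit.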

    private static final int MinEncodingNeedsRex = 8;

    /**
     * Constants for X86 prefix bytes.
     */
    private static class Prefix {
        private static final int REX = 0x40;
        private static final int REXB = 0x41;
        private static final int REXX = 0x42;
        private static final int REXXB = 0x43;
        private static final int REXR = 0x44;
        private static final int REXRB = 0x45;
        private static final int REXRX = 0x46;
        private static final int REXRXB = 0x47;
        private static final int REXW = 0x48;
        private static final int REXWB = 0x49;
        private static final int REXWX = 0x4A;
        private static final int REXWXB = 0x4B;
        private static final int REXWR = 0x4C;
        private static final int REXWRB = 0x4D;
        private static final int REXWRX = 0x4E;
        private static final int REXWRXB = 0x4F;

        private static final int VEX2 = 0xC5;
        private static final int VEX3 = 0xC4;
        private static final int EVEX = 0x62;
    }
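
    // Each REX constant is the base 0x40 with the W, R, X and B extension bits
    // in positions 3..0; e.g., REXWRXB == 0x40 | 0b1111 == 0x4F.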

    protected final void rexw() {
        emitByte(Prefix.REXW);
    }

    protected final void prefix(Register reg) {
        prefix(reg, false);
    }

    protected final void prefix(Register reg, boolean byteinst) {
        int regEnc = reg.encoding;
        if (regEnc >= 8) {
            emitByte(Prefix.REXB);
        } else if (byteinst && regEnc >= 4) {
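            // SPL, BPL, SIL and DIL (encodings 4-7) are only addressable as
            // byte registers when a REX prefix is present; without it these
            // encodings select AH, CH, DH and BH.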
            emitByte(Prefix.REX);
        }
    }

    protected final void prefixq(Register reg) {

    // ...

                default:
                    return P_;
            }
        }

        private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) {
            switch (opcodeEscapePrefix) {
                case 0x0F:
                    return M_0F;
                case 0x380F:
                    return M_0F38;
                case 0x3A0F:
                    return M_0F3A;
                default:
                    return 0;
            }
        }

        @Override
        public void simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            assert reg.encoding < 16 : "encoding out of range: " + reg.encoding;
            assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
            emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0, true);
        }

        @Override
        public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            assert dst.encoding < 16 : "encoding out of range: " + dst.encoding;
            assert src.encoding < 16 : "encoding out of range: " + src.encoding;
            emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0, true);
        }
    }

    protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(xreg, nds, adr, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(xreg, nds, adr, size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(dst, nds, src, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    // @formatter:off
    //
    // Instruction Format and VEX illustrated below (optional []):
    //
    // #of bytes:    2,3      1       1       1      1,2,4          1
    //            [Prefixes]  VEX   OpCode   ModR/M  [SIB]       [Disp8*N] [Immediate]
    //                                                           [Disp16,32]
    //
    // VEX: 0xC4 | P1 | P2
    //
    //     7   6   5   4   3   2   1   0
    // P1  R   X   B   m   m   m   m   m      P[ 7:0]
    // P2  W   v   v   v   v   L   p   p      P[15:8]
    //
    // VEX: 0xC5 | B1
    //
    //     7   6   5   4   3   2   1   0
    // B1  R   v   v   v   v   L   p   p      P[7:0]
    //
    // Figure. Bit Field Layout of the VEX Prefix
    //
    // Table. VEX Prefix Bit Field Functional Grouping
    //
    // Notation     Bit field Group             Position    Comment
    // ----------   -------------------------   --------    -------------------
    // VEX.RXB      Next-8 register specifier   P[7:5]      Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
    // VEX.R        REX.R inverse               P[7]        Combine with EVEX.R and ModR/M.reg.
    // VEX.X        REX.X inverse               P[6]        Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
    // VEX.B        REX.B inverse               P[5]
    // VEX.mmmmm    0F, 0F_38, 0F_3A encoding   P[4:0]      b01/0x0F, b10/0F_38, b11/0F_3A (all other reserved)
    //
    // VEX.W        Opcode specific             P[15]
    // VEX.vvvv     A register specifier        P[14:11]    In inverse form, b1111 if not used.
    //                                          P[6:3]
    // VEX.L        Vector length/RC            P[10]       b0/scalar or 128b vec, b1/256b vec.
    //                                          P[2]
    // VEX.pp       Compressed legacy prefix    P[9:8]      b00/None, b01/0x66, b10/0xF3, b11/0xF2
    //                                          P[1:0]
    // @formatter:on

    /**
     * Low-level function to encode and emit the VEX prefix.
     * <p>
     * 2 byte form: [1100 0101] [R vvvv L pp]<br>
     * 3 byte form: [1100 0100] [RXB m-mmmm] [W vvvv L pp]
     * <p>
     * The RXB and vvvv fields are stored in 1's complement in the prefix encoding. This function
     * performs the 1's complement conversion; the caller is expected to pass plain unencoded
     * arguments.
     * <p>
     * The pp field encodes an extension to the opcode:<br>
     * 00: no extension<br>
     * 01: 66<br>
     * 10: F3<br>
     * 11: F2
     * <p>
     * The m-mmmm field encodes the leading bytes of the opcode:<br>
     * 00001: implied 0F leading opcode byte (default in 2-byte encoding)<br>
     * 00010: implied 0F 38 leading opcode bytes<br>
     * 00011: implied 0F 3A leading opcode bytes
     * <p>
     * This function automatically chooses the 2 or 3 byte encoding, based on the XBW flags and the
     * m-mmmm field.
     */
    protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv, boolean checkAVX) {
        assert !checkAVX || ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX) : "emitting VEX prefix on a CPU without AVX support";

        assert l == L128 || l == L256 : "invalid value for VEX.L";
        assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for VEX.pp";
        assert mmmmm == M_0F || mmmmm == M_0F38 || mmmmm == M_0F3A : "invalid value for VEX.m-mmmm";
        assert w == W0 || w == W1 : "invalid value for VEX.W";

        assert (rxb & 0x07) == rxb : "invalid value for VEX.RXB";
        assert (vvvv & 0x0F) == vvvv : "invalid value for VEX.vvvv";

        int rxb1s = rxb ^ 0x07;
        int vvvv1s = vvvv ^ 0x0F;
        if ((rxb & 0x03) == 0 && w == WIG && mmmmm == M_0F) {
            // 2 byte encoding
            int byte2 = 0;
            byte2 |= (rxb1s & 0x04) << 5;
            byte2 |= vvvv1s << 3;
            byte2 |= l << 2;
            byte2 |= pp;

            emitByte(Prefix.VEX2);
            emitByte(byte2);
        } else {
            // 3 byte encoding
            int byte2 = 0;
            byte2 |= (rxb1s & 0x07) << 5;
            byte2 |= mmmmm;

            int byte3 = 0;
            byte3 |= w << 7;
            byte3 |= vvvv1s << 3;
            byte3 |= l << 2;
            byte3 |= pp;

            emitByte(Prefix.VEX3);
            emitByte(byte2);
            emitByte(byte3);
        }
    }
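
    // Worked example (illustrative): vaddps xmm0, xmm1, xmm2 uses l=L128, pp=P_,
    // mmmmm=M_0F, w=WIG, rxb=0 and vvvv=1 (xmm1). X, B and W are all zero and
    // mmmmm is M_0F, so the 2-byte form applies:
    //   byte2 = 0x80 (inverted R) | 0x70 (inverted vvvv) | 0 (L) | 0 (pp) = 0xF0,
    // emitting the prefix C5 F0; the opcode 58 and ModR/M byte C2 follow
    // separately.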

    public static int getLFlag(AVXSize size) {
        switch (size) {
            case XMM:
                return L128;
            case YMM:
                return L256;
            case ZMM:
                return L512;
            default:
                return LZ;
        }
    }

    public final void vexPrefix(Register dst, Register nds, Register src, AVXSize size, int pp, int mmmmm, int w, boolean checkAVX) {
        emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
    }

    public final void vexPrefix(Register dst, Register nds, AMD64Address src, AVXSize size, int pp, int mmmmm, int w, boolean checkAVX) {
        emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
    }

    protected static final class EVEXPrefixConfig {
        public static final int L512 = 2;
        public static final int LIG = 0;

        public static final int Z0 = 0x0;
        public static final int Z1 = 0x1;

        public static final int B0 = 0x0;
        public static final int B1 = 0x1;

        private EVEXPrefixConfig() {
        }
    }

    private static final int NOT_SUPPORTED_VECTOR_LENGTH = -1;

    /**
     * EVEX-encoded instructions use a compressed displacement scheme by multiplying disp8 with a

    // ...

            if (scalingFactor == NOT_SUPPORTED_VECTOR_LENGTH) {
                throw GraalError.shouldNotReachHere("Invalid scaling factor.");
            }
            return scalingFactor;
        }

        public int getDisp8ScalingFactor(AVXSize size) {
            switch (size) {
                case XMM:
                    return verifyScalingFactor(scalingFactorVL128);
                case YMM:
                    return verifyScalingFactor(scalingFactorVL256);
                case ZMM:
                    return verifyScalingFactor(scalingFactorVL512);
                default:
                    throw GraalError.shouldNotReachHere("Unsupported vector size.");
            }
        }
    }
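
    // Illustrative example of disp8 compression: with scaling factor N = 64
    // (a full 64-byte ZMM memory operand), a displacement of 128 is encoded
    // as disp8 = 128 / 64 = 2, whereas a displacement that is not a multiple
    // of 64 must fall back to the full disp32 form.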

    // @formatter:off
    //
    // Instruction Format and EVEX illustrated below (optional []):
    //
    // #of bytes:     4       1       1       1      1,2,4          1
    //            [Prefixes]  EVEX  OpCode   ModR/M  [SIB]       [Disp8*N] [Immediate]
    //                                                           [Disp16,32]
    //
    // The EVEX prefix is a 4-byte prefix, with the first two bytes derived from an unused encoding
    // form of the 32-bit-mode-only BOUND instruction. The layout of the EVEX prefix is shown in
    // the figure below. The first byte must be 0x62, followed by three payload bytes, denoted
    // as P1, P2, and P3 individually or collectively as P[23:0] (see below).
    //
    // EVEX: 0x62 | P1 | P2 | P3
    //
    //     7   6   5   4   3   2   1   0
    // P1  R   X   B   R'  0   0   m   m      P[ 7: 0]
    // P2  W   v   v   v   v   1   p   p      P[15: 8]
    // P3  z   L'  L   b   V'  a   a   a      P[23:16]
    //
    // Figure. Bit Field Layout of the EVEX Prefix
    //
    // Table. EVEX Prefix Bit Field Functional Grouping
    //
    // Notation     Bit field Group              Position    Comment
    // ---------    --------------------------   --------    -----------------------
    // EVEX.RXB     Next-8 register specifier    P[7:5]      Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
    // EVEX.X       High-16 register specifier   P[6]        Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
    // EVEX.R'      High-16 register specifier   P[4]        Combine with EVEX.R and ModR/M.reg.
    // --           Reserved                     P[3:2]      Must be 0.
    // EVEX.mm      Compressed legacy escape     P[1:0]      Identical to low two bits of VEX.mmmmm.
    //
    // EVEX.W       Osize promotion/Opcode ext   P[15]
    // EVEX.vvvv    NDS register specifier       P[14:11]    Same as VEX.vvvv.
    // --           Fixed Value                  P[10]       Must be 1.
    // EVEX.pp      Compressed legacy prefix     P[9:8]      Identical to VEX.pp.
    //
    // EVEX.z       Zeroing/Merging              P[23]
    // EVEX.L'L     Vector length/RC             P[22:21]
    // EVEX.b       Broadcast/RC/SAE Context     P[20]
    // EVEX.V'      High-16 NDS/VIDX register    P[19]       Combine with EVEX.vvvv or VSIB when present.
    // EVEX.aaa     Embedded opmask register     P[18:16]
    //
    // @formatter:on

    /**
     * Low-level function to encode and emit the EVEX prefix.
     * <p>
     * 62 [0 1 1 0 0 0 1 0]<br>
     * P1 [R X B R'0 0 m m]<br>
     * P2 [W v v v v 1 p p]<br>
     * P3 [z L'L b V'a a a]
     * <p>
     * The pp field encodes an extension to the opcode:<br>
     * 00: no extension<br>
     * 01: 66<br>
     * 10: F3<br>
     * 11: F2
     * <p>
     * The mm field encodes the leading bytes of the opcode:<br>
     * 01: implied 0F leading opcode byte<br>
     * 10: implied 0F 38 leading opcode bytes<br>
     * 11: implied 0F 3A leading opcode bytes
     * <p>
     * The z field encodes the merging mode (merge or zero).
     * <p>
     * The b field encodes the source broadcast or data rounding modes.
     * <p>
     * The aaa field encodes the operand mask register.
     */
    private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv, int z, int b, int aaa) {
        assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX512F) : "emitting EVEX prefix on a CPU without AVX512 support";

        assert l == L128 || l == L256 || l == L512 : "invalid value for EVEX.L'L";
        assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for EVEX.pp";
        assert mm == M_0F || mm == M_0F38 || mm == M_0F3A : "invalid value for EVEX.mm";
        assert w == W0 || w == W1 : "invalid value for EVEX.W";

        assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB";
        assert (reg & 0x1F) == reg : "invalid value for EVEX.R'";
        assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.V'vvvv";

        assert z == Z0 || z == Z1 : "invalid value for EVEX.z";
        assert b == B0 || b == B1 : "invalid value for EVEX.b";
        assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa";

        emitByte(Prefix.EVEX);
        int p1 = 0;
        p1 |= ((rxb ^ 0x07) & 0x07) << 5;
        p1 |= reg < 16 ? 0x10 : 0;
        p1 |= mm;
        emitByte(p1);

        int p2 = 0;
        p2 |= w << 7;
        p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3;
        p2 |= 0x04;
        p2 |= pp;
        emitByte(p2);

        int p3 = 0;
        p3 |= z << 7;
        p3 |= l << 5;
        p3 |= b << 4;
        p3 |= vvvvv < 16 ? 0x08 : 0;
        p3 |= aaa;
        emitByte(p3);
    }
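
    // Worked example (illustrative): vaddps zmm0, zmm1, zmm2 uses l=L512, pp=P_,
    // mm=M_0F, w=W0, rxb=0, reg=0 (zmm0), vvvvv=1 (zmm1), z=Z0, b=B0 and aaa=0,
    // producing the prefix bytes 62 F1 74 48; the opcode 58 and ModR/M byte C2
    // follow separately.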

    /**
     * Gets the RXB bits for register-register instructions in EVEX encoding, where ModRM.rm
     * contains a register index. The R bit extends the ModRM.reg field, while the X and B bits
     * extend the ModRM.rm field.
     */
    private static int getRXBForEVEX(Register reg, Register rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        rxb |= (rm == null ? 0 : rm.encoding & 0x18) >> 3;
        return rxb;
    }
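
    // For instance (illustrative): reg = xmm9 (encoding 9, bit 3 set) sets the
    // R bit and rm = xmm18 (encoding 18, bit 4 set) sets the X bit, giving
    // rxb = 0b110; bit 4 of reg is handled separately via EVEX.R', derived in
    // emitEVEX from the full reg encoding.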

    /**
     * Helper method for emitting the EVEX prefix in the form of RRRR.
     */
    protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) {
        assert !mask.isValid() || inRC(MASK, mask);
        emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
    }

    /**
     * Helper method for emitting the EVEX prefix in the form of RRRM. Because memory addressing in
     * EVEX-encoded instructions employs a compressed displacement scheme when using the disp8 form,
     * users of this API should make sure to encode the operands using
     * {@link #emitEVEXOperandHelper(Register, AMD64Address, int, int)}.
     */
    protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) {
        assert !mask.isValid() || inRC(MASK, mask);
        emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
    }

}