Old src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64BaseAssembler.java

   1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 package org.graalvm.compiler.asm.amd64;
  26 
  27 import static jdk.vm.ci.amd64.AMD64.MASK;
  28 import static jdk.vm.ci.amd64.AMD64.XMM;
  29 import static jdk.vm.ci.amd64.AMD64.r12;
  30 import static jdk.vm.ci.amd64.AMD64.r13;
  31 import static jdk.vm.ci.amd64.AMD64.rbp;
  32 import static jdk.vm.ci.amd64.AMD64.rsp;
  33 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
  34 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B1;
  35 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.L512;
  36 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
  37 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
  38 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
  39 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
  40 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
  41 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
  42 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
  43 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
  44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
  45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
  46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
  47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
  48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
  49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
  50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
  51 import static org.graalvm.compiler.core.common.NumUtil.isByte;
  52 
  53 import org.graalvm.compiler.asm.Assembler;
  54 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
  55 import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
  56 import org.graalvm.compiler.debug.GraalError;
  57 
  58 import jdk.vm.ci.amd64.AMD64;
  59 import jdk.vm.ci.amd64.AMD64.CPUFeature;
  60 import jdk.vm.ci.amd64.AMD64Kind;
  61 import jdk.vm.ci.code.Register;
  62 import jdk.vm.ci.code.TargetDescription;
  63 import jdk.vm.ci.meta.PlatformKind;
  64 
  65 /**
  66  * This class implements an assembler that can encode most X86 instructions.
  67  */
  68 public abstract class AMD64BaseAssembler extends Assembler {
  69 
  70     private final SIMDEncoder simdEncoder;
  71 
  72     /**
  73      * Constructs an assembler for the AMD64 architecture.
  74      */
  75     public AMD64BaseAssembler(TargetDescription target) {
  76         super(target);
  77 
  78         if (supports(CPUFeature.AVX)) {
  79             simdEncoder = new VEXEncoderImpl();
  80         } else {
  81             simdEncoder = new SSEEncoderImpl();
  82         }
  83     }
  84 
  85     /**
  86      * The x86 operand sizes.
  87      */
  88     public enum OperandSize {
  89         BYTE(1, AMD64Kind.BYTE) {
  90             @Override
  91             protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
  92                 assert imm == (byte) imm;
  93                 asm.emitByte(imm);
  94             }
  95 
  96             @Override
  97             protected int immediateSize() {
  98                 return 1;
  99             }
 100         },
 101 
 102         WORD(2, AMD64Kind.WORD, 0x66) {
 103             @Override
 104             protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
 105                 assert imm == (short) imm;
 106                 asm.emitShort(imm);
 107             }
 108 
 109             @Override
 110             protected int immediateSize() {
 111                 return 2;
 112             }
 113         },
 114 
 115         DWORD(4, AMD64Kind.DWORD) {
 116             @Override
 117             protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
 118                 asm.emitInt(imm);
 119             }
 120 
 121             @Override
 122             protected int immediateSize() {
 123                 return 4;
 124             }
 125         },
 126 
 127         QWORD(8, AMD64Kind.QWORD) {
 128             @Override
 129             protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
 130                 asm.emitInt(imm);
 131             }
 132 
 133             @Override
 134             protected int immediateSize() {
 135                 return 4;
 136             }
 137         },
 138 
 139         SS(4, AMD64Kind.SINGLE, 0xF3, true),
 140 
 141         SD(8, AMD64Kind.DOUBLE, 0xF2, true),
 142 
 143         PS(16, AMD64Kind.V128_SINGLE, true),
 144 
 145         PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);
 146 
 147         private final int sizePrefix;
 148         private final int bytes;
 149         private final boolean xmm;
 150         private final AMD64Kind kind;
 151 
 152         OperandSize(int bytes, AMD64Kind kind) {
 153             this(bytes, kind, 0);
 154         }
 155 
 156         OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
 157             this(bytes, kind, sizePrefix, false);
 158         }
 159 
 160         OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
 161             this(bytes, kind, 0, xmm);
 162         }
 163 
 164         OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
 165             this.sizePrefix = sizePrefix;
 166             this.bytes = bytes;
 167             this.kind = kind;
 168             this.xmm = xmm;
 169         }
 170 
 171         public int getSizePrefix() {
 172             return sizePrefix;
 173         }
 174 
 175         public int getBytes() {
 176             return bytes;
 177         }
 178 
 179         public boolean isXmmType() {
 180             return xmm;
 181         }
 182 
 183         public AMD64Kind getKind() {
 184             return kind;
 185         }
 186 
 187         public static OperandSize get(PlatformKind kind) {
 188             for (OperandSize operandSize : OperandSize.values()) {
 189                 if (operandSize.kind.equals(kind)) {
 190                     return operandSize;
 191                 }
 192             }
 193             throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
 194         }
 195 
 196         /**
 197          * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
 198          * as sign-extended 32-bit values.
 199          *
 200          * @param asm
 201          * @param imm
 202          */
 203         protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
 204             throw new UnsupportedOperationException();
 205         }
 206 
 207         protected int immediateSize() {
 208             throw new UnsupportedOperationException();
 209         }
 210     }
 211 
 212     public abstract static class OperandDataAnnotation extends CodeAnnotation {
 213         /**
 214          * The position (bytes from the beginning of the method) of the operand.
 215          */
 216         public final int operandPosition;
 217         /**
 218          * The size of the operand, in bytes.
 219          */
 220         public final int operandSize;
 221         /**
 222          * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
 223          * RIP-relative operands are relative to this position.
 224          */
 225         public final int nextInstructionPosition;
 226 
 227         OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
 228             super(instructionPosition);
 229 
 230             this.operandPosition = operandPosition;
 231             this.operandSize = operandSize;
 232             this.nextInstructionPosition = nextInstructionPosition;
 233         }
 234 
 235         @Override
 236         public String toString() {
 237             return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
 238         }
 239     }
 240 
    /**
     * Annotation that stores additional information about the displacement of a
     * {@link Assembler#getPlaceholder placeholder address} that needs patching.
     * <p>
     * Adds no state of its own; the subclass only serves to distinguish displacement operands
     * from other {@link OperandDataAnnotation}s by type.
     */
    protected static class AddressDisplacementAnnotation extends OperandDataAnnotation {
        // All four values are forwarded unchanged to OperandDataAnnotation.
        AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }
 250 
    /**
     * Annotation that stores additional information about the immediate operand, e.g., of a call
     * instruction, that needs patching.
     * <p>
     * Adds no state of its own; the subclass only serves to distinguish immediate operands from
     * other {@link OperandDataAnnotation}s by type.
     */
    protected static class ImmediateOperandAnnotation extends OperandDataAnnotation {
        // All four values are forwarded unchanged to OperandDataAnnotation.
        ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }
 260 
 261     protected void annotatePatchingImmediate(int operandOffset, int operandSize) {
 262         if (codePatchingAnnotationConsumer != null) {
 263             int pos = position();
 264             codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + operandOffset, operandSize, pos + operandOffset + operandSize));
 265         }
 266     }
 267 
 268     public final boolean supports(CPUFeature feature) {
 269         return ((AMD64) target.arch).getFeatures().contains(feature);
 270     }
 271 
 272     protected static int encode(Register r) {
 273         assert r.encoding >= 0 && (r.getRegisterCategory().equals(XMM) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding;
 274         return r.encoding & 0x7;
 275     }
 276 
    /** Smallest register encoding for which {@code needsRex} reports that a REX prefix is needed. */
    private static final int MinEncodingNeedsRex = 8;
 278 
 279     /**
 280      * Constants for X86 prefix bytes.
 281      */
 282     private static class Prefix {
 283         private static final int REX = 0x40;
 284         private static final int REXB = 0x41;
 285         private static final int REXX = 0x42;
 286         private static final int REXXB = 0x43;
 287         private static final int REXR = 0x44;
 288         private static final int REXRB = 0x45;
 289         private static final int REXRX = 0x46;
 290         private static final int REXRXB = 0x47;
 291         private static final int REXW = 0x48;
 292         private static final int REXWB = 0x49;
 293         private static final int REXWX = 0x4A;
 294         private static final int REXWXB = 0x4B;
 295         private static final int REXWR = 0x4C;
 296         private static final int REXWRB = 0x4D;
 297         private static final int REXWRX = 0x4E;
 298         private static final int REXWRXB = 0x4F;
 299     }
 300 
    /**
     * Emits the single prefix byte {@code Prefix.REXW} (0x48), regardless of any operands.
     */
    protected final void rexw() {
        emitByte(Prefix.REXW);
    }
 304 
    /**
     * Emits the prefix byte, if any, for an instruction with a single register operand that is
     * not a byte instruction. Equivalent to {@code prefix(reg, false)}.
     *
     * @param reg the register operand
     */
    protected final void prefix(Register reg) {
        prefix(reg, false);
    }
 308 
 309     protected final void prefix(Register reg, boolean byteinst) {
 310         int regEnc = reg.encoding;
 311         if (regEnc >= 8) {
 312             emitByte(Prefix.REXB);
 313         } else if (byteinst && regEnc >= 4) {
 314             emitByte(Prefix.REX);
 315         }
 316     }
 317 
 318     protected final void prefixq(Register reg) {
 319         if (reg.encoding < 8) {
 320             emitByte(Prefix.REXW);
 321         } else {
 322             emitByte(Prefix.REXWB);
 323         }
 324     }
 325 
    /**
     * Emits the prefix byte, if any, for a register-register instruction where neither operand
     * is treated as a byte register. Equivalent to {@code prefix(dst, false, src, false)}.
     *
     * @param dst the destination register
     * @param src the source register
     */
    protected final void prefix(Register dst, Register src) {
        prefix(dst, false, src, false);
    }
 329 
 330     protected final void prefix(Register dst, boolean dstIsByte, Register src, boolean srcIsByte) {
 331         int dstEnc = dst.encoding;
 332         int srcEnc = src.encoding;
 333         if (dstEnc < 8) {
 334             if (srcEnc >= 8) {
 335                 emitByte(Prefix.REXB);
 336             } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
 337                 emitByte(Prefix.REX);
 338             }
 339         } else {
 340             if (srcEnc < 8) {
 341                 emitByte(Prefix.REXR);
 342             } else {
 343                 emitByte(Prefix.REXRB);
 344             }
 345         }
 346     }
 347 
 348     /**
 349      * Creates prefix for the operands. If the given operands exceed 3 bits, the 4th bit is encoded
 350      * in the prefix.
 351      */
 352     protected final void prefixq(Register reg, Register rm) {
 353         int regEnc = reg.encoding;
 354         int rmEnc = rm.encoding;
 355         if (regEnc < 8) {
 356             if (rmEnc < 8) {
 357                 emitByte(Prefix.REXW);
 358             } else {
 359                 emitByte(Prefix.REXWB);
 360             }
 361         } else {
 362             if (rmEnc < 8) {
 363                 emitByte(Prefix.REXWR);
 364             } else {
 365                 emitByte(Prefix.REXWRB);
 366             }
 367         }
 368     }
 369 
    /**
     * Tells whether a register's encoding is at least {@link #MinEncodingNeedsRex}, i.e. whether
     * it does not fit into three bits and an extension bit in the prefix is required.
     *
     * @param reg the register to test
     */
    private static boolean needsRex(Register reg) {
        return reg.encoding >= MinEncodingNeedsRex;
    }
 373 
 374     protected final void prefix(AMD64Address adr) {
 375         if (needsRex(adr.getBase())) {
 376             if (needsRex(adr.getIndex())) {
 377                 emitByte(Prefix.REXXB);
 378             } else {
 379                 emitByte(Prefix.REXB);
 380             }
 381         } else {
 382             if (needsRex(adr.getIndex())) {
 383                 emitByte(Prefix.REXX);
 384             }
 385         }
 386     }
 387 
 388     protected final void prefixq(AMD64Address adr) {
 389         if (needsRex(adr.getBase())) {
 390             if (needsRex(adr.getIndex())) {
 391                 emitByte(Prefix.REXWXB);
 392             } else {
 393                 emitByte(Prefix.REXWB);
 394             }
 395         } else {
 396             if (needsRex(adr.getIndex())) {
 397                 emitByte(Prefix.REXWX);
 398             } else {
 399                 emitByte(Prefix.REXW);
 400             }
 401         }
 402     }
 403 
    /**
     * Emits the prefix byte, if any, for a byte instruction with one memory and one register
     * operand. Equivalent to {@code prefix(adr, reg, true)}.
     *
     * @param adr the memory operand
     * @param reg the register operand
     */
    protected void prefixb(AMD64Address adr, Register reg) {
        prefix(adr, reg, true);
    }
 407 
    /**
     * Emits the prefix byte, if any, for a non-byte instruction with one memory and one register
     * operand. Equivalent to {@code prefix(adr, reg, false)}.
     *
     * @param adr the memory operand
     * @param reg the register operand
     */
    protected void prefix(AMD64Address adr, Register reg) {
        prefix(adr, reg, false);
    }
 411 
 412     protected void prefix(AMD64Address adr, Register reg, boolean byteinst) {
 413         if (reg.encoding < 8) {
 414             if (needsRex(adr.getBase())) {
 415                 if (needsRex(adr.getIndex())) {
 416                     emitByte(Prefix.REXXB);
 417                 } else {
 418                     emitByte(Prefix.REXB);
 419                 }
 420             } else {
 421                 if (needsRex(adr.getIndex())) {
 422                     emitByte(Prefix.REXX);
 423                 } else if (byteinst && reg.encoding >= 4) {
 424                     emitByte(Prefix.REX);
 425                 }
 426             }
 427         } else {
 428             if (needsRex(adr.getBase())) {
 429                 if (needsRex(adr.getIndex())) {
 430                     emitByte(Prefix.REXRXB);
 431                 } else {
 432                     emitByte(Prefix.REXRB);
 433                 }
 434             } else {
 435                 if (needsRex(adr.getIndex())) {
 436                     emitByte(Prefix.REXRX);
 437                 } else {
 438                     emitByte(Prefix.REXR);
 439                 }
 440             }
 441         }
 442     }
 443 
 444     protected void prefixq(AMD64Address adr, Register src) {
 445         if (src.encoding < 8) {
 446             if (needsRex(adr.getBase())) {
 447                 if (needsRex(adr.getIndex())) {
 448                     emitByte(Prefix.REXWXB);
 449                 } else {
 450                     emitByte(Prefix.REXWB);
 451                 }
 452             } else {
 453                 if (needsRex(adr.getIndex())) {
 454                     emitByte(Prefix.REXWX);
 455                 } else {
 456                     emitByte(Prefix.REXW);
 457                 }
 458             }
 459         } else {
 460             if (needsRex(adr.getBase())) {
 461                 if (needsRex(adr.getIndex())) {
 462                     emitByte(Prefix.REXWRXB);
 463                 } else {
 464                     emitByte(Prefix.REXWRB);
 465                 }
 466             } else {
 467                 if (needsRex(adr.getIndex())) {
 468                     emitByte(Prefix.REXWRX);
 469                 } else {
 470                     emitByte(Prefix.REXWR);
 471                 }
 472             }
 473         }
 474     }
 475 
 476     /**
 477      * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
 478      * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
 479      * field. The X bit must be 0.
 480      */
 481     protected static int getRXB(Register reg, Register rm) {
 482         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
 483         rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
 484         return rxb;
 485     }
 486 
 487     /**
 488      * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
 489      * are two cases for the memory operand:<br>
 490      * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
 491      * <br>
 492      * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
 493      */
 494     protected static int getRXB(Register reg, AMD64Address rm) {
 495         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
 496         if (!rm.getIndex().equals(Register.None)) {
 497             rxb |= (rm.getIndex().encoding & 0x08) >> 2;
 498         }
 499         if (!rm.getBase().equals(Register.None)) {
 500             rxb |= (rm.getBase().encoding & 0x08) >> 3;
 501         }
 502         return rxb;
 503     }
 504 
 505     /**
 506      * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
 507      * <p>
 508      * Format: [ 11 reg r/m ]
 509      */
 510     protected final void emitModRM(int reg, Register rm) {
 511         assert (reg & 0x07) == reg;
 512         emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
 513     }
 514 
    /**
     * Emit the ModR/M byte for two register operands.
     * <p>
     * Format: [ 11 reg r/m ]
     * <p>
     * Only the low three bits of each register's encoding end up in the byte.
     *
     * @param reg the register placed in the reg field
     * @param rm the register placed in the r/m field
     */
    protected final void emitModRM(Register reg, Register rm) {
        emitModRM(reg.encoding & 0x07, rm);
    }
 523 
    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     * The register must not be {@link Register#None}; no EVEX displacement scaling is applied
     * (a scale of 1 is passed on).
     *
     * @param reg the register operand, encoded into the reg field
     * @param addr the memory operand
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand
     */
    protected final void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize, 1);
    }
 533 
    /**
     * Emits the ModR/M byte and optionally the SIB byte for one memory operand and a raw 3-bit
     * value in the reg field. 4-byte displacements are not forced and no EVEX displacement
     * scaling is applied (a scale of 1 is passed on).
     *
     * @param reg the 3-bit value placed in the reg field
     * @param addr the memory operand
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand
     */
    protected final void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
        emitOperandHelper(reg, addr, false, additionalInstructionSize, 1);
    }
 537 
    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     * The register must not be {@link Register#None}; 4-byte displacements are not forced and no
     * EVEX displacement scaling is applied (a scale of 1 is passed on).
     *
     * @param reg the register operand, encoded into the reg field
     * @param addr the memory operand
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand
     */
    protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, 1);
    }
 542 
    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand,
     * passing on a displacement scaling factor for EVEX-encoded instructions. The register must
     * not be {@link Register#None}; 4-byte displacements are not forced.
     *
     * @param reg the register operand, encoded into the reg field
     * @param addr the memory operand
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand
     * @param evexDisp8Scale the scaling factor used when the one-byte-displacement form is chosen
     */
    protected final void emitEVEXOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, evexDisp8Scale);
    }
 547 
 548     /**
 549      * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
 550      * extension in the R field.
 551      *
 552      * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 553      * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
 554      *            so that the start position of the next instruction can be computed even though
 555      *            this instruction has not been completely emitted yet.
 556      * @param evexDisp8Scale the scaling factor for computing the compressed displacement of
 557      *            EVEX-encoded instructions. This scaling factor only matters when the emitted
 558      *            instruction uses one-byte-displacement form.
 559      */
 560     private void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale) {
 561         assert (reg & 0x07) == reg;
 562         int regenc = reg << 3;
 563 
 564         Register base = addr.getBase();
 565         Register index = addr.getIndex();
 566 
 567         Scale scale = addr.getScale();
 568         int disp = addr.getDisplacement();
 569 
 570         if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
 571             // [00 000 101] disp32
 572             assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
 573             emitByte(0x05 | regenc);
 574             if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
 575                 codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
 576             }
 577             emitInt(disp);
 578         } else if (base.isValid()) {
 579             boolean overriddenForce4Byte = force4Byte;
 580             int baseenc = base.isValid() ? encode(base) : 0;
 581 
 582             if (index.isValid()) {
 583                 int indexenc = encode(index) << 3;
 584                 // [base + indexscale + disp]
 585                 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
 586                     // [base + indexscale]
 587                     // [00 reg 100][ss index base]
 588                     assert !index.equals(rsp) : "illegal addressing mode";
 589                     emitByte(0x04 | regenc);
 590                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 591                 } else {
 592                     if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
 593                         if (disp % evexDisp8Scale == 0) {
 594                             int newDisp = disp / evexDisp8Scale;
 595                             if (isByte(newDisp)) {
 596                                 disp = newDisp;
 597                                 assert isByte(disp) && !overriddenForce4Byte;
 598                             }
 599                         } else {
 600                             overriddenForce4Byte = true;
 601                         }
 602                     }
 603                     if (isByte(disp) && !overriddenForce4Byte) {
 604                         // [base + indexscale + imm8]
 605                         // [01 reg 100][ss index base] imm8
 606                         assert !index.equals(rsp) : "illegal addressing mode";
 607                         emitByte(0x44 | regenc);
 608                         emitByte(scale.log2 << 6 | indexenc | baseenc);
 609                         emitByte(disp & 0xFF);
 610                     } else {
 611                         // [base + indexscale + disp32]
 612                         // [10 reg 100][ss index base] disp32
 613                         assert !index.equals(rsp) : "illegal addressing mode";
 614                         emitByte(0x84 | regenc);
 615                         emitByte(scale.log2 << 6 | indexenc | baseenc);
 616                         emitInt(disp);
 617                     }
 618                 }
 619             } else if (base.equals(rsp) || base.equals(r12)) {
 620                 // [rsp + disp]
 621                 if (disp == 0) {
 622                     // [rsp]
 623                     // [00 reg 100][00 100 100]
 624                     emitByte(0x04 | regenc);
 625                     emitByte(0x24);
 626                 } else {
 627                     if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
 628                         if (disp % evexDisp8Scale == 0) {
 629                             int newDisp = disp / evexDisp8Scale;
 630                             if (isByte(newDisp)) {
 631                                 disp = newDisp;
 632                                 assert isByte(disp) && !overriddenForce4Byte;
 633                             }
 634                         } else {
 635                             overriddenForce4Byte = true;
 636                         }
 637                     }
 638                     if (isByte(disp) && !overriddenForce4Byte) {
 639                         // [rsp + imm8]
 640                         // [01 reg 100][00 100 100] disp8
 641                         emitByte(0x44 | regenc);
 642                         emitByte(0x24);
 643                         emitByte(disp & 0xFF);
 644                     } else {
 645                         // [rsp + imm32]
 646                         // [10 reg 100][00 100 100] disp32
 647                         emitByte(0x84 | regenc);
 648                         emitByte(0x24);
 649                         emitInt(disp);
 650                     }
 651                 }
 652             } else {
 653                 // [base + disp]
 654                 assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
 655                 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
 656                     // [base]
 657                     // [00 reg base]
 658                     emitByte(0x00 | regenc | baseenc);
 659                 } else {
 660                     if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
 661                         if (disp % evexDisp8Scale == 0) {
 662                             int newDisp = disp / evexDisp8Scale;
 663                             if (isByte(newDisp)) {
 664                                 disp = newDisp;
 665                                 assert isByte(disp) && !overriddenForce4Byte;
 666                             }
 667                         } else {
 668                             overriddenForce4Byte = true;
 669                         }
 670                     }
 671                     if (isByte(disp) && !overriddenForce4Byte) {
 672                         // [base + disp8]
 673                         // [01 reg base] disp8
 674                         emitByte(0x40 | regenc | baseenc);
 675                         emitByte(disp & 0xFF);
 676                     } else {
 677                         // [base + disp32]
 678                         // [10 reg base] disp32
 679                         emitByte(0x80 | regenc | baseenc);
 680                         emitInt(disp);
 681                     }
 682                 }
 683             }
 684         } else {
 685             if (index.isValid()) {
 686                 int indexenc = encode(index) << 3;
 687                 // [indexscale + disp]
 688                 // [00 reg 100][ss index 101] disp32
 689                 assert !index.equals(rsp) : "illegal addressing mode";
 690                 emitByte(0x04 | regenc);
 691                 emitByte(scale.log2 << 6 | indexenc | 0x05);
 692                 emitInt(disp);
 693             } else {
 694                 // [disp] ABSOLUTE
 695                 // [00 reg 100][00 100 101] disp32
 696                 emitByte(0x04 | regenc);
 697                 emitByte(0x25);
 698                 emitInt(disp);
 699             }
 700         }
 701     }
 702 
    /**
     * Strategy for emitting the prefix bytes of a SIMD instruction. The constructor of the
     * enclosing assembler picks the implementation depending on whether the target supports AVX.
     */
    private interface SIMDEncoder {

        /**
         * Emits the prefix for an instruction with a register and a memory operand.
         *
         * @param xreg the register operand
         * @param nds an additional register operand; whether it is used is up to the implementation
         * @param adr the memory operand
         * @param sizePrefix the size prefix byte (e.g. 0x66, 0xF2, 0xF3), or 0 for none
         * @param opcodeEscapePrefix the opcode escape byte(s) (e.g. 0x0F, 0x380F, 0x3A0F), or 0 for none
         * @param isRexW whether the W bit is to be set
         */
        void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);

        /**
         * Emits the prefix for an instruction with two register operands.
         *
         * @param dst the destination register
         * @param nds an additional register operand; whether it is used is up to the implementation
         * @param src the source register
         * @param sizePrefix the size prefix byte (e.g. 0x66, 0xF2, 0xF3), or 0 for none
         * @param opcodeEscapePrefix the opcode escape byte(s) (e.g. 0x0F, 0x380F, 0x3A0F), or 0 for none
         * @param isRexW whether the W bit is to be set
         */
        void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);

    }
 710 
 711     private class SSEEncoderImpl implements SIMDEncoder {
 712 
 713         @Override
 714         public void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 715             if (sizePrefix > 0) {
 716                 emitByte(sizePrefix);
 717             }
 718             if (isRexW) {
 719                 prefixq(adr, xreg);
 720             } else {
 721                 prefix(adr, xreg);
 722             }
 723             if (opcodeEscapePrefix > 0xFF) {
 724                 emitShort(opcodeEscapePrefix);
 725             } else if (opcodeEscapePrefix > 0) {
 726                 emitByte(opcodeEscapePrefix);
 727             }
 728         }
 729 
 730         @Override
 731         public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 732             if (sizePrefix > 0) {
 733                 emitByte(sizePrefix);
 734             }
 735             if (isRexW) {
 736                 prefixq(dst, src);
 737             } else {
 738                 prefix(dst, src);
 739             }
 740             if (opcodeEscapePrefix > 0xFF) {
 741                 emitShort(opcodeEscapePrefix);
 742             } else if (opcodeEscapePrefix > 0) {
 743                 emitByte(opcodeEscapePrefix);
 744             }
 745         }
 746     }
 747 
 748     public static final class VEXPrefixConfig {
 749         public static final int L128 = 0;
 750         public static final int L256 = 1;
 751         public static final int LZ = 0;
 752 
 753         public static final int W0 = 0;
 754         public static final int W1 = 1;
 755         public static final int WIG = 0;
 756 
 757         public static final int P_ = 0x0;
 758         public static final int P_66 = 0x1;
 759         public static final int P_F3 = 0x2;
 760         public static final int P_F2 = 0x3;
 761 
 762         public static final int M_0F = 0x1;
 763         public static final int M_0F38 = 0x2;
 764         public static final int M_0F3A = 0x3;
 765 
 766         private VEXPrefixConfig() {
 767         }
 768     }
 769 
 770     private class VEXEncoderImpl implements SIMDEncoder {
 771 
 772         private int sizePrefixToPP(int sizePrefix) {
 773             switch (sizePrefix) {
 774                 case 0x66:
 775                     return P_66;
 776                 case 0xF2:
 777                     return P_F2;
 778                 case 0xF3:
 779                     return P_F3;
 780                 default:
 781                     return P_;
 782             }
 783         }
 784 
 785         private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) {
 786             switch (opcodeEscapePrefix) {
 787                 case 0x0F:
 788                     return M_0F;
 789                 case 0x380F:
 790                     return M_0F38;
 791                 case 0x3A0F:
 792                     return M_0F3A;
 793                 default:
 794                     return 0;
 795             }
 796         }
 797 
 798         @Override
 799         public void simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 800             emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0);
 801         }
 802 
 803         @Override
 804         public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 805             emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0);
 806         }
 807     }
 808 
 809     protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 810         simdEncoder.simdPrefix(xreg, nds, adr, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
 811     }
 812 
 813     protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
 814         simdEncoder.simdPrefix(xreg, nds, adr, size.sizePrefix, opcodeEscapePrefix, isRexW);
 815     }
 816 
 817     protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 818         simdEncoder.simdPrefix(dst, nds, src, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
 819     }
 820 
 821     protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
 822         simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW);
 823     }
 824 
 825     /**
 826      * Low-level function to encode and emit the VEX prefix.
 827      * <p>
 828      * 2 byte form: [1100 0101] [R vvvv L pp]<br>
 829      * 3 byte form: [1100 0100] [RXB m-mmmm] [W vvvv L pp]
 830      * <p>
 831      * The RXB and vvvv fields are stored in 1's complement in the prefix encoding. This function
 832      * performs the 1s complement conversion, the caller is expected to pass plain unencoded
 833      * arguments.
 834      * <p>
 835      * The pp field encodes an extension to the opcode:<br>
 836      * 00: no extension<br>
 837      * 01: 66<br>
 838      * 10: F3<br>
 839      * 11: F2
 840      * <p>
 841      * The m-mmmm field encodes the leading bytes of the opcode:<br>
 842      * 00001: implied 0F leading opcode byte (default in 2-byte encoding)<br>
 843      * 00010: implied 0F 38 leading opcode bytes<br>
 844      * 00011: implied 0F 3A leading opcode bytes
 845      * <p>
 846      * This function automatically chooses the 2 or 3 byte encoding, based on the XBW flags and the
 847      * m-mmmm field.
 848      */
 849     protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv) {
 850         assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX) : "emitting VEX prefix on a CPU without AVX support";
 851 
 852         assert l == L128 || l == L256 : "invalid value for VEX.L";
 853         assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for VEX.pp";
 854         assert mmmmm == M_0F || mmmmm == M_0F38 || mmmmm == M_0F3A : "invalid value for VEX.m-mmmm";
 855         assert w == W0 || w == W1 : "invalid value for VEX.W";
 856 
 857         assert (rxb & 0x07) == rxb : "invalid value for VEX.RXB";
 858         assert (vvvv & 0x0F) == vvvv : "invalid value for VEX.vvvv";
 859 
 860         int rxb1s = rxb ^ 0x07;
 861         int vvvv1s = vvvv ^ 0x0F;
 862         if ((rxb & 0x03) == 0 && w == WIG && mmmmm == M_0F) {
 863             // 2 byte encoding
 864             int byte2 = 0;
 865             byte2 |= (rxb1s & 0x04) << 5;
 866             byte2 |= vvvv1s << 3;
 867             byte2 |= l << 2;
 868             byte2 |= pp;
 869 
 870             emitByte(0xC5);
 871             emitByte(byte2);
 872         } else {
 873             // 3 byte encoding
 874             int byte2 = 0;
 875             byte2 = (rxb1s & 0x07) << 5;
 876             byte2 |= mmmmm;
 877 
 878             int byte3 = 0;
 879             byte3 |= w << 7;
 880             byte3 |= vvvv1s << 3;
 881             byte3 |= l << 2;
 882             byte3 |= pp;
 883 
 884             emitByte(0xC4);
 885             emitByte(byte2);
 886             emitByte(byte3);
 887         }
 888     }
 889 
 890     public static int getLFlag(AVXSize size) {
 891         switch (size) {
 892             case XMM:
 893                 return L128;
 894             case YMM:
 895                 return L256;
 896             case ZMM:
 897                 return L512;
 898             default:
 899                 return LZ;
 900         }
 901     }
 902 
 903     public final void vexPrefix(Register dst, Register nds, Register src, AVXSize size, int pp, int mmmmm, int w) {
 904         emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0);
 905     }
 906 
 907     public final void vexPrefix(Register dst, Register nds, AMD64Address src, AVXSize size, int pp, int mmmmm, int w) {
 908         emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0);
 909     }
 910 
 911     protected static final class EVEXPrefixConfig {
 912         public static final int L512 = 2;
 913         public static final int LIG = 0;
 914 
 915         public static final int Z0 = 0x0;
 916         public static final int Z1 = 0x1;
 917 
 918         public static final int B0 = 0x0;
 919         public static final int B1 = 0x1;
 920 
 921         private EVEXPrefixConfig() {
 922         }
 923     }
 924 
    // Sentinel used in EVEXTuple for tuple type / vector length combinations that have no disp8
    // scaling factor; requesting such a factor raises a GraalError.
    private static final int NOT_SUPPORTED_VECTOR_LENGTH = -1;
 926 
 927     /**
 928      * EVEX-encoded instructions use a compressed displacement scheme by multiplying disp8 with a
 929      * scaling factor N depending on the tuple type and the vector length.
 930      *
 931      * Reference: Intel Software Developer's Manual Volume 2, Section 2.6.5
 932      */
 933     protected enum EVEXTuple {
 934         FV_NO_BROADCAST_32BIT(16, 32, 64),
 935         FV_BROADCAST_32BIT(4, 4, 4),
 936         FV_NO_BROADCAST_64BIT(16, 32, 64),
 937         FV_BROADCAST_64BIT(8, 8, 8),
 938         HV_NO_BROADCAST_32BIT(8, 16, 32),
 939         HV_BROADCAST_32BIT(4, 4, 4),
 940         FVM(16, 32, 64),
 941         T1S_8BIT(1, 1, 1),
 942         T1S_16BIT(2, 2, 2),
 943         T1S_32BIT(4, 4, 4),
 944         T1S_64BIT(8, 8, 8),
 945         T1F_32BIT(4, 4, 4),
 946         T1F_64BIT(8, 8, 8),
 947         T2_32BIT(8, 8, 8),
 948         T2_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
 949         T4_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
 950         T4_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
 951         T8_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
 952         HVM(8, 16, 32),
 953         QVM(4, 8, 16),
 954         OVM(2, 4, 8),
 955         M128(16, 16, 16),
 956         DUP(8, 32, 64);
 957 
 958         private final int scalingFactorVL128;
 959         private final int scalingFactorVL256;
 960         private final int scalingFactorVL512;
 961 
 962         EVEXTuple(int scalingFactorVL128, int scalingFactorVL256, int scalingFactorVL512) {
 963             this.scalingFactorVL128 = scalingFactorVL128;
 964             this.scalingFactorVL256 = scalingFactorVL256;
 965             this.scalingFactorVL512 = scalingFactorVL512;
 966         }
 967 
 968         private static int verifyScalingFactor(int scalingFactor) {
 969             if (scalingFactor == NOT_SUPPORTED_VECTOR_LENGTH) {
 970                 throw GraalError.shouldNotReachHere("Invalid scaling factor.");
 971             }
 972             return scalingFactor;
 973         }
 974 
 975         public int getDisp8ScalingFactor(AVXSize size) {
 976             switch (size) {
 977                 case XMM:
 978                     return verifyScalingFactor(scalingFactorVL128);
 979                 case YMM:
 980                     return verifyScalingFactor(scalingFactorVL256);
 981                 case ZMM:
 982                     return verifyScalingFactor(scalingFactorVL512);
 983                 default:
 984                     throw GraalError.shouldNotReachHere("Unsupported vector size.");
 985             }
 986         }
 987     }
 988 
 989     /**
 990      * Low-level function to encode and emit the EVEX prefix.
 991      * <p>
 992      * 62 [0 1 1 0 0 0 1 0]<br>
 993      * P1 [R X B R'0 0 m m]<br>
 994      * P2 [W v v v v 1 p p]<br>
 995      * P3 [z L'L b V'a a a]
 996      * <p>
 997      * The pp field encodes an extension to the opcode:<br>
 998      * 00: no extension<br>
 999      * 01: 66<br>
1000      * 10: F3<br>
1001      * 11: F2
1002      * <p>
1003      * The mm field encodes the leading bytes of the opcode:<br>
1004      * 01: implied 0F leading opcode byte<br>
1005      * 10: implied 0F 38 leading opcode bytes<br>
1006      * 11: implied 0F 3A leading opcode bytes
1007      * <p>
1008      * The z field encodes the merging mode (merge or zero).
1009      * <p>
1010      * The b field encodes the source broadcast or data rounding modes.
1011      * <p>
1012      * The aaa field encodes the operand mask register.
1013      */
1014     private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv, int z, int b, int aaa) {
1015         assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX512F) : "emitting EVEX prefix on a CPU without AVX512 support";
1016 
1017         assert l == L128 || l == L256 || l == L512 : "invalid value for EVEX.L'L";
1018         assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for EVEX.pp";
1019         assert mm == M_0F || mm == M_0F38 || mm == M_0F3A : "invalid value for EVEX.mm";
1020         assert w == W0 || w == W1 : "invalid value for EVEX.W";
1021 
1022         assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB";
1023         assert (reg & 0x1F) == reg : "invalid value for EVEX.R'";
1024         assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.vvvvv";
1025 
1026         assert z == Z0 || z == Z1 : "invalid value for EVEX.z";
1027         assert b == B0 || b == B1 : "invalid value for EVEX.b";
1028         assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa";
1029 
1030         emitByte(0x62);
1031         int p1 = 0;
1032         p1 |= ((rxb ^ 0x07) & 0x07) << 5;
1033         p1 |= reg < 16 ? 0x10 : 0;
1034         p1 |= mm;
1035         emitByte(p1);
1036 
1037         int p2 = 0;
1038         p2 |= w << 7;
1039         p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3;
1040         p2 |= 0x4;
1041         p2 |= pp;
1042         emitByte(p2);
1043 
1044         int p3 = 0;
1045         p3 |= z << 7;
1046         p3 |= l << 5;
1047         p3 |= b << 4;
1048         p3 |= vvvvv < 16 ? 0x08 : 0;
1049         p3 |= aaa;
1050         emitByte(p3);
1051     }
1052 
1053     private static int getRXBForEVEX(Register reg, Register rm) {
1054         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
1055         rxb |= (rm == null ? 0 : rm.encoding & 0x018) >> 3;
1056         return rxb;
1057     }
1058 
1059     /**
1060      * Helper method for emitting EVEX prefix in the form of RRRR.
1061      */
1062     protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) {
1063         assert !mask.isValid() || mask.getRegisterCategory().equals(MASK);
1064         emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
1065     }
1066 
1067     /**
1068      * Helper method for emitting EVEX prefix in the form of RRRM. Because the memory addressing in
1069      * EVEX-encoded instructions employ a compressed displacement scheme when using disp8 form, the
1070      * user of this API should make sure to encode the operands using
1071      * {@link #emitEVEXOperandHelper(Register, AMD64Address, int, int)}.
1072      */
1073     protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) {
1074         assert !mask.isValid() || mask.getRegisterCategory().equals(MASK);
1075         emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
1076     }
1077 
1078 }