1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 package org.graalvm.compiler.asm.amd64;
  26 
  27 import static jdk.vm.ci.amd64.AMD64.MASK;
  28 import static jdk.vm.ci.amd64.AMD64.XMM;
  29 import static jdk.vm.ci.amd64.AMD64.r12;
  30 import static jdk.vm.ci.amd64.AMD64.r13;
  31 import static jdk.vm.ci.amd64.AMD64.rbp;
  32 import static jdk.vm.ci.amd64.AMD64.rsp;
  33 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
  34 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B1;
  35 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.L512;
  36 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
  37 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
  38 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
  39 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
  40 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
  41 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
  42 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
  43 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
  44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
  45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
  46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
  47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
  48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
  49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
  50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
  51 import static org.graalvm.compiler.core.common.NumUtil.isByte;
  52 
  53 import org.graalvm.compiler.asm.Assembler;
  54 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
  55 import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
  56 import org.graalvm.compiler.debug.GraalError;
  57 
  58 import jdk.vm.ci.amd64.AMD64;
  59 import jdk.vm.ci.amd64.AMD64.CPUFeature;
  60 import jdk.vm.ci.amd64.AMD64Kind;
  61 import jdk.vm.ci.code.Register;
  62 import jdk.vm.ci.code.Register.RegisterCategory;
  63 import jdk.vm.ci.code.TargetDescription;
  64 import jdk.vm.ci.meta.PlatformKind;
  65 
  66 /**
  67  * This class implements an assembler that can encode most X86 instructions.
  68  */
  69 public abstract class AMD64BaseAssembler extends Assembler {
  70 
  71     private final SIMDEncoder simdEncoder;
  72 
  73     /**
  74      * Constructs an assembler for the AMD64 architecture.
  75      */
  76     public AMD64BaseAssembler(TargetDescription target) {
  77         super(target);
  78 
  79         if (supports(CPUFeature.AVX)) {
  80             simdEncoder = new VEXEncoderImpl();
  81         } else {
  82             simdEncoder = new SSEEncoderImpl();
  83         }
  84     }
  85 
  86     /**
  87      * The x86 operand sizes.
  88      */
  89     public enum OperandSize {
  90         BYTE(1, AMD64Kind.BYTE) {
  91             @Override
  92             protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
  93                 assert imm == (byte) imm;
  94                 asm.emitByte(imm);
  95             }
  96 
  97             @Override
  98             protected int immediateSize() {
  99                 return 1;
 100             }
 101         },
 102 
 103         WORD(2, AMD64Kind.WORD, 0x66) {
 104             @Override
 105             protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
 106                 assert imm == (short) imm;
 107                 asm.emitShort(imm);
 108             }
 109 
 110             @Override
 111             protected int immediateSize() {
 112                 return 2;
 113             }
 114         },
 115 
 116         DWORD(4, AMD64Kind.DWORD) {
 117             @Override
 118             protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
 119                 asm.emitInt(imm);
 120             }
 121 
 122             @Override
 123             protected int immediateSize() {
 124                 return 4;
 125             }
 126         },
 127 
 128         QWORD(8, AMD64Kind.QWORD) {
 129             @Override
 130             protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
 131                 asm.emitInt(imm);
 132             }
 133 
 134             @Override
 135             protected int immediateSize() {
 136                 return 4;
 137             }
 138         },
 139 
 140         SS(4, AMD64Kind.SINGLE, 0xF3, true),
 141 
 142         SD(8, AMD64Kind.DOUBLE, 0xF2, true),
 143 
 144         PS(16, AMD64Kind.V128_SINGLE, true),
 145 
 146         PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);
 147 
 148         private final int sizePrefix;
 149         private final int bytes;
 150         private final boolean xmm;
 151         private final AMD64Kind kind;
 152 
 153         OperandSize(int bytes, AMD64Kind kind) {
 154             this(bytes, kind, 0);
 155         }
 156 
 157         OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
 158             this(bytes, kind, sizePrefix, false);
 159         }
 160 
 161         OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
 162             this(bytes, kind, 0, xmm);
 163         }
 164 
 165         OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
 166             this.sizePrefix = sizePrefix;
 167             this.bytes = bytes;
 168             this.kind = kind;
 169             this.xmm = xmm;
 170         }
 171 
 172         public int getSizePrefix() {
 173             return sizePrefix;
 174         }
 175 
 176         public int getBytes() {
 177             return bytes;
 178         }
 179 
 180         public boolean isXmmType() {
 181             return xmm;
 182         }
 183 
 184         public AMD64Kind getKind() {
 185             return kind;
 186         }
 187 
 188         public static OperandSize get(PlatformKind kind) {
 189             for (OperandSize operandSize : OperandSize.values()) {
 190                 if (operandSize.kind.equals(kind)) {
 191                     return operandSize;
 192                 }
 193             }
 194             throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
 195         }
 196 
        /**
         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
         * as sign-extended 32-bit values.
         *
         * @param asm the assembler used to emit the immediate
         * @param imm the immediate value to emit
         */
 204         protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
 205             throw new UnsupportedOperationException();
 206         }
 207 
 208         protected int immediateSize() {
 209             throw new UnsupportedOperationException();
 210         }
 211     }
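
    // Illustrative note on the enum above (a sketch, not something this class relies on):
    // OperandSize.QWORD.emitImmediate(asm, imm) writes only 4 bytes, in line with its
    // immediateSize() of 4; the processor sign-extends that 32-bit value to 64 bits at run time,
    // so a full 64-bit immediate cannot be expressed through this path.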
 212 
 213     public abstract static class OperandDataAnnotation extends CodeAnnotation {
 214         /**
 215          * The position (bytes from the beginning of the method) of the operand.
 216          */
 217         public final int operandPosition;
 218         /**
 219          * The size of the operand, in bytes.
 220          */
 221         public final int operandSize;
 222         /**
 223          * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
 224          * RIP-relative operands are relative to this position.
 225          */
 226         public final int nextInstructionPosition;
 227 
 228         OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
 229             super(instructionPosition);
 230 
 231             this.operandPosition = operandPosition;
 232             this.operandSize = operandSize;
 233             this.nextInstructionPosition = nextInstructionPosition;
 234         }
 235 
 236         @Override
 237         public String toString() {
 238             return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
 239         }
 240     }
 241 
 242     /**
 243      * Annotation that stores additional information about the displacement of a
 244      * {@link Assembler#getPlaceholder placeholder address} that needs patching.
 245      */
 246     protected static class AddressDisplacementAnnotation extends OperandDataAnnotation {
 247         AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
 248             super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
 249         }
 250     }
 251 
 252     /**
 253      * Annotation that stores additional information about the immediate operand, e.g., of a call
 254      * instruction, that needs patching.
 255      */
 256     protected static class ImmediateOperandAnnotation extends OperandDataAnnotation {
 257         ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
 258             super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
 259         }
 260     }
 261 
 262     protected void annotatePatchingImmediate(int operandOffset, int operandSize) {
 263         if (codePatchingAnnotationConsumer != null) {
 264             int pos = position();
 265             codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + operandOffset, operandSize, pos + operandOffset + operandSize));
 266         }
 267     }
 268 
 269     public final boolean supports(CPUFeature feature) {
 270         return ((AMD64) target.arch).getFeatures().contains(feature);
 271     }
 272 
 273     protected static boolean inRC(RegisterCategory rc, Register r) {
 274         return r.getRegisterCategory().equals(rc);
 275     }
 276 
 277     protected static int encode(Register r) {
 278         assert r.encoding >= 0 && (inRC(XMM, r) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding;
 279         return r.encoding & 0x7;
 280     }
 281 
 282     private static final int MinEncodingNeedsRex = 8;
 283 
 284     /**
 285      * Constants for X86 prefix bytes.
 286      */
 287     private static class Prefix {
 288         private static final int REX = 0x40;
 289         private static final int REXB = 0x41;
 290         private static final int REXX = 0x42;
 291         private static final int REXXB = 0x43;
 292         private static final int REXR = 0x44;
 293         private static final int REXRB = 0x45;
 294         private static final int REXRX = 0x46;
 295         private static final int REXRXB = 0x47;
 296         private static final int REXW = 0x48;
 297         private static final int REXWB = 0x49;
 298         private static final int REXWX = 0x4A;
 299         private static final int REXWXB = 0x4B;
 300         private static final int REXWR = 0x4C;
 301         private static final int REXWRB = 0x4D;
 302         private static final int REXWRX = 0x4E;
 303         private static final int REXWRXB = 0x4F;
 304 
 305         private static final int VEX2 = 0xC5;
 306         private static final int VEX3 = 0xC4;
 307         private static final int EVEX = 0x62;
 308     }
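
    // Worked example for the constants above (illustrative only; the values follow the fixed REX
    // layout 0b0100WRXB): REXWB == 0x49 == 0b0100_1001 sets W (64-bit operand size) and B
    // (extending ModR/M.rm, SIB.base, or the opcode register field), which is what prefixq emits
    // for a single operand in r8..r15.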
 309 
 310     protected final void rexw() {
 311         emitByte(Prefix.REXW);
 312     }
 313 
 314     protected final void prefix(Register reg) {
 315         prefix(reg, false);
 316     }
 317 
 318     protected final void prefix(Register reg, boolean byteinst) {
 319         int regEnc = reg.encoding;
 320         if (regEnc >= 8) {
 321             emitByte(Prefix.REXB);
 322         } else if (byteinst && regEnc >= 4) {
 323             emitByte(Prefix.REX);
 324         }
 325     }
 326 
 327     protected final void prefixq(Register reg) {
 328         if (reg.encoding < 8) {
 329             emitByte(Prefix.REXW);
 330         } else {
 331             emitByte(Prefix.REXWB);
 332         }
 333     }
 334 
 335     protected final void prefix(Register dst, Register src) {
 336         prefix(dst, false, src, false);
 337     }
 338 
 339     protected final void prefix(Register dst, boolean dstIsByte, Register src, boolean srcIsByte) {
 340         int dstEnc = dst.encoding;
 341         int srcEnc = src.encoding;
 342         if (dstEnc < 8) {
 343             if (srcEnc >= 8) {
 344                 emitByte(Prefix.REXB);
 345             } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
 346                 emitByte(Prefix.REX);
 347             }
 348         } else {
 349             if (srcEnc < 8) {
 350                 emitByte(Prefix.REXR);
 351             } else {
 352                 emitByte(Prefix.REXRB);
 353             }
 354         }
 355     }
 356 
    /**
     * Emits a REX.W prefix for the given operands. If a register encoding does not fit into 3
     * bits, its 4th bit is encoded in the prefix.
     */
 361     protected final void prefixq(Register reg, Register rm) {
 362         int regEnc = reg.encoding;
 363         int rmEnc = rm.encoding;
 364         if (regEnc < 8) {
 365             if (rmEnc < 8) {
 366                 emitByte(Prefix.REXW);
 367             } else {
 368                 emitByte(Prefix.REXWB);
 369             }
 370         } else {
 371             if (rmEnc < 8) {
 372                 emitByte(Prefix.REXWR);
 373             } else {
 374                 emitByte(Prefix.REXWRB);
 375             }
 376         }
 377     }
 378 
 379     private static boolean needsRex(Register reg) {
 380         return reg.encoding >= MinEncodingNeedsRex;
 381     }
 382 
 383     protected final void prefix(AMD64Address adr) {
 384         if (needsRex(adr.getBase())) {
 385             if (needsRex(adr.getIndex())) {
 386                 emitByte(Prefix.REXXB);
 387             } else {
 388                 emitByte(Prefix.REXB);
 389             }
 390         } else {
 391             if (needsRex(adr.getIndex())) {
 392                 emitByte(Prefix.REXX);
 393             }
 394         }
 395     }
 396 
 397     protected final void prefixq(AMD64Address adr) {
 398         if (needsRex(adr.getBase())) {
 399             if (needsRex(adr.getIndex())) {
 400                 emitByte(Prefix.REXWXB);
 401             } else {
 402                 emitByte(Prefix.REXWB);
 403             }
 404         } else {
 405             if (needsRex(adr.getIndex())) {
 406                 emitByte(Prefix.REXWX);
 407             } else {
 408                 emitByte(Prefix.REXW);
 409             }
 410         }
 411     }
 412 
 413     protected void prefixb(AMD64Address adr, Register reg) {
 414         prefix(adr, reg, true);
 415     }
 416 
 417     protected void prefix(AMD64Address adr, Register reg) {
 418         prefix(adr, reg, false);
 419     }
 420 
 421     protected void prefix(AMD64Address adr, Register reg, boolean byteinst) {
 422         if (reg.encoding < 8) {
 423             if (needsRex(adr.getBase())) {
 424                 if (needsRex(adr.getIndex())) {
 425                     emitByte(Prefix.REXXB);
 426                 } else {
 427                     emitByte(Prefix.REXB);
 428                 }
 429             } else {
 430                 if (needsRex(adr.getIndex())) {
 431                     emitByte(Prefix.REXX);
 432                 } else if (byteinst && reg.encoding >= 4) {
 433                     emitByte(Prefix.REX);
 434                 }
 435             }
 436         } else {
 437             if (needsRex(adr.getBase())) {
 438                 if (needsRex(adr.getIndex())) {
 439                     emitByte(Prefix.REXRXB);
 440                 } else {
 441                     emitByte(Prefix.REXRB);
 442                 }
 443             } else {
 444                 if (needsRex(adr.getIndex())) {
 445                     emitByte(Prefix.REXRX);
 446                 } else {
 447                     emitByte(Prefix.REXR);
 448                 }
 449             }
 450         }
 451     }
 452 
 453     protected void prefixq(AMD64Address adr, Register src) {
 454         if (src.encoding < 8) {
 455             if (needsRex(adr.getBase())) {
 456                 if (needsRex(adr.getIndex())) {
 457                     emitByte(Prefix.REXWXB);
 458                 } else {
 459                     emitByte(Prefix.REXWB);
 460                 }
 461             } else {
 462                 if (needsRex(adr.getIndex())) {
 463                     emitByte(Prefix.REXWX);
 464                 } else {
 465                     emitByte(Prefix.REXW);
 466                 }
 467             }
 468         } else {
 469             if (needsRex(adr.getBase())) {
 470                 if (needsRex(adr.getIndex())) {
 471                     emitByte(Prefix.REXWRXB);
 472                 } else {
 473                     emitByte(Prefix.REXWRB);
 474                 }
 475             } else {
 476                 if (needsRex(adr.getIndex())) {
 477                     emitByte(Prefix.REXWRX);
 478                 } else {
 479                     emitByte(Prefix.REXWR);
 480                 }
 481             }
 482         }
 483     }
 484 
 485     /**
     * Gets the RXB bits for a register-register instruction. In that encoding, ModRM.rm contains a
 487      * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
 488      * field. The X bit must be 0.
 489      */
 490     protected static int getRXB(Register reg, Register rm) {
 491         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
 492         rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
 493         return rxb;
 494     }
 495 
 496     /**
     * Gets the RXB bits for a register-memory instruction. The R bit extends the ModRM.reg field.
     * There are two cases for the memory operand:<br>
 499      * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
 500      * <br>
 501      * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
 502      */
 503     protected static int getRXB(Register reg, AMD64Address rm) {
 504         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
 505         if (!rm.getIndex().equals(Register.None)) {
 506             rxb |= (rm.getIndex().encoding & 0x08) >> 2;
 507         }
 508         if (!rm.getBase().equals(Register.None)) {
 509             rxb |= (rm.getBase().encoding & 0x08) >> 3;
 510         }
 511         return rxb;
 512     }
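
    // Worked example for the two getRXB variants above (illustrative only): for reg == r9
    // (encoding 9) and a register rm with encoding 0, R == 1 and X == B == 0, so the result is
    // 0b100. For the same reg and a memory operand [r12 + r13*2], B (from the base r12) and X
    // (from the index r13) are set as well, giving 0b111.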
 513 
 514     /**
 515      * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
 516      * <p>
 517      * Format: [ 11 reg r/m ]
 518      */
 519     protected final void emitModRM(int reg, Register rm) {
 520         assert (reg & 0x07) == reg;
 521         emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
 522     }
 523 
 524     /**
 525      * Emit the ModR/M byte for two register operands.
 526      * <p>
 527      * Format: [ 11 reg r/m ]
 528      */
 529     protected final void emitModRM(Register reg, Register rm) {
 530         emitModRM(reg.encoding & 0x07, rm);
 531     }
 532 
 533     /**
 534      * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
 535      *
 536      * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 537      */
 538     protected final void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
 539         assert !reg.equals(Register.None);
 540         emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize, 1);
 541     }
 542 
 543     protected final void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
 544         emitOperandHelper(reg, addr, false, additionalInstructionSize, 1);
 545     }
 546 
 547     protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
 548         assert !reg.equals(Register.None);
 549         emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, 1);
 550     }
 551 
 552     protected final void emitEVEXOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale) {
 553         assert !reg.equals(Register.None);
 554         emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, evexDisp8Scale);
 555     }
 556 
 557     /**
 558      * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
 559      * extension in the R field.
 560      *
 561      * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 562      * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
 563      *            so that the start position of the next instruction can be computed even though
 564      *            this instruction has not been completely emitted yet.
 565      * @param evexDisp8Scale the scaling factor for computing the compressed displacement of
 566      *            EVEX-encoded instructions. This scaling factor only matters when the emitted
 567      *            instruction uses one-byte-displacement form.
 568      */
 569     private void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale) {
 570         assert (reg & 0x07) == reg;
 571         int regenc = reg << 3;
 572 
 573         Register base = addr.getBase();
 574         Register index = addr.getIndex();
 575 
 576         Scale scale = addr.getScale();
 577         int disp = addr.getDisplacement();
 578 
 579         if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
 580             // [00 000 101] disp32
 581             assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
 582             emitByte(0x05 | regenc);
 583             if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
 584                 codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
 585             }
 586             emitInt(disp);
 587         } else if (base.isValid()) {
 588             boolean overriddenForce4Byte = force4Byte;
 589             int baseenc = base.isValid() ? encode(base) : 0;
 590 
 591             if (index.isValid()) {
 592                 int indexenc = encode(index) << 3;
 593                 // [base + indexscale + disp]
 594                 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
 595                     // [base + indexscale]
 596                     // [00 reg 100][ss index base]
 597                     assert !index.equals(rsp) : "illegal addressing mode";
 598                     emitByte(0x04 | regenc);
 599                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 600                 } else {
 601                     if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
 602                         if (disp % evexDisp8Scale == 0) {
 603                             int newDisp = disp / evexDisp8Scale;
 604                             if (isByte(newDisp)) {
 605                                 disp = newDisp;
 606                                 assert isByte(disp) && !overriddenForce4Byte;
 607                             }
 608                         } else {
 609                             overriddenForce4Byte = true;
 610                         }
 611                     }
 612                     if (isByte(disp) && !overriddenForce4Byte) {
 613                         // [base + indexscale + imm8]
 614                         // [01 reg 100][ss index base] imm8
 615                         assert !index.equals(rsp) : "illegal addressing mode";
 616                         emitByte(0x44 | regenc);
 617                         emitByte(scale.log2 << 6 | indexenc | baseenc);
 618                         emitByte(disp & 0xFF);
 619                     } else {
 620                         // [base + indexscale + disp32]
 621                         // [10 reg 100][ss index base] disp32
 622                         assert !index.equals(rsp) : "illegal addressing mode";
 623                         emitByte(0x84 | regenc);
 624                         emitByte(scale.log2 << 6 | indexenc | baseenc);
 625                         emitInt(disp);
 626                     }
 627                 }
 628             } else if (base.equals(rsp) || base.equals(r12)) {
 629                 // [rsp + disp]
 630                 if (disp == 0) {
 631                     // [rsp]
 632                     // [00 reg 100][00 100 100]
 633                     emitByte(0x04 | regenc);
 634                     emitByte(0x24);
 635                 } else {
 636                     if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
 637                         if (disp % evexDisp8Scale == 0) {
 638                             int newDisp = disp / evexDisp8Scale;
 639                             if (isByte(newDisp)) {
 640                                 disp = newDisp;
 641                                 assert isByte(disp) && !overriddenForce4Byte;
 642                             }
 643                         } else {
 644                             overriddenForce4Byte = true;
 645                         }
 646                     }
 647                     if (isByte(disp) && !overriddenForce4Byte) {
 648                         // [rsp + imm8]
 649                         // [01 reg 100][00 100 100] disp8
 650                         emitByte(0x44 | regenc);
 651                         emitByte(0x24);
 652                         emitByte(disp & 0xFF);
 653                     } else {
 654                         // [rsp + imm32]
 655                         // [10 reg 100][00 100 100] disp32
 656                         emitByte(0x84 | regenc);
 657                         emitByte(0x24);
 658                         emitInt(disp);
 659                     }
 660                 }
 661             } else {
 662                 // [base + disp]
 663                 assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
 664                 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
 665                     // [base]
 666                     // [00 reg base]
 667                     emitByte(0x00 | regenc | baseenc);
 668                 } else {
 669                     if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
 670                         if (disp % evexDisp8Scale == 0) {
 671                             int newDisp = disp / evexDisp8Scale;
 672                             if (isByte(newDisp)) {
 673                                 disp = newDisp;
 674                                 assert isByte(disp) && !overriddenForce4Byte;
 675                             }
 676                         } else {
 677                             overriddenForce4Byte = true;
 678                         }
 679                     }
 680                     if (isByte(disp) && !overriddenForce4Byte) {
 681                         // [base + disp8]
 682                         // [01 reg base] disp8
 683                         emitByte(0x40 | regenc | baseenc);
 684                         emitByte(disp & 0xFF);
 685                     } else {
 686                         // [base + disp32]
 687                         // [10 reg base] disp32
 688                         emitByte(0x80 | regenc | baseenc);
 689                         emitInt(disp);
 690                     }
 691                 }
 692             }
 693         } else {
 694             if (index.isValid()) {
 695                 int indexenc = encode(index) << 3;
 696                 // [indexscale + disp]
 697                 // [00 reg 100][ss index 101] disp32
 698                 assert !index.equals(rsp) : "illegal addressing mode";
 699                 emitByte(0x04 | regenc);
 700                 emitByte(scale.log2 << 6 | indexenc | 0x05);
 701                 emitInt(disp);
 702             } else {
 703                 // [disp] ABSOLUTE
 704                 // [00 reg 100][00 100 101] disp32
 705                 emitByte(0x04 | regenc);
 706                 emitByte(0x25);
 707                 emitInt(disp);
 708             }
 709         }
 710     }
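
    // Worked example for emitOperandHelper (illustrative only): encoding the operand part of
    // "mov eax, [rbx + rcx*4 + 16]" with reg == rax (encoding 0) takes the
    // [base + indexscale + imm8] branch and emits ModR/M 0x44, SIB 0x8B (scale.log2 == 2,
    // index == rcx, base == rbx) and the displacement byte 0x10, i.e. the familiar 8B 44 8B 10
    // once the 0x8B opcode byte is included.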
 711 
 712     private interface SIMDEncoder {
 713 
 714         void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);
 715 
 716         void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);
 717 
 718     }
 719 
 720     private class SSEEncoderImpl implements SIMDEncoder {
 721 
 722         @Override
 723         public void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 724             if (sizePrefix > 0) {
 725                 emitByte(sizePrefix);
 726             }
 727             if (isRexW) {
 728                 prefixq(adr, xreg);
 729             } else {
 730                 prefix(adr, xreg);
 731             }
 732             if (opcodeEscapePrefix > 0xFF) {
 733                 emitShort(opcodeEscapePrefix);
 734             } else if (opcodeEscapePrefix > 0) {
 735                 emitByte(opcodeEscapePrefix);
 736             }
 737         }
 738 
 739         @Override
 740         public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 741             if (sizePrefix > 0) {
 742                 emitByte(sizePrefix);
 743             }
 744             if (isRexW) {
 745                 prefixq(dst, src);
 746             } else {
 747                 prefix(dst, src);
 748             }
 749             if (opcodeEscapePrefix > 0xFF) {
 750                 emitShort(opcodeEscapePrefix);
 751             } else if (opcodeEscapePrefix > 0) {
 752                 emitByte(opcodeEscapePrefix);
 753             }
 754         }
 755     }
 756 
 757     public static final class VEXPrefixConfig {
 758         public static final int L128 = 0;
 759         public static final int L256 = 1;
 760         public static final int LZ = 0;
 761 
 762         public static final int W0 = 0;
 763         public static final int W1 = 1;
 764         public static final int WIG = 0;
 765 
 766         public static final int P_ = 0x0;
 767         public static final int P_66 = 0x1;
 768         public static final int P_F3 = 0x2;
 769         public static final int P_F2 = 0x3;
 770 
 771         public static final int M_0F = 0x1;
 772         public static final int M_0F38 = 0x2;
 773         public static final int M_0F3A = 0x3;
 774 
 775         private VEXPrefixConfig() {
 776         }
 777     }
 778 
 779     private class VEXEncoderImpl implements SIMDEncoder {
 780 
 781         private int sizePrefixToPP(int sizePrefix) {
 782             switch (sizePrefix) {
 783                 case 0x66:
 784                     return P_66;
 785                 case 0xF2:
 786                     return P_F2;
 787                 case 0xF3:
 788                     return P_F3;
 789                 default:
 790                     return P_;
 791             }
 792         }
 793 
 794         private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) {
 795             switch (opcodeEscapePrefix) {
 796                 case 0x0F:
 797                     return M_0F;
 798                 case 0x380F:
 799                     return M_0F38;
 800                 case 0x3A0F:
 801                     return M_0F3A;
 802                 default:
 803                     return 0;
 804             }
 805         }
 806 
 807         @Override
 808         public void simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 809             assert reg.encoding < 16 : "encoding out of range: " + reg.encoding;
 810             assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
 811             emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0, true);
 812         }
 813 
 814         @Override
 815         public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 816             assert dst.encoding < 16 : "encoding out of range: " + dst.encoding;
 817             assert src.encoding < 16 : "encoding out of range: " + src.encoding;
 818             assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
 819             emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0, true);
 820         }
 821     }
 822 
 823     protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 824         simdEncoder.simdPrefix(xreg, nds, adr, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
 825     }
 826 
 827     protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
 828         simdEncoder.simdPrefix(xreg, nds, adr, size.sizePrefix, opcodeEscapePrefix, isRexW);
 829     }
 830 
 831     protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
 832         simdEncoder.simdPrefix(dst, nds, src, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
 833     }
 834 
 835     protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
 836         simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW);
 837     }
 838 
 839  // @formatter:off
 840  //
 841  // Instruction Format and VEX illustrated below (optional []):
 842  //
 843  // #of bytes:    2,3      1       1       1       1,2,4       1
 844  // [Prefixes]    VEX   OpCode   ModR/M  [SIB]   [Disp8*N] [Immediate]
 845  //                                             [Disp16,32]
 846  //
 847  // VEX: 0xC4 | P1 | P2
 848  //
 849  //     7   6   5   4   3   2   1   0
 850  // P1  R   X   B   m   m   m   m   m      P[ 7:0]
 851  // P2  W   v   v   v   v   L   p   p      P[15:8]
 852  //
 // VEX: 0xC5 | P1
 854  //
 855  //     7   6   5   4   3   2   1   0
 856  // P1  R   v   v   v   v   L   p   p      P[7:0]
 857  //
 858  // Figure. Bit Field Layout of the VEX Prefix
 859  //
 860  // Table. VEX Prefix Bit Field Functional Grouping
 861  //
 862  // Notation        Bit field Group        Position        Comment
 863  // ----------  -------------------------  --------  -------------------
 864  // VEX.RXB     Next-8 register specifier  P[7:5]    Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
 865  // VEX.R       REX.R inverse              P[7]      Combine with EVEX.R and ModR/M.reg.
 866  // VEX.X       REX.X inverse              P[6]      Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
 867  // VEX.B       REX.B inverse              P[5]
 // VEX.m-mmmm  0F, 0F_38, 0F_3A encoding  P[4:0]    b00001/0F, b00010/0F_38, b00011/0F_3A (all others reserved)
 869  //
 870  // VEX.W       Opcode specific            P[15]
 871  // VEX.vvvv    A register specifier       P[14:11]  In inverse form, b1111 if not used.
 872  //                                        P[6:3]
 873  // VEX.L       Vector length/RC           P[10]     b0/scalar or 128b vec, b1/256b vec.
 874  //                                        P[2]
 875  // VEX.pp      Compressed legacy prefix   P[9:8]    b00/None, b01/0x66, b10/0xF3, b11/0xF2
 876  //                                        P[1:0]
 877  // @formatter:on
 878 
 879     /**
 880      * Low-level function to encode and emit the VEX prefix.
 881      * <p>
 882      * 2 byte form: [1100 0101] [R vvvv L pp]<br>
 883      * 3 byte form: [1100 0100] [RXB m-mmmm] [W vvvv L pp]
 884      * <p>
     * The RXB and vvvv fields are stored in 1's complement in the prefix encoding. This function
     * performs the 1's complement conversion; the caller is expected to pass plain, unencoded
     * arguments.
 888      * <p>
 889      * The pp field encodes an extension to the opcode:<br>
 890      * 00: no extension<br>
 891      * 01: 66<br>
 892      * 10: F3<br>
 893      * 11: F2
 894      * <p>
 895      * The m-mmmm field encodes the leading bytes of the opcode:<br>
 896      * 00001: implied 0F leading opcode byte (default in 2-byte encoding)<br>
 897      * 00010: implied 0F 38 leading opcode bytes<br>
 898      * 00011: implied 0F 3A leading opcode bytes
 899      * <p>
     * This function automatically chooses the 2-byte or 3-byte encoding, based on the X, B, and W
     * flags and the m-mmmm field.
 902      */
 903     protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv, boolean checkAVX) {
 904         assert !checkAVX || ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX) : "emitting VEX prefix on a CPU without AVX support";
 905 
 906         assert l == L128 || l == L256 : "invalid value for VEX.L";
 907         assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for VEX.pp";
 908         assert mmmmm == M_0F || mmmmm == M_0F38 || mmmmm == M_0F3A : "invalid value for VEX.m-mmmm";
 909         assert w == W0 || w == W1 : "invalid value for VEX.W";
 910 
 911         assert (rxb & 0x07) == rxb : "invalid value for VEX.RXB";
 912         assert (vvvv & 0x0F) == vvvv : "invalid value for VEX.vvvv";
 913 
 914         int rxb1s = rxb ^ 0x07;
 915         int vvvv1s = vvvv ^ 0x0F;
 916         if ((rxb & 0x03) == 0 && w == WIG && mmmmm == M_0F) {
 917             // 2 byte encoding
 918             int byte2 = 0;
 919             byte2 |= (rxb1s & 0x04) << 5;
 920             byte2 |= vvvv1s << 3;
 921             byte2 |= l << 2;
 922             byte2 |= pp;
 923 
 924             emitByte(Prefix.VEX2);
 925             emitByte(byte2);
 926         } else {
 927             // 3 byte encoding
 928             int byte2 = 0;
 929             byte2 = (rxb1s & 0x07) << 5;
 930             byte2 |= mmmmm;
 931 
 932             int byte3 = 0;
 933             byte3 |= w << 7;
 934             byte3 |= vvvv1s << 3;
 935             byte3 |= l << 2;
 936             byte3 |= pp;
 937 
 938             emitByte(Prefix.VEX3);
 939             emitByte(byte2);
 940             emitByte(byte3);
 941         }
 942     }
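
    // Worked example for emitVEX (illustrative only): for "vaddps xmm0, xmm1, xmm2"
    // (VEX.128.0F.WIG 58 /r), the call emitVEX(L128, P_, M_0F, WIG, 0, 1, true) selects the
    // two-byte form and emits 0xC5 0xF0; the opcode 0x58 and ModR/M 0xC2 are emitted separately.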
 943 
 944     public static int getLFlag(AVXSize size) {
 945         switch (size) {
 946             case XMM:
 947                 return L128;
 948             case YMM:
 949                 return L256;
 950             case ZMM:
 951                 return L512;
 952             default:
 953                 return LZ;
 954         }
 955     }
 956 
 957     public final void vexPrefix(Register dst, Register nds, Register src, AVXSize size, int pp, int mmmmm, int w, boolean checkAVX) {
 958         emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
 959     }
 960 
 961     public final void vexPrefix(Register dst, Register nds, AMD64Address src, AVXSize size, int pp, int mmmmm, int w, boolean checkAVX) {
 962         emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
 963     }
 964 
 965     protected static final class EVEXPrefixConfig {
 966         public static final int L512 = 2;
 967         public static final int LIG = 0;
 968 
 969         public static final int Z0 = 0x0;
 970         public static final int Z1 = 0x1;
 971 
 972         public static final int B0 = 0x0;
 973         public static final int B1 = 0x1;
 974 
 975         private EVEXPrefixConfig() {
 976         }
 977     }
 978 
 979     private static final int NOT_SUPPORTED_VECTOR_LENGTH = -1;
 980 
 981     /**
     * EVEX-encoded instructions use a compressed displacement scheme in which the 8-bit
     * displacement (disp8) is multiplied by a scaling factor N that depends on the tuple type and
     * the vector length.
     * <p>
     * Reference: Intel Software Developer's Manual Volume 2, Section 2.6.5
 986      */
 987     protected enum EVEXTuple {
 988         FV_NO_BROADCAST_32BIT(16, 32, 64),
 989         FV_BROADCAST_32BIT(4, 4, 4),
 990         FV_NO_BROADCAST_64BIT(16, 32, 64),
 991         FV_BROADCAST_64BIT(8, 8, 8),
 992         HV_NO_BROADCAST_32BIT(8, 16, 32),
 993         HV_BROADCAST_32BIT(4, 4, 4),
 994         FVM(16, 32, 64),
 995         T1S_8BIT(1, 1, 1),
 996         T1S_16BIT(2, 2, 2),
 997         T1S_32BIT(4, 4, 4),
 998         T1S_64BIT(8, 8, 8),
 999         T1F_32BIT(4, 4, 4),
1000         T1F_64BIT(8, 8, 8),
1001         T2_32BIT(8, 8, 8),
1002         T2_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
1003         T4_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
1004         T4_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
1005         T8_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
1006         HVM(8, 16, 32),
1007         QVM(4, 8, 16),
1008         OVM(2, 4, 8),
1009         M128(16, 16, 16),
1010         DUP(8, 32, 64);
1011 
1012         private final int scalingFactorVL128;
1013         private final int scalingFactorVL256;
1014         private final int scalingFactorVL512;
1015 
1016         EVEXTuple(int scalingFactorVL128, int scalingFactorVL256, int scalingFactorVL512) {
1017             this.scalingFactorVL128 = scalingFactorVL128;
1018             this.scalingFactorVL256 = scalingFactorVL256;
1019             this.scalingFactorVL512 = scalingFactorVL512;
1020         }
1021 
1022         private static int verifyScalingFactor(int scalingFactor) {
1023             if (scalingFactor == NOT_SUPPORTED_VECTOR_LENGTH) {
1024                 throw GraalError.shouldNotReachHere("Invalid scaling factor.");
1025             }
1026             return scalingFactor;
1027         }
1028 
1029         public int getDisp8ScalingFactor(AVXSize size) {
1030             switch (size) {
1031                 case XMM:
1032                     return verifyScalingFactor(scalingFactorVL128);
1033                 case YMM:
1034                     return verifyScalingFactor(scalingFactorVL256);
1035                 case ZMM:
1036                     return verifyScalingFactor(scalingFactorVL512);
1037                 default:
1038                     throw GraalError.shouldNotReachHere("Unsupported vector size.");
1039             }
1040         }
1041     }
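
    // Worked example for the disp8*N compression above (illustrative only):
    // FV_NO_BROADCAST_32BIT.getDisp8ScalingFactor(AVXSize.ZMM) returns N == 64, so a displacement
    // of 128 is compressed by emitOperandHelper to the single byte 2, while a displacement of 100
    // is not a multiple of 64 and falls back to the 4-byte displacement form.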
1042 
1043  // @formatter:off
1044  //
1045  // Instruction Format and EVEX illustrated below (optional []):
1046  //
1047  // #of bytes:      4       1       1       1       1,2,4       1
1048  // [Prefixes]    EVEX   OpCode   ModR/M  [SIB]   [Disp8*N] [Immediate]
1049  //                                              [Disp16,32]
1050  //
 // The EVEX prefix is a 4-byte prefix, with the first two bytes derived from an unused encoding
 // form of the 32-bit-mode-only BOUND instruction. The layout of the EVEX prefix is shown in
 // the figure below. The first byte must be 0x62, followed by three payload bytes, denoted
 // as P1, P2, and P3 individually, or collectively as P[23:0] (see below).
1055  //
1056  // EVEX: 0x62 | P1 | P2 | P3
1057  //
1058  //     7   6   5   4   3   2   1   0
1059  // P1  R   X   B   R'  0   0   m   m      P[ 7: 0]
1060  // P2  W   v   v   v   v   1   p   p      P[15: 8]
1061  // P3  z   L'  L   b   V'  a   a   a      P[23:16]
1062  //
1063  // Figure. Bit Field Layout of the EVEX Prefix
1064  //
1065  // Table. EVEX Prefix Bit Field Functional Grouping
1066  //
1067  // Notation        Bit field Group        Position        Comment
1068  // ---------  --------------------------  --------  -----------------------
1069  // EVEX.RXB   Next-8 register specifier   P[7:5]    Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
1070  // EVEX.X     High-16 register specifier  P[6]      Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
1071  // EVEX.R'    High-16 register specifier  P[4]      Combine with EVEX.R and ModR/M.reg.
1072  // --         Reserved                    P[3:2]    Must be 0.
1073  // EVEX.mm    Compressed legacy escape    P[1:0]    Identical to low two bits of VEX.mmmmm.
1074  //
1075  // EVEX.W     Osize promotion/Opcode ext  P[15]
1076  // EVEX.vvvv  NDS register specifier      P[14:11]  Same as VEX.vvvv.
1077  // --         Fixed Value                 P[10]     Must be 1.
1078  // EVEX.pp    Compressed legacy prefix    P[9:8]    Identical to VEX.pp.
1079  //
1080  // EVEX.z     Zeroing/Merging             P[23]
1081  // EVEX.L'L   Vector length/RC            P[22:21]
1082  // EVEX.b     Broadcast/RC/SAE Context    P[20]
1083  // EVEX.V'    High-16 NDS/VIDX register   P[19]     Combine with EVEX.vvvv or VSIB when present.
1084  // EVEX.aaa   Embedded opmask register    P[18:16]
1085  //
1086  // @formatter:on
1087 
1088     /**
1089      * Low-level function to encode and emit the EVEX prefix.
1090      * <p>
1091      * 62 [0 1 1 0 0 0 1 0]<br>
1092      * P1 [R X B R'0 0 m m]<br>
1093      * P2 [W v v v v 1 p p]<br>
1094      * P3 [z L'L b V'a a a]
1095      * <p>
1096      * The pp field encodes an extension to the opcode:<br>
1097      * 00: no extension<br>
1098      * 01: 66<br>
1099      * 10: F3<br>
1100      * 11: F2
1101      * <p>
1102      * The mm field encodes the leading bytes of the opcode:<br>
1103      * 01: implied 0F leading opcode byte<br>
1104      * 10: implied 0F 38 leading opcode bytes<br>
1105      * 11: implied 0F 3A leading opcode bytes
1106      * <p>
1107      * The z field encodes the merging mode (merge or zero).
1108      * <p>
1109      * The b field encodes the source broadcast or data rounding modes.
1110      * <p>
1111      * The aaa field encodes the operand mask register.
1112      */
1113     private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv, int z, int b, int aaa) {
1114         assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX512F) : "emitting EVEX prefix on a CPU without AVX512 support";
1115 
1116         assert l == L128 || l == L256 || l == L512 : "invalid value for EVEX.L'L";
1117         assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for EVEX.pp";
1118         assert mm == M_0F || mm == M_0F38 || mm == M_0F3A : "invalid value for EVEX.mm";
1119         assert w == W0 || w == W1 : "invalid value for EVEX.W";
1120 
1121         assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB";
1122         assert (reg & 0x1F) == reg : "invalid value for EVEX.R'";
1123         assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.V'vvvv";
1124 
1125         assert z == Z0 || z == Z1 : "invalid value for EVEX.z";
1126         assert b == B0 || b == B1 : "invalid value for EVEX.b";
1127         assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa";
1128 
1129         emitByte(Prefix.EVEX);
1130         int p1 = 0;
1131         p1 |= ((rxb ^ 0x07) & 0x07) << 5;
1132         p1 |= reg < 16 ? 0x10 : 0;
1133         p1 |= mm;
1134         emitByte(p1);
1135 
1136         int p2 = 0;
1137         p2 |= w << 7;
1138         p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3;
1139         p2 |= 0x04;
1140         p2 |= pp;
1141         emitByte(p2);
1142 
1143         int p3 = 0;
1144         p3 |= z << 7;
1145         p3 |= l << 5;
1146         p3 |= b << 4;
1147         p3 |= vvvvv < 16 ? 0x08 : 0;
1148         p3 |= aaa;
1149         emitByte(p3);
1150     }
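
    // Worked example for emitEVEX (illustrative only): for "vaddps zmm0, zmm1, zmm2"
    // (EVEX.512.0F.W0 58 /r) with no opmask, the call
    // emitEVEX(L512, P_, M_0F, W0, 0, 0, 1, Z0, B0, 0) emits the prefix bytes 0x62 0xF1 0x74 0x48;
    // the opcode 0x58 and ModR/M 0xC2 follow separately.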
1151 
1152     /**
     * Gets the RXB bits for register-register instructions in EVEX encoding, where ModRM.rm
     * contains a register index. The R bit extends the ModRM.reg field, and the X and B bits
     * extend the ModRM.rm field.
1156      */
1157     private static int getRXBForEVEX(Register reg, Register rm) {
1158         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
1159         rxb |= (rm == null ? 0 : rm.encoding & 0x018) >> 3;
1160         return rxb;
1161     }
1162 
1163     /**
     * Helper method for emitting the EVEX prefix in the form of RRRR (all operands are registers).
1165      */
1166     protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) {
1167         assert !mask.isValid() || inRC(MASK, mask);
1168         emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
1169     }
1170 
1171     /**
     * Helper method for emitting the EVEX prefix in the form of RRRM (the last operand is a memory
     * address). Because memory addressing in EVEX-encoded instructions employs a compressed
     * displacement scheme in the disp8 form, callers of this API should encode the operands using
     * {@link #emitEVEXOperandHelper(Register, AMD64Address, int, int)}.
1176      */
1177     protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) {
1178         assert !mask.isValid() || inRC(MASK, mask);
1179         emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
1180     }
1181 
1182 }