1 /*
   2  * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package org.graalvm.compiler.asm.amd64;
  24 
  25 import static jdk.vm.ci.amd64.AMD64.CPU;
  26 import static jdk.vm.ci.amd64.AMD64.XMM;
  27 import static jdk.vm.ci.amd64.AMD64.r12;
  28 import static jdk.vm.ci.amd64.AMD64.r13;
  29 import static jdk.vm.ci.amd64.AMD64.rbp;
  30 import static jdk.vm.ci.amd64.AMD64.rip;
  31 import static jdk.vm.ci.amd64.AMD64.rsp;
  32 import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
  33 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
  34 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
  35 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
  36 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
  37 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
  38 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
  39 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
  40 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
  41 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
  42 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
  43 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
  44 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
  45 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
  46 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.BYTE;
  47 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.DWORD;
  48 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PD;
  49 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PS;
  50 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.QWORD;
  51 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SD;
  52 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SS;
  53 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.WORD;
  54 import static org.graalvm.compiler.core.common.NumUtil.isByte;
  55 import static org.graalvm.compiler.core.common.NumUtil.isInt;
  56 import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
  57 import static org.graalvm.compiler.core.common.NumUtil.isUByte;
  58 
  59 import org.graalvm.compiler.asm.Assembler;
  60 import org.graalvm.compiler.asm.Label;
  61 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
  62 import org.graalvm.compiler.core.common.NumUtil;
  63 import org.graalvm.compiler.debug.GraalError;
  64 
  65 import jdk.vm.ci.amd64.AMD64;
  66 import jdk.vm.ci.amd64.AMD64.CPUFeature;
  67 import jdk.vm.ci.amd64.AMD64Kind;
  68 import jdk.vm.ci.code.Register;
  69 import jdk.vm.ci.code.Register.RegisterCategory;
  70 import jdk.vm.ci.code.TargetDescription;
  71 import jdk.vm.ci.meta.PlatformKind;
  72 
  73 /**
  74  * This class implements an assembler that can encode most X86 instructions.
  75  */
  76 public class AMD64Assembler extends Assembler {
  77 
  78     private static final int MinEncodingNeedsRex = 8;
  79 
  80     /**
  81      * The x86 condition codes used for conditional jumps/moves.
  82      */
  83     public enum ConditionFlag {
  84         Zero(0x4, "|zero|"),
  85         NotZero(0x5, "|nzero|"),
  86         Equal(0x4, "="),
  87         NotEqual(0x5, "!="),
  88         Less(0xc, "<"),
  89         LessEqual(0xe, "<="),
  90         Greater(0xf, ">"),
  91         GreaterEqual(0xd, ">="),
  92         Below(0x2, "|<|"),
  93         BelowEqual(0x6, "|<=|"),
  94         Above(0x7, "|>|"),
  95         AboveEqual(0x3, "|>=|"),
  96         Overflow(0x0, "|of|"),
  97         NoOverflow(0x1, "|nof|"),
  98         CarrySet(0x2, "|carry|"),
  99         CarryClear(0x3, "|ncarry|"),
 100         Negative(0x8, "|neg|"),
 101         Positive(0x9, "|pos|"),
 102         Parity(0xa, "|par|"),
 103         NoParity(0xb, "|npar|");
 104 
 105         private final int value;
 106         private final String operator;
 107 
 108         ConditionFlag(int value, String operator) {
 109             this.value = value;
 110             this.operator = operator;
 111         }
 112 
 113         public ConditionFlag negate() {
 114             switch (this) {
 115                 case Zero:
 116                     return NotZero;
 117                 case NotZero:
 118                     return Zero;
 119                 case Equal:
 120                     return NotEqual;
 121                 case NotEqual:
 122                     return Equal;
 123                 case Less:
 124                     return GreaterEqual;
 125                 case LessEqual:
 126                     return Greater;
 127                 case Greater:
 128                     return LessEqual;
 129                 case GreaterEqual:
 130                     return Less;
 131                 case Below:
 132                     return AboveEqual;
 133                 case BelowEqual:
 134                     return Above;
 135                 case Above:
 136                     return BelowEqual;
 137                 case AboveEqual:
 138                     return Below;
 139                 case Overflow:
 140                     return NoOverflow;
 141                 case NoOverflow:
 142                     return Overflow;
 143                 case CarrySet:
 144                     return CarryClear;
 145                 case CarryClear:
 146                     return CarrySet;
 147                 case Negative:
 148                     return Positive;
 149                 case Positive:
 150                     return Negative;
 151                 case Parity:
 152                     return NoParity;
 153                 case NoParity:
 154                     return Parity;
 155             }
 156             throw new IllegalArgumentException();
 157         }
 158 
 159         public int getValue() {
 160             return value;
 161         }
 162 
 163         @Override
 164         public String toString() {
 165             return operator;
 166         }
 167     }
 168 
 169     /**
 170      * Constants for X86 prefix bytes.
 171      */
 172     private static class Prefix {
 173         private static final int REX = 0x40;
 174         private static final int REXB = 0x41;
 175         private static final int REXX = 0x42;
 176         private static final int REXXB = 0x43;
 177         private static final int REXR = 0x44;
 178         private static final int REXRB = 0x45;
 179         private static final int REXRX = 0x46;
 180         private static final int REXRXB = 0x47;
 181         private static final int REXW = 0x48;
 182         private static final int REXWB = 0x49;
 183         private static final int REXWX = 0x4A;
 184         private static final int REXWXB = 0x4B;
 185         private static final int REXWR = 0x4C;
 186         private static final int REXWRB = 0x4D;
 187         private static final int REXWRX = 0x4E;
 188         private static final int REXWRXB = 0x4F;
 189         private static final int VEX_3BYTES = 0xC4;
 190         private static final int VEX_2BYTES = 0xC5;
 191     }
 192 
 193     private static class VexPrefix {
 194         private static final int VEX_R = 0x80;
 195         private static final int VEX_W = 0x80;
 196     }
 197 
 198     private static class AvxVectorLen {
 199         private static final int AVX_128bit = 0x0;
 200         private static final int AVX_256bit = 0x1;
 201     }
 202 
 203     private static class VexSimdPrefix {
 204         private static final int VEX_SIMD_NONE = 0x0;
 205         private static final int VEX_SIMD_66 = 0x1;
 206         private static final int VEX_SIMD_F3 = 0x2;
 207         private static final int VEX_SIMD_F2 = 0x3;
 208     }
 209 
 210     private static class VexOpcode {
 211         private static final int VEX_OPCODE_NONE = 0x0;
 212         private static final int VEX_OPCODE_0F = 0x1;
 213         private static final int VEX_OPCODE_0F_38 = 0x2;
 214         private static final int VEX_OPCODE_0F_3A = 0x3;
 215     }
 216 
 217     private AMD64InstructionAttr curAttributes;
 218 
 219     AMD64InstructionAttr getCurAttributes() {
 220         return curAttributes;
 221     }
 222 
 223     void setCurAttributes(AMD64InstructionAttr attributes) {
 224         curAttributes = attributes;
 225     }
 226 
 227     /**
 228      * The x86 operand sizes.
 229      */
 230     public enum OperandSize {
 231         BYTE(1, AMD64Kind.BYTE) {
 232             @Override
 233             protected void emitImmediate(AMD64Assembler asm, int imm) {
 234                 assert imm == (byte) imm;
 235                 asm.emitByte(imm);
 236             }
 237 
 238             @Override
 239             protected int immediateSize() {
 240                 return 1;
 241             }
 242         },
 243 
 244         WORD(2, AMD64Kind.WORD, 0x66) {
 245             @Override
 246             protected void emitImmediate(AMD64Assembler asm, int imm) {
 247                 assert imm == (short) imm;
 248                 asm.emitShort(imm);
 249             }
 250 
 251             @Override
 252             protected int immediateSize() {
 253                 return 2;
 254             }
 255         },
 256 
 257         DWORD(4, AMD64Kind.DWORD) {
 258             @Override
 259             protected void emitImmediate(AMD64Assembler asm, int imm) {
 260                 asm.emitInt(imm);
 261             }
 262 
 263             @Override
 264             protected int immediateSize() {
 265                 return 4;
 266             }
 267         },
 268 
 269         QWORD(8, AMD64Kind.QWORD) {
 270             @Override
 271             protected void emitImmediate(AMD64Assembler asm, int imm) {
 272                 asm.emitInt(imm);
 273             }
 274 
 275             @Override
 276             protected int immediateSize() {
 277                 return 4;
 278             }
 279         },
 280 
 281         SS(4, AMD64Kind.SINGLE, 0xF3, true),
 282 
 283         SD(8, AMD64Kind.DOUBLE, 0xF2, true),
 284 
 285         PS(16, AMD64Kind.V128_SINGLE, true),
 286 
 287         PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);
 288 
 289         private final int sizePrefix;
 290         private final int bytes;
 291         private final boolean xmm;
 292         private final AMD64Kind kind;
 293 
 294         OperandSize(int bytes, AMD64Kind kind) {
 295             this(bytes, kind, 0);
 296         }
 297 
 298         OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
 299             this(bytes, kind, sizePrefix, false);
 300         }
 301 
 302         OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
 303             this(bytes, kind, 0, xmm);
 304         }
 305 
 306         OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
 307             this.sizePrefix = sizePrefix;
 308             this.bytes = bytes;
 309             this.kind = kind;
 310             this.xmm = xmm;
 311         }
 312 
 313         public int getBytes() {
 314             return bytes;
 315         }
 316 
 317         public boolean isXmmType() {
 318             return xmm;
 319         }
 320 
 321         public AMD64Kind getKind() {
 322             return kind;
 323         }
 324 
 325         public static OperandSize get(PlatformKind kind) {
 326             for (OperandSize operandSize : OperandSize.values()) {
 327                 if (operandSize.kind.equals(kind)) {
 328                     return operandSize;
 329                 }
 330             }
 331             throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
 332         }
 333 
 334         /**
 335          * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
 336          * as sign-extended 32-bit values.
 337          *
 338          * @param asm
 339          * @param imm
 340          */
 341         protected void emitImmediate(AMD64Assembler asm, int imm) {
 342             throw new UnsupportedOperationException();
 343         }
 344 
 345         protected int immediateSize() {
 346             throw new UnsupportedOperationException();
 347         }
 348     }
 349 
 350     /**
 351      * Operand size and register type constraints.
 352      */
 353     private enum OpAssertion {
 354         ByteAssertion(CPU, CPU, BYTE),
 355         IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD),
 356         No16BitAssertion(CPU, CPU, DWORD, QWORD),
 357         No32BitAssertion(CPU, CPU, WORD, QWORD),
 358         QwordOnlyAssertion(CPU, CPU, QWORD),
 359         FloatingAssertion(XMM, XMM, SS, SD, PS, PD),
 360         PackedFloatingAssertion(XMM, XMM, PS, PD),
 361         SingleAssertion(XMM, XMM, SS),
 362         DoubleAssertion(XMM, XMM, SD),
 363         PackedDoubleAssertion(XMM, XMM, PD),
 364         IntToFloatingAssertion(XMM, CPU, DWORD, QWORD),
 365         FloatingToIntAssertion(CPU, XMM, DWORD, QWORD);
 366 
 367         private final RegisterCategory resultCategory;
 368         private final RegisterCategory inputCategory;
 369         private final OperandSize[] allowedSizes;
 370 
 371         OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
 372             this.resultCategory = resultCategory;
 373             this.inputCategory = inputCategory;
 374             this.allowedSizes = allowedSizes;
 375         }
 376 
 377         protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
 378             assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
 379             assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;
 380 
 381             for (OperandSize s : allowedSizes) {
 382                 if (size == s) {
 383                     return true;
 384                 }
 385             }
 386 
 387             assert false : "invalid operand size " + size + " used in " + op;
 388             return false;
 389         }
 390     }
 391 
 392     public abstract static class OperandDataAnnotation extends CodeAnnotation {
 393         /**
 394          * The position (bytes from the beginning of the method) of the operand.
 395          */
 396         public final int operandPosition;
 397         /**
 398          * The size of the operand, in bytes.
 399          */
 400         public final int operandSize;
 401         /**
 402          * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
 403          * RIP-relative operands are relative to this position.
 404          */
 405         public final int nextInstructionPosition;
 406 
 407         OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
 408             super(instructionPosition);
 409 
 410             this.operandPosition = operandPosition;
 411             this.operandSize = operandSize;
 412             this.nextInstructionPosition = nextInstructionPosition;
 413         }
 414 
 415         @Override
 416         public String toString() {
 417             return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
 418         }
 419     }
 420 
 421     /**
 422      * Annotation that stores additional information about the displacement of a
 423      * {@link Assembler#getPlaceholder placeholder address} that needs patching.
 424      */
 425     public static class AddressDisplacementAnnotation extends OperandDataAnnotation {
 426         AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operndSize, int nextInstructionPosition) {
 427             super(instructionPosition, operandPosition, operndSize, nextInstructionPosition);
 428         }
 429     }
 430 
 431     /**
 432      * Annotation that stores additional information about the immediate operand, e.g., of a call
 433      * instruction, that needs patching.
 434      */
 435     public static class ImmediateOperandAnnotation extends OperandDataAnnotation {
 436         ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operndSize, int nextInstructionPosition) {
 437             super(instructionPosition, operandPosition, operndSize, nextInstructionPosition);
 438         }
 439     }
 440 
 441     /**
 442      * Constructs an assembler for the AMD64 architecture.
 443      */
 444     public AMD64Assembler(TargetDescription target) {
 445         super(target);
 446     }
 447 
 448     public boolean supports(CPUFeature feature) {
 449         return ((AMD64) target.arch).getFeatures().contains(feature);
 450     }
 451 
 452     private static int encode(Register r) {
 453         assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
 454         return r.encoding & 0x7;
 455     }
 456 
 457     /**
 458      * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
 459      * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
 460      * field. The X bit must be 0.
 461      */
 462     protected static int getRXB(Register reg, Register rm) {
 463         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
 464         rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
 465         return rxb;
 466     }
 467 
 468     /**
 469      * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
 470      * are two cases for the memory operand:<br>
 471      * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
 472      * <br>
 473      * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
 474      */
 475     protected static int getRXB(Register reg, AMD64Address rm) {
 476         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
 477         if (!rm.getIndex().equals(Register.None)) {
 478             rxb |= (rm.getIndex().encoding & 0x08) >> 2;
 479         }
 480         if (!rm.getBase().equals(Register.None)) {
 481             rxb |= (rm.getBase().encoding & 0x08) >> 3;
 482         }
 483         return rxb;
 484     }
 485 
 486     /**
 487      * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
 488      * <p>
 489      * Format: [ 11 reg r/m ]
 490      */
 491     protected void emitModRM(int reg, Register rm) {
 492         assert (reg & 0x07) == reg;
 493         emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
 494     }
 495 
 496     /**
 497      * Emit the ModR/M byte for two register operands.
 498      * <p>
 499      * Format: [ 11 reg r/m ]
 500      */
 501     protected void emitModRM(Register reg, Register rm) {
 502         emitModRM(reg.encoding & 0x07, rm);
 503     }
 504 
 505     protected void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
 506         assert !reg.equals(Register.None);
 507         emitOperandHelper(encode(reg), addr, false, additionalInstructionSize);
 508     }
 509 
 510     /**
 511      * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
 512      *
 513      * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 514      */
 515     protected void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
 516         assert !reg.equals(Register.None);
 517         emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize);
 518     }
 519 
 520     protected void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
 521         emitOperandHelper(reg, addr, false, additionalInstructionSize);
 522     }
 523 
 524     /**
 525      * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
 526      * extension in the R field.
 527      *
 528      * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 529      * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
 530      *            so that the start position of the next instruction can be computed even though
 531      *            this instruction has not been completely emitted yet.
 532      */
 533     protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
 534         assert (reg & 0x07) == reg;
 535         int regenc = reg << 3;
 536 
 537         Register base = addr.getBase();
 538         Register index = addr.getIndex();
 539 
 540         AMD64Address.Scale scale = addr.getScale();
 541         int disp = addr.getDisplacement();
 542 
 543         if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
 544             // [00 000 101] disp32
 545             assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
 546             emitByte(0x05 | regenc);
 547             if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
 548                 codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
 549             }
 550             emitInt(disp);
 551         } else if (base.isValid()) {
 552             int baseenc = base.isValid() ? encode(base) : 0;
 553             if (index.isValid()) {
 554                 int indexenc = encode(index) << 3;
 555                 // [base + indexscale + disp]
 556                 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
 557                     // [base + indexscale]
 558                     // [00 reg 100][ss index base]
 559                     assert !index.equals(rsp) : "illegal addressing mode";
 560                     emitByte(0x04 | regenc);
 561                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 562                 } else if (isByte(disp) && !force4Byte) {
 563                     // [base + indexscale + imm8]
 564                     // [01 reg 100][ss index base] imm8
 565                     assert !index.equals(rsp) : "illegal addressing mode";
 566                     emitByte(0x44 | regenc);
 567                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 568                     emitByte(disp & 0xFF);
 569                 } else {
 570                     // [base + indexscale + disp32]
 571                     // [10 reg 100][ss index base] disp32
 572                     assert !index.equals(rsp) : "illegal addressing mode";
 573                     emitByte(0x84 | regenc);
 574                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 575                     emitInt(disp);
 576                 }
 577             } else if (base.equals(rsp) || base.equals(r12)) {
 578                 // [rsp + disp]
 579                 if (disp == 0) {
 580                     // [rsp]
 581                     // [00 reg 100][00 100 100]
 582                     emitByte(0x04 | regenc);
 583                     emitByte(0x24);
 584                 } else if (isByte(disp) && !force4Byte) {
 585                     // [rsp + imm8]
 586                     // [01 reg 100][00 100 100] disp8
 587                     emitByte(0x44 | regenc);
 588                     emitByte(0x24);
 589                     emitByte(disp & 0xFF);
 590                 } else {
 591                     // [rsp + imm32]
 592                     // [10 reg 100][00 100 100] disp32
 593                     emitByte(0x84 | regenc);
 594                     emitByte(0x24);
 595                     emitInt(disp);
 596                 }
 597             } else {
 598                 // [base + disp]
 599                 assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
 600                 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
 601                     // [base]
 602                     // [00 reg base]
 603                     emitByte(0x00 | regenc | baseenc);
 604                 } else if (isByte(disp) && !force4Byte) {
 605                     // [base + disp8]
 606                     // [01 reg base] disp8
 607                     emitByte(0x40 | regenc | baseenc);
 608                     emitByte(disp & 0xFF);
 609                 } else {
 610                     // [base + disp32]
 611                     // [10 reg base] disp32
 612                     emitByte(0x80 | regenc | baseenc);
 613                     emitInt(disp);
 614                 }
 615             }
 616         } else {
 617             if (index.isValid()) {
 618                 int indexenc = encode(index) << 3;
 619                 // [indexscale + disp]
 620                 // [00 reg 100][ss index 101] disp32
 621                 assert !index.equals(rsp) : "illegal addressing mode";
 622                 emitByte(0x04 | regenc);
 623                 emitByte(scale.log2 << 6 | indexenc | 0x05);
 624                 emitInt(disp);
 625             } else {
 626                 // [disp] ABSOLUTE
 627                 // [00 reg 100][00 100 101] disp32
 628                 emitByte(0x04 | regenc);
 629                 emitByte(0x25);
 630                 emitInt(disp);
 631             }
 632         }
 633         setCurAttributes(null);
 634     }
 635 
 636     /**
 637      * Base class for AMD64 opcodes.
 638      */
 639     public static class AMD64Op {
 640 
 641         protected static final int P_0F = 0x0F;
 642         protected static final int P_0F38 = 0x380F;
 643         protected static final int P_0F3A = 0x3A0F;
 644 
 645         private final String opcode;
 646 
 647         protected final int prefix1;
 648         protected final int prefix2;
 649         protected final int op;
 650 
 651         private final boolean dstIsByte;
 652         private final boolean srcIsByte;
 653 
 654         private final OpAssertion assertion;
 655         private final CPUFeature feature;
 656 
 657         protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 658             this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
 659         }
 660 
 661         protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
 662             this.opcode = opcode;
 663             this.prefix1 = prefix1;
 664             this.prefix2 = prefix2;
 665             this.op = op;
 666 
 667             this.dstIsByte = dstIsByte;
 668             this.srcIsByte = srcIsByte;
 669 
 670             this.assertion = assertion;
 671             this.feature = feature;
 672         }
 673 
 674         protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
 675             if (prefix1 != 0) {
 676                 asm.emitByte(prefix1);
 677             }
 678             if (size.sizePrefix != 0) {
 679                 asm.emitByte(size.sizePrefix);
 680             }
 681             int rexPrefix = 0x40 | rxb;
 682             if (size == QWORD) {
 683                 rexPrefix |= 0x08;
 684             }
 685             if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
 686                 asm.emitByte(rexPrefix);
 687             }
 688             if (prefix2 > 0xFF) {
 689                 asm.emitShort(prefix2);
 690             } else if (prefix2 > 0) {
 691                 asm.emitByte(prefix2);
 692             }
 693             asm.emitByte(op);
 694         }
 695 
 696         protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
 697             assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
 698             assert assertion.checkOperands(this, size, resultReg, inputReg);
 699             return true;
 700         }
 701 
 702         @Override
 703         public String toString() {
 704             return opcode;
 705         }
 706     }
 707 
 708     /**
 709      * Base class for AMD64 opcodes with immediate operands.
 710      */
 711     public static class AMD64ImmOp extends AMD64Op {
 712 
 713         private final boolean immIsByte;
 714 
 715         protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
 716             super(opcode, 0, prefix, op, assertion, null);
 717             this.immIsByte = immIsByte;
 718         }
 719 
 720         protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
 721             if (immIsByte) {
 722                 assert imm == (byte) imm;
 723                 asm.emitByte(imm);
 724             } else {
 725                 size.emitImmediate(asm, imm);
 726             }
 727         }
 728 
 729         protected final int immediateSize(OperandSize size) {
 730             if (immIsByte) {
 731                 return 1;
 732             } else {
 733                 return size.bytes;
 734             }
 735         }
 736     }
 737 
 738     /**
 739      * Opcode with operand order of either RM or MR for 2 address forms.
 740      */
 741     public abstract static class AMD64RROp extends AMD64Op {
 742 
 743         protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 744             super(opcode, prefix1, prefix2, op, assertion, feature);
 745         }
 746 
 747         protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
 748             super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
 749         }
 750 
 751         public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
 752     }
 753 
 754     /**
 755      * Opcode with operand order of either RM or MR for 3 address forms.
 756      */
 757     public abstract static class AMD64RRROp extends AMD64Op {
 758 
 759         protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 760             super(opcode, prefix1, prefix2, op, assertion, feature);
 761         }
 762 
 763         protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
 764             super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
 765         }
 766 
 767         public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src);
 768     }
 769 
 770     /**
 771      * Opcode with operand order of RM.
 772      */
 773     public static class AMD64RMOp extends AMD64RROp {
 774         // @formatter:off
 775         public static final AMD64RMOp IMUL   = new AMD64RMOp("IMUL",         P_0F, 0xAF);
 776         public static final AMD64RMOp BSF    = new AMD64RMOp("BSF",          P_0F, 0xBC);
 777         public static final AMD64RMOp BSR    = new AMD64RMOp("BSR",          P_0F, 0xBD);
 778         public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
 779         public static final AMD64RMOp TZCNT  = new AMD64RMOp("TZCNT",  0xF3, P_0F, 0xBC, CPUFeature.BMI1);
 780         public static final AMD64RMOp LZCNT  = new AMD64RMOp("LZCNT",  0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
 781         public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB",       P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion);
 782         public static final AMD64RMOp MOVZX  = new AMD64RMOp("MOVZX",        P_0F, 0xB7, OpAssertion.No16BitAssertion);
 783         public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB",       P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion);
 784         public static final AMD64RMOp MOVSX  = new AMD64RMOp("MOVSX",        P_0F, 0xBF, OpAssertion.No16BitAssertion);
 785         public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD",             0x63, OpAssertion.QwordOnlyAssertion);
 786         public static final AMD64RMOp MOVB   = new AMD64RMOp("MOVB",               0x8A, OpAssertion.ByteAssertion);
 787         public static final AMD64RMOp MOV    = new AMD64RMOp("MOV",                0x8B);
 788 
 789         // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
 790         public static final AMD64RMOp MOVD   = new AMD64RMOp("MOVD",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
 791         public static final AMD64RMOp MOVQ   = new AMD64RMOp("MOVQ",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
 792         public static final AMD64RMOp MOVSS  = new AMD64RMOp("MOVSS",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
 793         public static final AMD64RMOp MOVSD  = new AMD64RMOp("MOVSD",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
 794 
 795         // TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient.
 796         public static final AMD64RMOp TESTB  = new AMD64RMOp("TEST",               0x84, OpAssertion.ByteAssertion);
 797         public static final AMD64RMOp TEST   = new AMD64RMOp("TEST",               0x85);
 798         // @formatter:on
 799 
 800         protected AMD64RMOp(String opcode, int op) {
 801             this(opcode, 0, op);
 802         }
 803 
 804         protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
 805             this(opcode, 0, op, assertion);
 806         }
 807 
 808         protected AMD64RMOp(String opcode, int prefix, int op) {
 809             this(opcode, 0, prefix, op, null);
 810         }
 811 
 812         protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
 813             this(opcode, 0, prefix, op, assertion, null);
 814         }
 815 
 816         protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
 817             this(opcode, 0, prefix, op, assertion, feature);
 818         }
 819 
 820         protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
 821             super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
 822         }
 823 
 824         protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
 825             this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
 826         }
 827 
 828         protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 829             super(opcode, prefix1, prefix2, op, assertion, feature);
 830         }
 831 
 832         @Override
 833         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
 834             assert verify(asm, size, dst, src);
 835             boolean isSimd = false;
 836             boolean noNds = false;
 837 
 838             switch (op) {
 839                 case 0x2A:
 840                 case 0x2C:
 841                 case 0x2E:
 842                 case 0x5A:
 843                 case 0x6E:
 844                     isSimd = true;
 845                     noNds = true;
 846                     break;
 847                 case 0x10:
 848                 case 0x51:
 849                 case 0x54:
 850                 case 0x55:
 851                 case 0x56:
 852                 case 0x57:
 853                 case 0x58:
 854                 case 0x59:
 855                 case 0x5C:
 856                 case 0x5D:
 857                 case 0x5E:
 858                 case 0x5F:
 859                     isSimd = true;
 860                     break;
 861             }
 862 
 863             if (isSimd) {
 864                 int pre;
 865                 int opc;
 866                 boolean rexVexW = (size == QWORD) ? true : false;
 867                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
 868                 int curPrefix = size.sizePrefix | prefix1;
 869                 switch (curPrefix) {
 870                     case 0x66:
 871                         pre = VexSimdPrefix.VEX_SIMD_66;
 872                         break;
 873                     case 0xF2:
 874                         pre = VexSimdPrefix.VEX_SIMD_F2;
 875                         break;
 876                     case 0xF3:
 877                         pre = VexSimdPrefix.VEX_SIMD_F3;
 878                         break;
 879                     default:
 880                         pre = VexSimdPrefix.VEX_SIMD_NONE;
 881                         break;
 882                 }
 883                 switch (prefix2) {
 884                     case P_0F:
 885                         opc = VexOpcode.VEX_OPCODE_0F;
 886                         break;
 887                     case P_0F38:
 888                         opc = VexOpcode.VEX_OPCODE_0F_38;
 889                         break;
 890                     case P_0F3A:
 891                         opc = VexOpcode.VEX_OPCODE_0F_3A;
 892                         break;
 893                     default:
 894                         opc = VexOpcode.VEX_OPCODE_NONE;
 895                         break;
 896                 }
 897                 int encode;
 898                 if (noNds) {
 899                     encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
 900                 } else {
 901                     encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
 902                 }
 903                 asm.emitByte(op);
 904                 asm.emitByte(0xC0 | encode);
 905             } else {
 906                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
 907                 asm.emitModRM(dst, src);
 908             }
 909         }
 910 
 911         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
 912             assert verify(asm, size, dst, null);
 913             boolean isSimd = false;
 914             boolean noNds = false;
 915 
 916             switch (op) {
 917                 case 0x10:
 918                 case 0x2A:
 919                 case 0x2C:
 920                 case 0x2E:
 921                 case 0x6E:
 922                     isSimd = true;
 923                     noNds = true;
 924                     break;
 925                 case 0x51:
 926                 case 0x54:
 927                 case 0x55:
 928                 case 0x56:
 929                 case 0x57:
 930                 case 0x58:
 931                 case 0x59:
 932                 case 0x5C:
 933                 case 0x5D:
 934                 case 0x5E:
 935                 case 0x5F:
 936                     isSimd = true;
 937                     break;
 938             }
 939 
 940             if (isSimd) {
 941                 int pre;
 942                 int opc;
 943                 boolean rexVexW = (size == QWORD) ? true : false;
 944                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
 945                 int curPrefix = size.sizePrefix | prefix1;
 946                 switch (curPrefix) {
 947                     case 0x66:
 948                         pre = VexSimdPrefix.VEX_SIMD_66;
 949                         break;
 950                     case 0xF2:
 951                         pre = VexSimdPrefix.VEX_SIMD_F2;
 952                         break;
 953                     case 0xF3:
 954                         pre = VexSimdPrefix.VEX_SIMD_F3;
 955                         break;
 956                     default:
 957                         pre = VexSimdPrefix.VEX_SIMD_NONE;
 958                         break;
 959                 }
 960                 switch (prefix2) {
 961                     case P_0F:
 962                         opc = VexOpcode.VEX_OPCODE_0F;
 963                         break;
 964                     case P_0F38:
 965                         opc = VexOpcode.VEX_OPCODE_0F_38;
 966                         break;
 967                     case P_0F3A:
 968                         opc = VexOpcode.VEX_OPCODE_0F_3A;
 969                         break;
 970                     default:
 971                         opc = VexOpcode.VEX_OPCODE_NONE;
 972                         break;
 973                 }
 974                 if (noNds) {
 975                     asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
 976                 } else {
 977                     asm.simdPrefix(dst, dst, src, pre, opc, attributes);
 978                 }
 979                 asm.emitByte(op);
 980                 asm.emitOperandHelper(dst, src, 0);
 981             } else {
 982                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
 983                 asm.emitOperandHelper(dst, src, 0);
 984             }
 985         }
 986     }
 987 
 988     /**
 989      * Opcode with operand order of RM.
 990      */
 991     public static class AMD64RRMOp extends AMD64RRROp {
 992         protected AMD64RRMOp(String opcode, int op) {
 993             this(opcode, 0, op);
 994         }
 995 
 996         protected AMD64RRMOp(String opcode, int op, OpAssertion assertion) {
 997             this(opcode, 0, op, assertion);
 998         }
 999 
1000         protected AMD64RRMOp(String opcode, int prefix, int op) {
1001             this(opcode, 0, prefix, op, null);
1002         }
1003 
1004         protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion) {
1005             this(opcode, 0, prefix, op, assertion, null);
1006         }
1007 
1008         protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
1009             this(opcode, 0, prefix, op, assertion, feature);
1010         }
1011 
1012         protected AMD64RRMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
1013             super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
1014         }
1015 
1016         protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
1017             this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
1018         }
1019 
1020         protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
1021             super(opcode, prefix1, prefix2, op, assertion, feature);
1022         }
1023 
1024         @Override
1025         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src) {
1026             assert verify(asm, size, dst, src);
1027             int pre;
1028             int opc;
1029             boolean rexVexW = (size == QWORD) ? true : false;
1030             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1031             int curPrefix = size.sizePrefix | prefix1;
1032             switch (curPrefix) {
1033                 case 0x66:
1034                     pre = VexSimdPrefix.VEX_SIMD_66;
1035                     break;
1036                 case 0xF2:
1037                     pre = VexSimdPrefix.VEX_SIMD_F2;
1038                     break;
1039                 case 0xF3:
1040                     pre = VexSimdPrefix.VEX_SIMD_F3;
1041                     break;
1042                 default:
1043                     pre = VexSimdPrefix.VEX_SIMD_NONE;
1044                     break;
1045             }
1046             switch (prefix2) {
1047                 case P_0F:
1048                     opc = VexOpcode.VEX_OPCODE_0F;
1049                     break;
1050                 case P_0F38:
1051                     opc = VexOpcode.VEX_OPCODE_0F_38;
1052                     break;
1053                 case P_0F3A:
1054                     opc = VexOpcode.VEX_OPCODE_0F_3A;
1055                     break;
1056                 default:
1057                     opc = VexOpcode.VEX_OPCODE_NONE;
1058                     break;
1059             }
1060             int encode;
1061             encode = asm.simdPrefixAndEncode(dst, nds, src, pre, opc, attributes);
1062             asm.emitByte(op);
1063             asm.emitByte(0xC0 | encode);
1064         }
1065 
1066         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, AMD64Address src) {
1067             assert verify(asm, size, dst, null);
1068             int pre;
1069             int opc;
1070             boolean rexVexW = (size == QWORD) ? true : false;
1071             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1072             int curPrefix = size.sizePrefix | prefix1;
1073             switch (curPrefix) {
1074                 case 0x66:
1075                     pre = VexSimdPrefix.VEX_SIMD_66;
1076                     break;
1077                 case 0xF2:
1078                     pre = VexSimdPrefix.VEX_SIMD_F2;
1079                     break;
1080                 case 0xF3:
1081                     pre = VexSimdPrefix.VEX_SIMD_F3;
1082                     break;
1083                 default:
1084                     pre = VexSimdPrefix.VEX_SIMD_NONE;
1085                     break;
1086             }
1087             switch (prefix2) {
1088                 case P_0F:
1089                     opc = VexOpcode.VEX_OPCODE_0F;
1090                     break;
1091                 case P_0F38:
1092                     opc = VexOpcode.VEX_OPCODE_0F_38;
1093                     break;
1094                 case P_0F3A:
1095                     opc = VexOpcode.VEX_OPCODE_0F_3A;
1096                     break;
1097                 default:
1098                     opc = VexOpcode.VEX_OPCODE_NONE;
1099                     break;
1100             }
1101             asm.simdPrefix(dst, nds, src, pre, opc, attributes);
1102             asm.emitByte(op);
1103             asm.emitOperandHelper(dst, src, 0);
1104         }
1105     }
1106 
1107     /**
1108      * Opcode with operand order of MR.
1109      */
1110     public static class AMD64MROp extends AMD64RROp {
1111         // @formatter:off
1112         public static final AMD64MROp MOVB   = new AMD64MROp("MOVB",               0x88, OpAssertion.ByteAssertion);
1113         public static final AMD64MROp MOV    = new AMD64MROp("MOV",                0x89);
1114 
1115         // MOVD and MOVQ are the same opcode, just with different operand size prefix
1116         // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used.
1117         public static final AMD64MROp MOVD   = new AMD64MROp("MOVD",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
1118         public static final AMD64MROp MOVQ   = new AMD64MROp("MOVQ",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
1119 
1120         // MOVSS and MOVSD are the same opcode, just with different operand size prefix
1121         public static final AMD64MROp MOVSS  = new AMD64MROp("MOVSS",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
1122         public static final AMD64MROp MOVSD  = new AMD64MROp("MOVSD",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
1123         // @formatter:on
1124 
1125         protected AMD64MROp(String opcode, int op) {
1126             this(opcode, 0, op);
1127         }
1128 
1129         protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
1130             this(opcode, 0, op, assertion);
1131         }
1132 
1133         protected AMD64MROp(String opcode, int prefix, int op) {
1134             this(opcode, prefix, op, OpAssertion.IntegerAssertion);
1135         }
1136 
1137         protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
1138             this(opcode, prefix, op, assertion, null);
1139         }
1140 
1141         protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
1142             this(opcode, 0, prefix, op, assertion, feature);
1143         }
1144 
1145         protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
1146             super(opcode, prefix1, prefix2, op, assertion, feature);
1147         }
1148 
1149         @Override
1150         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
1151             assert verify(asm, size, src, dst);
1152             boolean isSimd = false;
1153             boolean noNds = false;
1154 
1155             switch (op) {
1156                 case 0x7E:
1157                     isSimd = true;
1158                     noNds = true;
1159                     break;
1160                 case 0x11:
1161                     isSimd = true;
1162                     break;
1163             }
1164 
1165             if (isSimd) {
1166                 int pre;
1167                 int opc;
1168                 boolean rexVexW = (size == QWORD) ? true : false;
1169                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1170                 int curPrefix = size.sizePrefix | prefix1;
1171                 switch (curPrefix) {
1172                     case 0x66:
1173                         pre = VexSimdPrefix.VEX_SIMD_66;
1174                         break;
1175                     case 0xF2:
1176                         pre = VexSimdPrefix.VEX_SIMD_F2;
1177                         break;
1178                     case 0xF3:
1179                         pre = VexSimdPrefix.VEX_SIMD_F3;
1180                         break;
1181                     default:
1182                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1183                         break;
1184                 }
1185                 switch (prefix2) {
1186                     case P_0F:
1187                         opc = VexOpcode.VEX_OPCODE_0F;
1188                         break;
1189                     case P_0F38:
1190                         opc = VexOpcode.VEX_OPCODE_0F_38;
1191                         break;
1192                     case P_0F3A:
1193                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1194                         break;
1195                     default:
1196                         opc = VexOpcode.VEX_OPCODE_NONE;
1197                         break;
1198                 }
1199                 int encode;
1200                 if (noNds) {
1201                     encode = asm.simdPrefixAndEncode(src, Register.None, dst, pre, opc, attributes);
1202                 } else {
1203                     encode = asm.simdPrefixAndEncode(src, src, dst, pre, opc, attributes);
1204                 }
1205                 asm.emitByte(op);
1206                 asm.emitByte(0xC0 | encode);
1207             } else {
1208                 emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
1209                 asm.emitModRM(src, dst);
1210             }
1211         }
1212 
1213         public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
1214             assert verify(asm, size, null, src);
1215             boolean isSimd = false;
1216 
1217             switch (op) {
1218                 case 0x7E:
1219                 case 0x11:
1220                     isSimd = true;
1221                     break;
1222             }
1223 
1224             if (isSimd) {
1225                 int pre;
1226                 int opc;
1227                 boolean rexVexW = (size == QWORD) ? true : false;
1228                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1229                 int curPrefix = size.sizePrefix | prefix1;
1230                 switch (curPrefix) {
1231                     case 0x66:
1232                         pre = VexSimdPrefix.VEX_SIMD_66;
1233                         break;
1234                     case 0xF2:
1235                         pre = VexSimdPrefix.VEX_SIMD_F2;
1236                         break;
1237                     case 0xF3:
1238                         pre = VexSimdPrefix.VEX_SIMD_F3;
1239                         break;
1240                     default:
1241                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1242                         break;
1243                 }
1244                 switch (prefix2) {
1245                     case P_0F:
1246                         opc = VexOpcode.VEX_OPCODE_0F;
1247                         break;
1248                     case P_0F38:
1249                         opc = VexOpcode.VEX_OPCODE_0F_38;
1250                         break;
1251                     case P_0F3A:
1252                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1253                         break;
1254                     default:
1255                         opc = VexOpcode.VEX_OPCODE_NONE;
1256                         break;
1257                 }
1258                 asm.simdPrefix(src, Register.None, dst, pre, opc, attributes);
1259                 asm.emitByte(op);
1260                 asm.emitOperandHelper(src, dst, 0);
1261             } else {
1262                 emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
1263                 asm.emitOperandHelper(src, dst, 0);
1264             }
1265         }
1266     }
1267 
1268     /**
1269      * Opcodes with operand order of M.
1270      */
1271     public static class AMD64MOp extends AMD64Op {
1272         // @formatter:off
1273         public static final AMD64MOp NOT  = new AMD64MOp("NOT",  0xF7, 2);
1274         public static final AMD64MOp NEG  = new AMD64MOp("NEG",  0xF7, 3);
1275         public static final AMD64MOp MUL  = new AMD64MOp("MUL",  0xF7, 4);
1276         public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
1277         public static final AMD64MOp DIV  = new AMD64MOp("DIV",  0xF7, 6);
1278         public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
1279         public static final AMD64MOp INC  = new AMD64MOp("INC",  0xFF, 0);
1280         public static final AMD64MOp DEC  = new AMD64MOp("DEC",  0xFF, 1);
1281         public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
1282         public static final AMD64MOp POP  = new AMD64MOp("POP",  0x8F, 0, OpAssertion.No32BitAssertion);
1283         // @formatter:on
1284 
1285         private final int ext;
1286 
1287         protected AMD64MOp(String opcode, int op, int ext) {
1288             this(opcode, 0, op, ext);
1289         }
1290 
1291         protected AMD64MOp(String opcode, int prefix, int op, int ext) {
1292             this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion);
1293         }
1294 
1295         protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
1296             this(opcode, 0, op, ext, assertion);
1297         }
1298 
1299         protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
1300             super(opcode, 0, prefix, op, assertion, null);
1301             this.ext = ext;
1302         }
1303 
1304         public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
1305             assert verify(asm, size, dst, null);
1306             emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
1307             asm.emitModRM(ext, dst);
1308         }
1309 
1310         public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
1311             assert verify(asm, size, null, null);
1312             emitOpcode(asm, size, getRXB(null, dst), 0, 0);
1313             asm.emitOperandHelper(ext, dst, 0);
1314         }
1315     }
1316 
1317     /**
1318      * Opcodes with operand order of MI.
1319      */
1320     public static class AMD64MIOp extends AMD64ImmOp {
1321         // @formatter:off
1322         public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true,  0xC6, 0, OpAssertion.ByteAssertion);
1323         public static final AMD64MIOp MOV  = new AMD64MIOp("MOV",  false, 0xC7, 0);
1324         public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
1325         // @formatter:on
1326 
1327         private final int ext;
1328 
1329         protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
1330             this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion);
1331         }
1332 
1333         protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
1334             this(opcode, immIsByte, 0, op, ext, assertion);
1335         }
1336 
1337         protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
1338             super(opcode, immIsByte, prefix, op, assertion);
1339             this.ext = ext;
1340         }
1341 
1342         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
1343             assert verify(asm, size, dst, null);
1344             emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
1345             asm.emitModRM(ext, dst);
1346             emitImmediate(asm, size, imm);
1347         }
1348 
1349         public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
1350             assert verify(asm, size, null, null);
1351             emitOpcode(asm, size, getRXB(null, dst), 0, 0);
1352             asm.emitOperandHelper(ext, dst, immediateSize(size));
1353             emitImmediate(asm, size, imm);
1354         }
1355     }
1356 
1357     /**
1358      * Opcodes with operand order of RMI.
1359      *
1360      * We only have one form of round as the operation is always treated with single variant input,
1361      * making its extension to 3 address forms redundant.
1362      */
1363     public static class AMD64RMIOp extends AMD64ImmOp {
1364         // @formatter:off
1365         public static final AMD64RMIOp IMUL    = new AMD64RMIOp("IMUL", false, 0x69);
1366         public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true,  0x6B);
1367         public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion);
1368         public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion);
1369         // @formatter:on
1370 
1371         protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
1372             this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion);
1373         }
1374 
1375         protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
1376             super(opcode, immIsByte, prefix, op, assertion);
1377         }
1378 
1379         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
1380             assert verify(asm, size, dst, src);
1381             boolean isSimd = false;
1382             boolean noNds = false;
1383 
1384             switch (op) {
1385                 case 0x0A:
1386                 case 0x0B:
1387                     isSimd = true;
1388                     noNds = true;
1389                     break;
1390             }
1391 
1392             if (isSimd) {
1393                 int pre;
1394                 int opc;
1395                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1396                 int curPrefix = size.sizePrefix | prefix1;
1397                 switch (curPrefix) {
1398                     case 0x66:
1399                         pre = VexSimdPrefix.VEX_SIMD_66;
1400                         break;
1401                     case 0xF2:
1402                         pre = VexSimdPrefix.VEX_SIMD_F2;
1403                         break;
1404                     case 0xF3:
1405                         pre = VexSimdPrefix.VEX_SIMD_F3;
1406                         break;
1407                     default:
1408                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1409                         break;
1410                 }
1411                 switch (prefix2) {
1412                     case P_0F:
1413                         opc = VexOpcode.VEX_OPCODE_0F;
1414                         break;
1415                     case P_0F38:
1416                         opc = VexOpcode.VEX_OPCODE_0F_38;
1417                         break;
1418                     case P_0F3A:
1419                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1420                         break;
1421                     default:
1422                         opc = VexOpcode.VEX_OPCODE_NONE;
1423                         break;
1424                 }
1425                 int encode;
1426                 if (noNds) {
1427                     encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
1428                 } else {
1429                     encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
1430                 }
1431                 asm.emitByte(op);
1432                 asm.emitByte(0xC0 | encode);
1433                 emitImmediate(asm, size, imm);
1434             } else {
1435                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
1436                 asm.emitModRM(dst, src);
1437                 emitImmediate(asm, size, imm);
1438             }
1439         }
1440 
1441         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
1442             assert verify(asm, size, dst, null);
1443 
1444             boolean isSimd = false;
1445             boolean noNds = false;
1446 
1447             switch (op) {
1448                 case 0x0A:
1449                 case 0x0B:
1450                     isSimd = true;
1451                     noNds = true;
1452                     break;
1453             }
1454 
1455             if (isSimd) {
1456                 int pre;
1457                 int opc;
1458                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1459                 int curPrefix = size.sizePrefix | prefix1;
1460                 switch (curPrefix) {
1461                     case 0x66:
1462                         pre = VexSimdPrefix.VEX_SIMD_66;
1463                         break;
1464                     case 0xF2:
1465                         pre = VexSimdPrefix.VEX_SIMD_F2;
1466                         break;
1467                     case 0xF3:
1468                         pre = VexSimdPrefix.VEX_SIMD_F3;
1469                         break;
1470                     default:
1471                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1472                         break;
1473                 }
1474                 switch (prefix2) {
1475                     case P_0F:
1476                         opc = VexOpcode.VEX_OPCODE_0F;
1477                         break;
1478                     case P_0F38:
1479                         opc = VexOpcode.VEX_OPCODE_0F_38;
1480                         break;
1481                     case P_0F3A:
1482                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1483                         break;
1484                     default:
1485                         opc = VexOpcode.VEX_OPCODE_NONE;
1486                         break;
1487                 }
1488 
1489                 if (noNds) {
1490                     asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
1491                 } else {
1492                     asm.simdPrefix(dst, dst, src, pre, opc, attributes);
1493                 }
1494                 asm.emitByte(op);
1495                 asm.emitOperandHelper(dst, src, immediateSize(size));
1496                 emitImmediate(asm, size, imm);
1497             } else {
1498                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
1499                 asm.emitOperandHelper(dst, src, immediateSize(size));
1500                 emitImmediate(asm, size, imm);
1501             }
1502         }
1503     }
1504 
1505     public static class SSEOp extends AMD64RMOp {
1506         // @formatter:off
1507         public static final SSEOp CVTSI2SS  = new SSEOp("CVTSI2SS",  0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
1508         public static final SSEOp CVTSI2SD  = new SSEOp("CVTSI2SS",  0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
1509         public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
1510         public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
1511         public static final SSEOp UCOMIS    = new SSEOp("UCOMIS",          P_0F, 0x2E, OpAssertion.PackedFloatingAssertion);
1512         public static final SSEOp SQRT      = new SSEOp("SQRT",            P_0F, 0x51);
1513         public static final SSEOp AND       = new SSEOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
1514         public static final SSEOp ANDN      = new SSEOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
1515         public static final SSEOp OR        = new SSEOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
1516         public static final SSEOp XOR       = new SSEOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
1517         public static final SSEOp ADD       = new SSEOp("ADD",             P_0F, 0x58);
1518         public static final SSEOp MUL       = new SSEOp("MUL",             P_0F, 0x59);
1519         public static final SSEOp CVTSS2SD  = new SSEOp("CVTSS2SD",        P_0F, 0x5A, OpAssertion.SingleAssertion);
1520         public static final SSEOp CVTSD2SS  = new SSEOp("CVTSD2SS",        P_0F, 0x5A, OpAssertion.DoubleAssertion);
1521         public static final SSEOp SUB       = new SSEOp("SUB",             P_0F, 0x5C);
1522         public static final SSEOp MIN       = new SSEOp("MIN",             P_0F, 0x5D);
1523         public static final SSEOp DIV       = new SSEOp("DIV",             P_0F, 0x5E);
1524         public static final SSEOp MAX       = new SSEOp("MAX",             P_0F, 0x5F);
1525         // @formatter:on
1526 
1527         protected SSEOp(String opcode, int prefix, int op) {
1528             this(opcode, prefix, op, OpAssertion.FloatingAssertion);
1529         }
1530 
1531         protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
1532             this(opcode, 0, prefix, op, assertion);
1533         }
1534 
1535         protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
1536             super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
1537         }
1538     }
1539 
1540     public static class AVXOp extends AMD64RRMOp {
1541         // @formatter:off
1542         public static final AVXOp AND       = new AVXOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
1543         public static final AVXOp ANDN      = new AVXOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
1544         public static final AVXOp OR        = new AVXOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
1545         public static final AVXOp XOR       = new AVXOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
1546         public static final AVXOp ADD       = new AVXOp("ADD",             P_0F, 0x58);
1547         public static final AVXOp MUL       = new AVXOp("MUL",             P_0F, 0x59);
1548         public static final AVXOp SUB       = new AVXOp("SUB",             P_0F, 0x5C);
1549         public static final AVXOp MIN       = new AVXOp("MIN",             P_0F, 0x5D);
1550         public static final AVXOp DIV       = new AVXOp("DIV",             P_0F, 0x5E);
1551         public static final AVXOp MAX       = new AVXOp("MAX",             P_0F, 0x5F);
1552         // @formatter:on
1553 
1554         protected AVXOp(String opcode, int prefix, int op) {
1555             this(opcode, prefix, op, OpAssertion.FloatingAssertion);
1556         }
1557 
1558         protected AVXOp(String opcode, int prefix, int op, OpAssertion assertion) {
1559             this(opcode, 0, prefix, op, assertion);
1560         }
1561 
1562         protected AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
1563             super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.AVX);
1564         }
1565     }
1566 
1567     /**
1568      * Arithmetic operation with operand order of RM, MR or MI.
1569      */
1570     public static final class AMD64BinaryArithmetic {
1571         // @formatter:off
1572         public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
1573         public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR",  1);
1574         public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
1575         public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
1576         public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
1577         public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
1578         public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
1579         public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
1580         // @formatter:on
1581 
1582         private final AMD64MIOp byteImmOp;
1583         private final AMD64MROp byteMrOp;
1584         private final AMD64RMOp byteRmOp;
1585 
1586         private final AMD64MIOp immOp;
1587         private final AMD64MIOp immSxOp;
1588         private final AMD64MROp mrOp;
1589         private final AMD64RMOp rmOp;
1590 
1591         private AMD64BinaryArithmetic(String opcode, int code) {
1592             int baseOp = code << 3;
1593 
1594             byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
1595             byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
1596             byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);
1597 
1598             immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion);
1599             immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion);
1600             mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion);
1601             rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion);
1602         }
1603 
1604         public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
1605             if (size == BYTE) {
1606                 return byteImmOp;
1607             } else if (sx) {
1608                 return immSxOp;
1609             } else {
1610                 return immOp;
1611             }
1612         }
1613 
1614         public AMD64MROp getMROpcode(OperandSize size) {
1615             if (size == BYTE) {
1616                 return byteMrOp;
1617             } else {
1618                 return mrOp;
1619             }
1620         }
1621 
1622         public AMD64RMOp getRMOpcode(OperandSize size) {
1623             if (size == BYTE) {
1624                 return byteRmOp;
1625             } else {
1626                 return rmOp;
1627             }
1628         }
1629     }
1630 
1631     /**
1632      * Shift operation with operand order of M1, MC or MI.
1633      */
1634     public static final class AMD64Shift {
1635         // @formatter:off
1636         public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
1637         public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
1638         public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
1639         public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
1640         public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
1641         public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
1642         public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
1643         // @formatter:on
1644 
1645         public final AMD64MOp m1Op;
1646         public final AMD64MOp mcOp;
1647         public final AMD64MIOp miOp;
1648 
1649         private AMD64Shift(String opcode, int code) {
1650             m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion);
1651             mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion);
1652             miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion);
1653         }
1654     }
1655 
1656     public final void addl(AMD64Address dst, int imm32) {
1657         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1658     }
1659 
1660     public final void addl(Register dst, int imm32) {
1661         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1662     }
1663 
1664     public final void addl(Register dst, Register src) {
1665         ADD.rmOp.emit(this, DWORD, dst, src);
1666     }
1667 
1668     public final void addpd(Register dst, Register src) {
1669         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1670         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1671         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1672         emitByte(0x58);
1673         emitByte(0xC0 | encode);
1674     }
1675 
1676     public final void addpd(Register dst, AMD64Address src) {
1677         assert dst.getRegisterCategory().equals(AMD64.XMM);
1678         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1679         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1680         emitByte(0x58);
1681         emitOperandHelper(dst, src, 0);
1682     }
1683 
1684     public final void addsd(Register dst, Register src) {
1685         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1686         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1687         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1688         emitByte(0x58);
1689         emitByte(0xC0 | encode);
1690     }
1691 
1692     public final void addsd(Register dst, AMD64Address src) {
1693         assert dst.getRegisterCategory().equals(AMD64.XMM);
1694         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1695         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1696         emitByte(0x58);
1697         emitOperandHelper(dst, src, 0);
1698     }
1699 
1700     private void addrNop4() {
1701         // 4 bytes: NOP DWORD PTR [EAX+0]
1702         emitByte(0x0F);
1703         emitByte(0x1F);
1704         emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
1705         emitByte(0); // 8-bits offset (1 byte)
1706     }
1707 
1708     private void addrNop5() {
1709         // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
1710         emitByte(0x0F);
1711         emitByte(0x1F);
1712         emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
1713         emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1714         emitByte(0); // 8-bits offset (1 byte)
1715     }
1716 
1717     private void addrNop7() {
1718         // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
1719         emitByte(0x0F);
1720         emitByte(0x1F);
1721         emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
1722         emitInt(0); // 32-bits offset (4 bytes)
1723     }
1724 
1725     private void addrNop8() {
1726         // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
1727         emitByte(0x0F);
1728         emitByte(0x1F);
1729         emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
1730         emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1731         emitInt(0); // 32-bits offset (4 bytes)
1732     }
1733 
1734     public final void andl(Register dst, int imm32) {
1735         AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1736     }
1737 
1738     public final void andl(Register dst, Register src) {
1739         AND.rmOp.emit(this, DWORD, dst, src);
1740     }
1741 
1742     public final void andpd(Register dst, Register src) {
1743         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1744         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1745         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1746         emitByte(0x54);
1747         emitByte(0xC0 | encode);
1748     }
1749 
1750     public final void andpd(Register dst, AMD64Address src) {
1751         assert dst.getRegisterCategory().equals(AMD64.XMM);
1752         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1753         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1754         emitByte(0x54);
1755         emitOperandHelper(dst, src, 0);
1756     }
1757 
1758     public final void bsrl(Register dst, Register src) {
1759         int encode = prefixAndEncode(dst.encoding(), src.encoding());
1760         emitByte(0x0F);
1761         emitByte(0xBD);
1762         emitByte(0xC0 | encode);
1763     }
1764 
1765     public final void bswapl(Register reg) {
1766         int encode = prefixAndEncode(reg.encoding);
1767         emitByte(0x0F);
1768         emitByte(0xC8 | encode);
1769     }
1770 
1771     public final void cdql() {
1772         emitByte(0x99);
1773     }
1774 
1775     public final void cmovl(ConditionFlag cc, Register dst, Register src) {
1776         int encode = prefixAndEncode(dst.encoding, src.encoding);
1777         emitByte(0x0F);
1778         emitByte(0x40 | cc.getValue());
1779         emitByte(0xC0 | encode);
1780     }
1781 
1782     public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
1783         prefix(src, dst);
1784         emitByte(0x0F);
1785         emitByte(0x40 | cc.getValue());
1786         emitOperandHelper(dst, src, 0);
1787     }
1788 
1789     public final void cmpl(Register dst, int imm32) {
1790         CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1791     }
1792 
1793     public final void cmpl(Register dst, Register src) {
1794         CMP.rmOp.emit(this, DWORD, dst, src);
1795     }
1796 
1797     public final void cmpl(Register dst, AMD64Address src) {
1798         CMP.rmOp.emit(this, DWORD, dst, src);
1799     }
1800 
1801     public final void cmpl(AMD64Address dst, int imm32) {
1802         CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1803     }
1804 
1805     // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax,
1806     // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,.
1807     // The ZF is set if the compared values were equal, and cleared otherwise.
1808     public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
1809         prefix(adr, reg);
1810         emitByte(0x0F);
1811         emitByte(0xB1);
1812         emitOperandHelper(reg, adr, 0);
1813     }
1814 
1815     public final void cvtsi2sdl(Register dst, Register src) {
1816         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
1817         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1818         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1819         emitByte(0x2A);
1820         emitByte(0xC0 | encode);
1821     }
1822 
1823     public final void cvttsd2sil(Register dst, Register src) {
1824         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
1825         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1826         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1827         emitByte(0x2C);
1828         emitByte(0xC0 | encode);
1829     }
1830 
1831     protected final void decl(AMD64Address dst) {
1832         prefix(dst);
1833         emitByte(0xFF);
1834         emitOperandHelper(1, dst, 0);
1835     }
1836 
1837     public final void divsd(Register dst, Register src) {
1838         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1839         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1840         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1841         emitByte(0x5E);
1842         emitByte(0xC0 | encode);
1843     }
1844 
1845     public final void hlt() {
1846         emitByte(0xF4);
1847     }
1848 
1849     public final void imull(Register dst, Register src, int value) {
1850         if (isByte(value)) {
1851             AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
1852         } else {
1853             AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
1854         }
1855     }
1856 
1857     protected final void incl(AMD64Address dst) {
1858         prefix(dst);
1859         emitByte(0xFF);
1860         emitOperandHelper(0, dst, 0);
1861     }
1862 
1863     public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
1864         int shortSize = 2;
1865         int longSize = 6;
1866         long disp = jumpTarget - position();
1867         if (!forceDisp32 && isByte(disp - shortSize)) {
1868             // 0111 tttn #8-bit disp
1869             emitByte(0x70 | cc.getValue());
1870             emitByte((int) ((disp - shortSize) & 0xFF));
1871         } else {
1872             // 0000 1111 1000 tttn #32-bit disp
1873             assert isInt(disp - longSize) : "must be 32bit offset (call4)";
1874             emitByte(0x0F);
1875             emitByte(0x80 | cc.getValue());
1876             emitInt((int) (disp - longSize));
1877         }
1878     }
1879 
1880     public final void jcc(ConditionFlag cc, Label l) {
1881         assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
1882         if (l.isBound()) {
1883             jcc(cc, l.position(), false);
1884         } else {
1885             // Note: could eliminate cond. jumps to this jump if condition
1886             // is the same however, seems to be rather unlikely case.
1887             // Note: use jccb() if label to be bound is very close to get
1888             // an 8-bit displacement
1889             l.addPatchAt(position());
1890             emitByte(0x0F);
1891             emitByte(0x80 | cc.getValue());
1892             emitInt(0);
1893         }
1894 
1895     }
1896 
1897     public final void jccb(ConditionFlag cc, Label l) {
1898         if (l.isBound()) {
1899             int shortSize = 2;
1900             int entry = l.position();
1901             assert isByte(entry - (position() + shortSize)) : "Dispacement too large for a short jmp";
1902             long disp = entry - position();
1903             // 0111 tttn #8-bit disp
1904             emitByte(0x70 | cc.getValue());
1905             emitByte((int) ((disp - shortSize) & 0xFF));
1906         } else {
1907             l.addPatchAt(position());
1908             emitByte(0x70 | cc.getValue());
1909             emitByte(0);
1910         }
1911     }
1912 
1913     public final void jmp(int jumpTarget, boolean forceDisp32) {
1914         int shortSize = 2;
1915         int longSize = 5;
1916         long disp = jumpTarget - position();
1917         if (!forceDisp32 && isByte(disp - shortSize)) {
1918             emitByte(0xEB);
1919             emitByte((int) ((disp - shortSize) & 0xFF));
1920         } else {
1921             emitByte(0xE9);
1922             emitInt((int) (disp - longSize));
1923         }
1924     }
1925 
1926     @Override
1927     public final void jmp(Label l) {
1928         if (l.isBound()) {
1929             jmp(l.position(), false);
1930         } else {
1931             // By default, forward jumps are always 32-bit displacements, since
1932             // we can't yet know where the label will be bound. If you're sure that
1933             // the forward jump will not run beyond 256 bytes, use jmpb to
1934             // force an 8-bit displacement.
1935 
1936             l.addPatchAt(position());
1937             emitByte(0xE9);
1938             emitInt(0);
1939         }
1940     }
1941 
1942     public final void jmp(Register entry) {
1943         int encode = prefixAndEncode(entry.encoding);
1944         emitByte(0xFF);
1945         emitByte(0xE0 | encode);
1946     }
1947 
1948     public final void jmp(AMD64Address adr) {
1949         prefix(adr);
1950         emitByte(0xFF);
1951         emitOperandHelper(rsp, adr, 0);
1952     }
1953 
1954     public final void jmpb(Label l) {
1955         if (l.isBound()) {
1956             int shortSize = 2;
1957             int entry = l.position();
1958             assert isByte((entry - position()) + shortSize) : "Dispacement too large for a short jmp";
1959             long offs = entry - position();
1960             emitByte(0xEB);
1961             emitByte((int) ((offs - shortSize) & 0xFF));
1962         } else {
1963 
1964             l.addPatchAt(position());
1965             emitByte(0xEB);
1966             emitByte(0);
1967         }
1968     }
1969 
1970     public final void leaq(Register dst, AMD64Address src) {
1971         prefixq(src, dst);
1972         emitByte(0x8D);
1973         emitOperandHelper(dst, src, 0);
1974     }
1975 
1976     public final void leave() {
1977         emitByte(0xC9);
1978     }
1979 
1980     public final void lock() {
1981         emitByte(0xF0);
1982     }
1983 
1984     public final void movapd(Register dst, Register src) {
1985         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1986         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1987         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1988         emitByte(0x28);
1989         emitByte(0xC0 | encode);
1990     }
1991 
1992     public final void movaps(Register dst, Register src) {
1993         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1994         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1995         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
1996         emitByte(0x28);
1997         emitByte(0xC0 | encode);
1998     }
1999 
2000     public final void movb(AMD64Address dst, int imm8) {
2001         prefix(dst);
2002         emitByte(0xC6);
2003         emitOperandHelper(0, dst, 1);
2004         emitByte(imm8);
2005     }
2006 
2007     public final void movb(AMD64Address dst, Register src) {
2008         assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
2009         prefix(dst, src, true);
2010         emitByte(0x88);
2011         emitOperandHelper(src, dst, 0);
2012     }
2013 
2014     public final void movl(Register dst, int imm32) {
2015         int encode = prefixAndEncode(dst.encoding);
2016         emitByte(0xB8 | encode);
2017         emitInt(imm32);
2018     }
2019 
2020     public final void movl(Register dst, Register src) {
2021         int encode = prefixAndEncode(dst.encoding, src.encoding);
2022         emitByte(0x8B);
2023         emitByte(0xC0 | encode);
2024     }
2025 
2026     public final void movl(Register dst, AMD64Address src) {
2027         prefix(src, dst);
2028         emitByte(0x8B);
2029         emitOperandHelper(dst, src, 0);
2030     }
2031 
2032     public final void movl(AMD64Address dst, int imm32) {
2033         prefix(dst);
2034         emitByte(0xC7);
2035         emitOperandHelper(0, dst, 4);
2036         emitInt(imm32);
2037     }
2038 
2039     public final void movl(AMD64Address dst, Register src) {
2040         prefix(dst, src);
2041         emitByte(0x89);
2042         emitOperandHelper(src, dst, 0);
2043     }
2044 
2045     /**
2046      * New CPUs require use of movsd and movss to avoid partial register stall when loading from
2047      * memory. But for old Opteron use movlpd instead of movsd. The selection is done in
2048      * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
2049      * {@link AMD64MacroAssembler#movflt(Register, Register)}.
2050      */
2051     public final void movlpd(Register dst, AMD64Address src) {
2052         assert dst.getRegisterCategory().equals(AMD64.XMM);
2053         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2054         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2055         emitByte(0x12);
2056         emitOperandHelper(dst, src, 0);
2057     }
2058 
2059     public final void movlhps(Register dst, Register src) {
2060         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2061         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2062         int encode = simdPrefixAndEncode(dst, src, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2063         emitByte(0x16);
2064         emitByte(0xC0 | encode);
2065     }
2066 
2067     public final void movq(Register dst, AMD64Address src) {
2068         movq(dst, src, false);
2069     }
2070 
2071     public final void movq(Register dst, AMD64Address src, boolean wide) {
2072         if (dst.getRegisterCategory().equals(AMD64.XMM)) {
2073             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ wide, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2074             simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2075             emitByte(0x7E);
2076             emitOperandHelper(dst, src, wide, 0);
2077         } else {
2078             // gpr version of movq
2079             prefixq(src, dst);
2080             emitByte(0x8B);
2081             emitOperandHelper(dst, src, wide, 0);
2082         }
2083     }
2084 
2085     public final void movq(Register dst, Register src) {
2086         int encode = prefixqAndEncode(dst.encoding, src.encoding);
2087         emitByte(0x8B);
2088         emitByte(0xC0 | encode);
2089     }
2090 
2091     public final void movq(AMD64Address dst, Register src) {
2092         if (src.getRegisterCategory().equals(AMD64.XMM)) {
2093             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2094             simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2095             emitByte(0xD6);
2096             emitOperandHelper(src, dst, 0);
2097         } else {
2098             // gpr version of movq
2099             prefixq(dst, src);
2100             emitByte(0x89);
2101             emitOperandHelper(src, dst, 0);
2102         }
2103     }
2104 
2105     public final void movsbl(Register dst, AMD64Address src) {
2106         prefix(src, dst);
2107         emitByte(0x0F);
2108         emitByte(0xBE);
2109         emitOperandHelper(dst, src, 0);
2110     }
2111 
2112     public final void movsbl(Register dst, Register src) {
2113         int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
2114         emitByte(0x0F);
2115         emitByte(0xBE);
2116         emitByte(0xC0 | encode);
2117     }
2118 
2119     public final void movsbq(Register dst, AMD64Address src) {
2120         prefixq(src, dst);
2121         emitByte(0x0F);
2122         emitByte(0xBE);
2123         emitOperandHelper(dst, src, 0);
2124     }
2125 
2126     public final void movsbq(Register dst, Register src) {
2127         int encode = prefixqAndEncode(dst.encoding, src.encoding);
2128         emitByte(0x0F);
2129         emitByte(0xBE);
2130         emitByte(0xC0 | encode);
2131     }
2132 
2133     public final void movsd(Register dst, Register src) {
2134         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2135         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2136         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2137         emitByte(0x10);
2138         emitByte(0xC0 | encode);
2139     }
2140 
2141     public final void movsd(Register dst, AMD64Address src) {
2142         assert dst.getRegisterCategory().equals(AMD64.XMM);
2143         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2144         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2145         emitByte(0x10);
2146         emitOperandHelper(dst, src, 0);
2147     }
2148 
2149     public final void movsd(AMD64Address dst, Register src) {
2150         assert src.getRegisterCategory().equals(AMD64.XMM);
2151         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2152         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2153         emitByte(0x11);
2154         emitOperandHelper(src, dst, 0);
2155     }
2156 
2157     public final void movss(Register dst, Register src) {
2158         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2159         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2160         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2161         emitByte(0x10);
2162         emitByte(0xC0 | encode);
2163     }
2164 
2165     public final void movss(Register dst, AMD64Address src) {
2166         assert dst.getRegisterCategory().equals(AMD64.XMM);
2167         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2168         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2169         emitByte(0x10);
2170         emitOperandHelper(dst, src, 0);
2171     }
2172 
2173     public final void movss(AMD64Address dst, Register src) {
2174         assert src.getRegisterCategory().equals(AMD64.XMM);
2175         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2176         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2177         emitByte(0x11);
2178         emitOperandHelper(src, dst, 0);
2179     }
2180 
2181     public final void mulpd(Register dst, Register src) {
2182         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2183         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2184         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2185         emitByte(0x59);
2186         emitByte(0xC0 | encode);
2187     }
2188 
2189     public final void mulpd(Register dst, AMD64Address src) {
2190         assert dst.getRegisterCategory().equals(AMD64.XMM);
2191         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2192         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2193         emitByte(0x59);
2194         emitOperandHelper(dst, src, 0);
2195     }
2196 
2197     public final void mulsd(Register dst, Register src) {
2198         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2199         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2200         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2201         emitByte(0x59);
2202         emitByte(0xC0 | encode);
2203     }
2204 
2205     public final void mulsd(Register dst, AMD64Address src) {
2206         assert dst.getRegisterCategory().equals(AMD64.XMM);
2207         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2208         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2209         emitByte(0x59);
2210         emitOperandHelper(dst, src, 0);
2211     }
2212 
2213     public final void mulss(Register dst, Register src) {
2214         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2215         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2216         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2217         emitByte(0x59);
2218         emitByte(0xC0 | encode);
2219     }
2220 
2221     public final void movswl(Register dst, AMD64Address src) {
2222         prefix(src, dst);
2223         emitByte(0x0F);
2224         emitByte(0xBF);
2225         emitOperandHelper(dst, src, 0);
2226     }
2227 
2228     public final void movw(AMD64Address dst, int imm16) {
2229         emitByte(0x66); // switch to 16-bit mode
2230         prefix(dst);
2231         emitByte(0xC7);
2232         emitOperandHelper(0, dst, 2);
2233         emitShort(imm16);
2234     }
2235 
2236     public final void movw(AMD64Address dst, Register src) {
2237         emitByte(0x66);
2238         prefix(dst, src);
2239         emitByte(0x89);
2240         emitOperandHelper(src, dst, 0);
2241     }
2242 
2243     public final void movzbl(Register dst, AMD64Address src) {
2244         prefix(src, dst);
2245         emitByte(0x0F);
2246         emitByte(0xB6);
2247         emitOperandHelper(dst, src, 0);
2248     }
2249 
2250     public final void movzbl(Register dst, Register src) {
2251         AMD64RMOp.MOVZXB.emit(this, OperandSize.DWORD, dst, src);
2252     }
2253 
2254     public final void movzbq(Register dst, Register src) {
2255         AMD64RMOp.MOVZXB.emit(this, OperandSize.QWORD, dst, src);
2256     }
2257 
2258     public final void movzwl(Register dst, AMD64Address src) {
2259         prefix(src, dst);
2260         emitByte(0x0F);
2261         emitByte(0xB7);
2262         emitOperandHelper(dst, src, 0);
2263     }
2264 
2265     public final void negl(Register dst) {
2266         NEG.emit(this, DWORD, dst);
2267     }
2268 
2269     public final void notl(Register dst) {
2270         NOT.emit(this, DWORD, dst);
2271     }
2272 
2273     @Override
2274     public final void ensureUniquePC() {
2275         nop();
2276     }
2277 
2278     public final void nop() {
2279         nop(1);
2280     }
2281 
2282     public void nop(int count) {
2283         int i = count;
2284         if (UseNormalNop) {
2285             assert i > 0 : " ";
2286             // The fancy nops aren't currently recognized by debuggers making it a
2287             // pain to disassemble code while debugging. If assert are on clearly
2288             // speed is not an issue so simply use the single byte traditional nop
2289             // to do alignment.
2290 
2291             for (; i > 0; i--) {
2292                 emitByte(0x90);
2293             }
2294             return;
2295         }
2296 
2297         if (UseAddressNop) {
2298             //
2299             // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD.
2300             // 1: 0x90
2301             // 2: 0x66 0x90
2302             // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2303             // 4: 0x0F 0x1F 0x40 0x00
2304             // 5: 0x0F 0x1F 0x44 0x00 0x00
2305             // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2306             // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2307             // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2308             // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2309             // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2310             // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2311 
2312             // The rest coding is AMD specific - use consecutive Address nops
2313 
2314             // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2315             // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2316             // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2317             // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2318             // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2319             // Size prefixes (0x66) are added for larger sizes
2320 
2321             while (i >= 22) {
2322                 i -= 11;
2323                 emitByte(0x66); // size prefix
2324                 emitByte(0x66); // size prefix
2325                 emitByte(0x66); // size prefix
2326                 addrNop8();
2327             }
2328             // Generate first nop for size between 21-12
2329             switch (i) {
2330                 case 21:
2331                     i -= 11;
2332                     emitByte(0x66); // size prefix
2333                     emitByte(0x66); // size prefix
2334                     emitByte(0x66); // size prefix
2335                     addrNop8();
2336                     break;
2337                 case 20:
2338                 case 19:
2339                     i -= 10;
2340                     emitByte(0x66); // size prefix
2341                     emitByte(0x66); // size prefix
2342                     addrNop8();
2343                     break;
2344                 case 18:
2345                 case 17:
2346                     i -= 9;
2347                     emitByte(0x66); // size prefix
2348                     addrNop8();
2349                     break;
2350                 case 16:
2351                 case 15:
2352                     i -= 8;
2353                     addrNop8();
2354                     break;
2355                 case 14:
2356                 case 13:
2357                     i -= 7;
2358                     addrNop7();
2359                     break;
2360                 case 12:
2361                     i -= 6;
2362                     emitByte(0x66); // size prefix
2363                     addrNop5();
2364                     break;
2365                 default:
2366                     assert i < 12;
2367             }
2368 
2369             // Generate second nop for size between 11-1
2370             switch (i) {
2371                 case 11:
2372                     emitByte(0x66); // size prefix
2373                     emitByte(0x66); // size prefix
2374                     emitByte(0x66); // size prefix
2375                     addrNop8();
2376                     break;
2377                 case 10:
2378                     emitByte(0x66); // size prefix
2379                     emitByte(0x66); // size prefix
2380                     addrNop8();
2381                     break;
2382                 case 9:
2383                     emitByte(0x66); // size prefix
2384                     addrNop8();
2385                     break;
2386                 case 8:
2387                     addrNop8();
2388                     break;
2389                 case 7:
2390                     addrNop7();
2391                     break;
2392                 case 6:
2393                     emitByte(0x66); // size prefix
2394                     addrNop5();
2395                     break;
2396                 case 5:
2397                     addrNop5();
2398                     break;
2399                 case 4:
2400                     addrNop4();
2401                     break;
2402                 case 3:
2403                     // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2404                     emitByte(0x66); // size prefix
2405                     emitByte(0x66); // size prefix
2406                     emitByte(0x90); // nop
2407                     break;
2408                 case 2:
2409                     emitByte(0x66); // size prefix
2410                     emitByte(0x90); // nop
2411                     break;
2412                 case 1:
2413                     emitByte(0x90); // nop
2414                     break;
2415                 default:
2416                     assert i == 0;
2417             }
2418             return;
2419         }
2420 
2421         // Using nops with size prefixes "0x66 0x90".
2422         // From AMD Optimization Guide:
2423         // 1: 0x90
2424         // 2: 0x66 0x90
2425         // 3: 0x66 0x66 0x90
2426         // 4: 0x66 0x66 0x66 0x90
2427         // 5: 0x66 0x66 0x90 0x66 0x90
2428         // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2429         // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2430         // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2431         // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2432         // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2433         //
2434         while (i > 12) {
2435             i -= 4;
2436             emitByte(0x66); // size prefix
2437             emitByte(0x66);
2438             emitByte(0x66);
2439             emitByte(0x90); // nop
2440         }
2441         // 1 - 12 nops
2442         if (i > 8) {
2443             if (i > 9) {
2444                 i -= 1;
2445                 emitByte(0x66);
2446             }
2447             i -= 3;
2448             emitByte(0x66);
2449             emitByte(0x66);
2450             emitByte(0x90);
2451         }
2452         // 1 - 8 nops
2453         if (i > 4) {
2454             if (i > 6) {
2455                 i -= 1;
2456                 emitByte(0x66);
2457             }
2458             i -= 3;
2459             emitByte(0x66);
2460             emitByte(0x66);
2461             emitByte(0x90);
2462         }
2463         switch (i) {
2464             case 4:
2465                 emitByte(0x66);
2466                 emitByte(0x66);
2467                 emitByte(0x66);
2468                 emitByte(0x90);
2469                 break;
2470             case 3:
2471                 emitByte(0x66);
2472                 emitByte(0x66);
2473                 emitByte(0x90);
2474                 break;
2475             case 2:
2476                 emitByte(0x66);
2477                 emitByte(0x90);
2478                 break;
2479             case 1:
2480                 emitByte(0x90);
2481                 break;
2482             default:
2483                 assert i == 0;
2484         }
2485     }
2486 
2487     public final void orl(Register dst, Register src) {
2488         OR.rmOp.emit(this, DWORD, dst, src);
2489     }
2490 
2491     public final void orl(Register dst, int imm32) {
2492         OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2493     }
2494 
2495     public final void pop(Register dst) {
2496         int encode = prefixAndEncode(dst.encoding);
2497         emitByte(0x58 | encode);
2498     }
2499 
2500     public void popfq() {
2501         emitByte(0x9D);
2502     }
2503 
2504     public final void ptest(Register dst, Register src) {
2505         assert supports(CPUFeature.SSE4_1);
2506         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2507         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2508         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2509         emitByte(0x17);
2510         emitByte(0xC0 | encode);
2511     }
2512 
2513     public final void vptest(Register dst, Register src) {
2514         assert supports(CPUFeature.AVX);
2515         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2516         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2517         int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2518         emitByte(0x17);
2519         emitByte(0xC0 | encode);
2520     }
2521 
2522     void pcmpestri(Register dst, AMD64Address src, int imm8) {
2523         assert supports(CPUFeature.SSE4_2);
2524         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2525         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
2526         emitByte(0x61);
2527         emitOperandHelper(dst, src, 0);
2528         emitByte(imm8);
2529     }
2530 
2531     void pcmpestri(Register dst, Register src, int imm8) {
2532         assert supports(CPUFeature.SSE4_2);
2533         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2534         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
2535         emitByte(0x61);
2536         emitByte(0xC0 | encode);
2537         emitByte(imm8);
2538     }
2539 
2540     public final void push(Register src) {
2541         int encode = prefixAndEncode(src.encoding);
2542         emitByte(0x50 | encode);
2543     }
2544 
2545     public void pushfq() {
2546         emitByte(0x9c);
2547     }
2548 
2549     public final void paddd(Register dst, Register src) {
2550         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2551         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2552         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2553         emitByte(0xFE);
2554         emitByte(0xC0 | encode);
2555     }
2556 
2557     public final void paddq(Register dst, Register src) {
2558         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2559         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2560         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2561         emitByte(0xD4);
2562         emitByte(0xC0 | encode);
2563     }
2564 
2565     public final void pextrw(Register dst, Register src, int imm8) {
2566         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
2567         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2568         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2569         emitByte(0xC5);
2570         emitByte(0xC0 | encode);
2571         emitByte(imm8);
2572     }
2573 
2574     public final void pinsrw(Register dst, Register src, int imm8) {
2575         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
2576         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2577         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2578         emitByte(0xC4);
2579         emitByte(0xC0 | encode);
2580         emitByte(imm8);
2581     }
2582 
2583     public final void por(Register dst, Register src) {
2584         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2585         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2586         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2587         emitByte(0xEB);
2588         emitByte(0xC0 | encode);
2589     }
2590 
2591     public final void pand(Register dst, Register src) {
2592         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2593         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2594         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2595         emitByte(0xDB);
2596         emitByte(0xC0 | encode);
2597     }
2598 
2599     public final void pxor(Register dst, Register src) {
2600         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2601         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2602         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2603         emitByte(0xEF);
2604         emitByte(0xC0 | encode);
2605     }
2606 
2607     public final void vpxor(Register dst, Register nds, Register src) {
2608         assert supports(CPUFeature.AVX);
2609         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2610         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2611         int encode = vexPrefixAndEncode(dst, nds, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2612         emitByte(0xEF);
2613         emitByte(0xC0 | encode);
2614     }
2615 
2616     public final void pslld(Register dst, int imm8) {
2617         assert isUByte(imm8) : "invalid value";
2618         assert dst.getRegisterCategory().equals(AMD64.XMM);
2619         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2620         // XMM6 is for /6 encoding: 66 0F 72 /6 ib
2621         int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2622         emitByte(0x72);
2623         emitByte(0xC0 | encode);
2624         emitByte(imm8 & 0xFF);
2625     }
2626 
2627     public final void psllq(Register dst, Register shift) {
2628         assert dst.getRegisterCategory().equals(AMD64.XMM) && shift.getRegisterCategory().equals(AMD64.XMM);
2629         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2630         int encode = simdPrefixAndEncode(dst, dst, shift, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2631         emitByte(0xF3);
2632         emitByte(0xC0 | encode);
2633     }
2634 
2635     public final void psllq(Register dst, int imm8) {
2636         assert isUByte(imm8) : "invalid value";
2637         assert dst.getRegisterCategory().equals(AMD64.XMM);
2638         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2639         // XMM6 is for /6 encoding: 66 0F 73 /6 ib
2640         int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2641         emitByte(0x73);
2642         emitByte(0xC0 | encode);
2643         emitByte(imm8);
2644     }
2645 
2646     public final void psrad(Register dst, int imm8) {
2647         assert isUByte(imm8) : "invalid value";
2648         assert dst.getRegisterCategory().equals(AMD64.XMM);
2649         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2650         // XMM4 is for /2 encoding: 66 0F 72 /4 ib
2651         int encode = simdPrefixAndEncode(AMD64.xmm4, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2652         emitByte(0x72);
2653         emitByte(0xC0 | encode);
2654         emitByte(imm8);
2655     }
2656 
2657     public final void psrld(Register dst, int imm8) {
2658         assert isUByte(imm8) : "invalid value";
2659         assert dst.getRegisterCategory().equals(AMD64.XMM);
2660         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2661         // XMM2 is for /2 encoding: 66 0F 72 /2 ib
2662         int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2663         emitByte(0x72);
2664         emitByte(0xC0 | encode);
2665         emitByte(imm8);
2666     }
2667 
2668     public final void psrlq(Register dst, int imm8) {
2669         assert isUByte(imm8) : "invalid value";
2670         assert dst.getRegisterCategory().equals(AMD64.XMM);
2671         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2672         // XMM2 is for /2 encoding: 66 0F 73 /2 ib
2673         int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2674         emitByte(0x73);
2675         emitByte(0xC0 | encode);
2676         emitByte(imm8);
2677     }
2678 
2679     public final void psrldq(Register dst, int imm8) {
2680         assert isUByte(imm8) : "invalid value";
2681         assert dst.getRegisterCategory().equals(AMD64.XMM);
2682         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2683         int encode = simdPrefixAndEncode(AMD64.xmm3, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2684         emitByte(0x73);
2685         emitByte(0xC0 | encode);
2686         emitByte(imm8);
2687     }
2688 
2689     public final void pshufd(Register dst, Register src, int imm8) {
2690         assert isUByte(imm8) : "invalid value";
2691         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2692         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2693         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2694         emitByte(0x70);
2695         emitByte(0xC0 | encode);
2696         emitByte(imm8);
2697     }
2698 
2699     public final void psubd(Register dst, Register src) {
2700         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2701         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2702         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2703         emitByte(0xFA);
2704         emitByte(0xC0 | encode);
2705     }
2706 
2707     public final void rcpps(Register dst, Register src) {
2708         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2709         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target);
2710         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2711         emitByte(0x53);
2712         emitByte(0xC0 | encode);
2713     }
2714 
2715     public final void ret(int imm16) {
2716         if (imm16 == 0) {
2717             emitByte(0xC3);
2718         } else {
2719             emitByte(0xC2);
2720             emitShort(imm16);
2721         }
2722     }
2723 
2724     public final void sarl(Register dst, int imm8) {
2725         int encode = prefixAndEncode(dst.encoding);
2726         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2727         if (imm8 == 1) {
2728             emitByte(0xD1);
2729             emitByte(0xF8 | encode);
2730         } else {
2731             emitByte(0xC1);
2732             emitByte(0xF8 | encode);
2733             emitByte(imm8);
2734         }
2735     }
2736 
2737     public final void shll(Register dst, int imm8) {
2738         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2739         int encode = prefixAndEncode(dst.encoding);
2740         if (imm8 == 1) {
2741             emitByte(0xD1);
2742             emitByte(0xE0 | encode);
2743         } else {
2744             emitByte(0xC1);
2745             emitByte(0xE0 | encode);
2746             emitByte(imm8);
2747         }
2748     }
2749 
2750     public final void shll(Register dst) {
2751         int encode = prefixAndEncode(dst.encoding);
2752         emitByte(0xD3);
2753         emitByte(0xE0 | encode);
2754     }
2755 
2756     public final void shrl(Register dst, int imm8) {
2757         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2758         int encode = prefixAndEncode(dst.encoding);
2759         emitByte(0xC1);
2760         emitByte(0xE8 | encode);
2761         emitByte(imm8);
2762     }
2763 
2764     public final void shrl(Register dst) {
2765         int encode = prefixAndEncode(dst.encoding);
2766         emitByte(0xD3);
2767         emitByte(0xE8 | encode);
2768     }
2769 
2770     public final void subl(AMD64Address dst, int imm32) {
2771         SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2772     }
2773 
2774     public final void subl(Register dst, int imm32) {
2775         SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2776     }
2777 
2778     public final void subl(Register dst, Register src) {
2779         SUB.rmOp.emit(this, DWORD, dst, src);
2780     }
2781 
2782     public final void subpd(Register dst, Register src) {
2783         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2784         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2785         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2786         emitByte(0x5C);
2787         emitByte(0xC0 | encode);
2788     }
2789 
2790     public final void subsd(Register dst, Register src) {
2791         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2792         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2793         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2794         emitByte(0x5C);
2795         emitByte(0xC0 | encode);
2796     }
2797 
2798     public final void subsd(Register dst, AMD64Address src) {
2799         assert dst.getRegisterCategory().equals(AMD64.XMM);
2800         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2801         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2802         emitByte(0x5C);
2803         emitOperandHelper(dst, src, 0);
2804     }
2805 
2806     public final void testl(Register dst, int imm32) {
2807         // not using emitArith because test
2808         // doesn't support sign-extension of
2809         // 8bit operands
2810         int encode = dst.encoding;
2811         if (encode == 0) {
2812             emitByte(0xA9);
2813         } else {
2814             encode = prefixAndEncode(encode);
2815             emitByte(0xF7);
2816             emitByte(0xC0 | encode);
2817         }
2818         emitInt(imm32);
2819     }
2820 
2821     public final void testl(Register dst, Register src) {
2822         int encode = prefixAndEncode(dst.encoding, src.encoding);
2823         emitByte(0x85);
2824         emitByte(0xC0 | encode);
2825     }
2826 
2827     public final void testl(Register dst, AMD64Address src) {
2828         prefix(src, dst);
2829         emitByte(0x85);
2830         emitOperandHelper(dst, src, 0);
2831     }
2832 
2833     public final void unpckhpd(Register dst, Register src) {
2834         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2835         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2836         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2837         emitByte(0x15);
2838         emitByte(0xC0 | encode);
2839     }
2840 
2841     public final void unpcklpd(Register dst, Register src) {
2842         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2843         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2844         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2845         emitByte(0x14);
2846         emitByte(0xC0 | encode);
2847     }
2848 
2849     public final void xorl(Register dst, Register src) {
2850         XOR.rmOp.emit(this, DWORD, dst, src);
2851     }
2852 
2853     public final void xorpd(Register dst, Register src) {
2854         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2855         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2856         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2857         emitByte(0x57);
2858         emitByte(0xC0 | encode);
2859     }
2860 
2861     public final void xorps(Register dst, Register src) {
2862         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2863         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2864         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2865         emitByte(0x57);
2866         emitByte(0xC0 | encode);
2867     }
2868 
2869     protected final void decl(Register dst) {
2870         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2871         int encode = prefixAndEncode(dst.encoding);
2872         emitByte(0xFF);
2873         emitByte(0xC8 | encode);
2874     }
2875 
2876     protected final void incl(Register dst) {
2877         // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2878         int encode = prefixAndEncode(dst.encoding);
2879         emitByte(0xFF);
2880         emitByte(0xC0 | encode);
2881     }
2882 
2883     private int prefixAndEncode(int regEnc) {
2884         return prefixAndEncode(regEnc, false);
2885     }
2886 
2887     private int prefixAndEncode(int regEnc, boolean byteinst) {
2888         if (regEnc >= 8) {
2889             emitByte(Prefix.REXB);
2890             return regEnc - 8;
2891         } else if (byteinst && regEnc >= 4) {
2892             emitByte(Prefix.REX);
2893         }
2894         return regEnc;
2895     }
2896 
2897     private int prefixqAndEncode(int regEnc) {
2898         if (regEnc < 8) {
2899             emitByte(Prefix.REXW);
2900             return regEnc;
2901         } else {
2902             emitByte(Prefix.REXWB);
2903             return regEnc - 8;
2904         }
2905     }
2906 
2907     private int prefixAndEncode(int dstEnc, int srcEnc) {
2908         return prefixAndEncode(dstEnc, false, srcEnc, false);
2909     }
2910 
2911     private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
2912         int srcEnc = srcEncoding;
2913         int dstEnc = dstEncoding;
2914         if (dstEnc < 8) {
2915             if (srcEnc >= 8) {
2916                 emitByte(Prefix.REXB);
2917                 srcEnc -= 8;
2918             } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
2919                 emitByte(Prefix.REX);
2920             }
2921         } else {
2922             if (srcEnc < 8) {
2923                 emitByte(Prefix.REXR);
2924             } else {
2925                 emitByte(Prefix.REXRB);
2926                 srcEnc -= 8;
2927             }
2928             dstEnc -= 8;
2929         }
2930         return dstEnc << 3 | srcEnc;
2931     }
2932 
2933     /**
2934      * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand
2935      * prefix. If the given operands exceed 3 bits, the 4th bit is encoded in the prefix.
2936      *
2937      * @param regEncoding the encoding of the register part of the ModRM-Byte
2938      * @param rmEncoding the encoding of the r/m part of the ModRM-Byte
2939      * @return the lower 6 bits of the ModRM-Byte that should be emitted
2940      */
2941     private int prefixqAndEncode(int regEncoding, int rmEncoding) {
2942         int rmEnc = rmEncoding;
2943         int regEnc = regEncoding;
2944         if (regEnc < 8) {
2945             if (rmEnc < 8) {
2946                 emitByte(Prefix.REXW);
2947             } else {
2948                 emitByte(Prefix.REXWB);
2949                 rmEnc -= 8;
2950             }
2951         } else {
2952             if (rmEnc < 8) {
2953                 emitByte(Prefix.REXWR);
2954             } else {
2955                 emitByte(Prefix.REXWRB);
2956                 rmEnc -= 8;
2957             }
2958             regEnc -= 8;
2959         }
2960         return regEnc << 3 | rmEnc;
2961     }
2962 
2963     private void vexPrefix(int rxb, int ndsEncoding, int pre, int opc, AMD64InstructionAttr attributes) {
2964         int vectorLen = attributes.getVectorLen();
2965         boolean vexW = attributes.isRexVexW();
2966         boolean isXorB = ((rxb & 0x3) > 0);
2967         if (isXorB || vexW || (opc == VexOpcode.VEX_OPCODE_0F_38) || (opc == VexOpcode.VEX_OPCODE_0F_3A)) {
2968             emitByte(Prefix.VEX_3BYTES);
2969 
2970             int byte1 = (rxb << 5);
2971             byte1 = ((~byte1) & 0xE0) | opc;
2972             emitByte(byte1);
2973 
2974             int byte2 = ((~ndsEncoding) & 0xf) << 3;
2975             byte2 |= (vexW ? VexPrefix.VEX_W : 0) | ((vectorLen > 0) ? 4 : 0) | pre;
2976             emitByte(byte2);
2977         } else {
2978             emitByte(Prefix.VEX_2BYTES);
2979 
2980             int byte1 = ((rxb & 0x4) > 0) ? VexPrefix.VEX_R : 0;
2981             byte1 = (~byte1) & 0x80;
2982             byte1 |= ((~ndsEncoding) & 0xf) << 3;
2983             byte1 |= ((vectorLen > 0) ? 4 : 0) | pre;
2984             emitByte(byte1);
2985         }
2986     }
2987 
2988     private void vexPrefix(AMD64Address adr, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2989         int rxb = getRXB(src, adr);
2990         int ndsEncoding = nds.isValid() ? nds.encoding : 0;
2991         vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
2992         setCurAttributes(attributes);
2993     }
2994 
2995     private int vexPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2996         int rxb = getRXB(dst, src);
2997         int ndsEncoding = nds.isValid() ? nds.encoding : 0;
2998         vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
2999         // return modrm byte components for operands
3000         return (((dst.encoding & 7) << 3) | (src.encoding & 7));
3001     }
3002 
3003     private void simdPrefix(Register xreg, Register nds, AMD64Address adr, int pre, int opc, AMD64InstructionAttr attributes) {
3004         if (supports(CPUFeature.AVX)) {
3005             vexPrefix(adr, nds, xreg, pre, opc, attributes);
3006         } else {
3007             switch (pre) {
3008                 case VexSimdPrefix.VEX_SIMD_66:
3009                     emitByte(0x66);
3010                     break;
3011                 case VexSimdPrefix.VEX_SIMD_F2:
3012                     emitByte(0xF2);
3013                     break;
3014                 case VexSimdPrefix.VEX_SIMD_F3:
3015                     emitByte(0xF3);
3016                     break;
3017             }
3018             if (attributes.isRexVexW()) {
3019                 prefixq(adr, xreg);
3020             } else {
3021                 prefix(adr, xreg);
3022             }
3023             switch (opc) {
3024                 case VexOpcode.VEX_OPCODE_0F:
3025                     emitByte(0x0F);
3026                     break;
3027                 case VexOpcode.VEX_OPCODE_0F_38:
3028                     emitByte(0x0F);
3029                     emitByte(0x38);
3030                     break;
3031                 case VexOpcode.VEX_OPCODE_0F_3A:
3032                     emitByte(0x0F);
3033                     emitByte(0x3A);
3034                     break;
3035             }
3036         }
3037     }
3038 
3039     private int simdPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
3040         if (supports(CPUFeature.AVX)) {
3041             return vexPrefixAndEncode(dst, nds, src, pre, opc, attributes);
3042         } else {
3043             switch (pre) {
3044                 case VexSimdPrefix.VEX_SIMD_66:
3045                     emitByte(0x66);
3046                     break;
3047                 case VexSimdPrefix.VEX_SIMD_F2:
3048                     emitByte(0xF2);
3049                     break;
3050                 case VexSimdPrefix.VEX_SIMD_F3:
3051                     emitByte(0xF3);
3052                     break;
3053             }
3054             int encode;
3055             int dstEncoding = dst.encoding;
3056             int srcEncoding = src.encoding;
3057             if (attributes.isRexVexW()) {
3058                 encode = prefixqAndEncode(dstEncoding, srcEncoding);
3059             } else {
3060                 encode = prefixAndEncode(dstEncoding, srcEncoding);
3061             }
3062             switch (opc) {
3063                 case VexOpcode.VEX_OPCODE_0F:
3064                     emitByte(0x0F);
3065                     break;
3066                 case VexOpcode.VEX_OPCODE_0F_38:
3067                     emitByte(0x0F);
3068                     emitByte(0x38);
3069                     break;
3070                 case VexOpcode.VEX_OPCODE_0F_3A:
3071                     emitByte(0x0F);
3072                     emitByte(0x3A);
3073                     break;
3074             }
3075             return encode;
3076         }
3077     }
3078 
3079     private static boolean needsRex(Register reg) {
3080         return reg.encoding >= MinEncodingNeedsRex;
3081     }
3082 
3083     private void prefix(AMD64Address adr) {
3084         if (needsRex(adr.getBase())) {
3085             if (needsRex(adr.getIndex())) {
3086                 emitByte(Prefix.REXXB);
3087             } else {
3088                 emitByte(Prefix.REXB);
3089             }
3090         } else {
3091             if (needsRex(adr.getIndex())) {
3092                 emitByte(Prefix.REXX);
3093             }
3094         }
3095     }
3096 
3097     private void prefixq(AMD64Address adr) {
3098         if (needsRex(adr.getBase())) {
3099             if (needsRex(adr.getIndex())) {
3100                 emitByte(Prefix.REXWXB);
3101             } else {
3102                 emitByte(Prefix.REXWB);
3103             }
3104         } else {
3105             if (needsRex(adr.getIndex())) {
3106                 emitByte(Prefix.REXWX);
3107             } else {
3108                 emitByte(Prefix.REXW);
3109             }
3110         }
3111     }
3112 
3113     private void prefix(AMD64Address adr, Register reg) {
3114         prefix(adr, reg, false);
3115     }
3116 
3117     private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
3118         if (reg.encoding < 8) {
3119             if (needsRex(adr.getBase())) {
3120                 if (needsRex(adr.getIndex())) {
3121                     emitByte(Prefix.REXXB);
3122                 } else {
3123                     emitByte(Prefix.REXB);
3124                 }
3125             } else {
3126                 if (needsRex(adr.getIndex())) {
3127                     emitByte(Prefix.REXX);
3128                 } else if (byteinst && reg.encoding >= 4) {
3129                     emitByte(Prefix.REX);
3130                 }
3131             }
3132         } else {
3133             if (needsRex(adr.getBase())) {
3134                 if (needsRex(adr.getIndex())) {
3135                     emitByte(Prefix.REXRXB);
3136                 } else {
3137                     emitByte(Prefix.REXRB);
3138                 }
3139             } else {
3140                 if (needsRex(adr.getIndex())) {
3141                     emitByte(Prefix.REXRX);
3142                 } else {
3143                     emitByte(Prefix.REXR);
3144                 }
3145             }
3146         }
3147     }
3148 
3149     private void prefixq(AMD64Address adr, Register src) {
3150         if (src.encoding < 8) {
3151             if (needsRex(adr.getBase())) {
3152                 if (needsRex(adr.getIndex())) {
3153                     emitByte(Prefix.REXWXB);
3154                 } else {
3155                     emitByte(Prefix.REXWB);
3156                 }
3157             } else {
3158                 if (needsRex(adr.getIndex())) {
3159                     emitByte(Prefix.REXWX);
3160                 } else {
3161                     emitByte(Prefix.REXW);
3162                 }
3163             }
3164         } else {
3165             if (needsRex(adr.getBase())) {
3166                 if (needsRex(adr.getIndex())) {
3167                     emitByte(Prefix.REXWRXB);
3168                 } else {
3169                     emitByte(Prefix.REXWRB);
3170                 }
3171             } else {
3172                 if (needsRex(adr.getIndex())) {
3173                     emitByte(Prefix.REXWRX);
3174                 } else {
3175                     emitByte(Prefix.REXWR);
3176                 }
3177             }
3178         }
3179     }
3180 
3181     public final void addq(Register dst, int imm32) {
3182         ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3183     }
3184 
3185     public final void addq(AMD64Address dst, int imm32) {
3186         ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3187     }
3188 
3189     public final void addq(Register dst, Register src) {
3190         ADD.rmOp.emit(this, QWORD, dst, src);
3191     }
3192 
3193     public final void addq(AMD64Address dst, Register src) {
3194         ADD.mrOp.emit(this, QWORD, dst, src);
3195     }
3196 
3197     public final void andq(Register dst, int imm32) {
3198         AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3199     }
3200 
3201     public final void bsrq(Register dst, Register src) {
3202         int encode = prefixqAndEncode(dst.encoding(), src.encoding());
3203         emitByte(0x0F);
3204         emitByte(0xBD);
3205         emitByte(0xC0 | encode);
3206     }
3207 
3208     public final void bswapq(Register reg) {
3209         int encode = prefixqAndEncode(reg.encoding);
3210         emitByte(0x0F);
3211         emitByte(0xC8 | encode);
3212     }
3213 
3214     public final void cdqq() {
3215         emitByte(Prefix.REXW);
3216         emitByte(0x99);
3217     }
3218 
3219     public final void cmovq(ConditionFlag cc, Register dst, Register src) {
3220         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3221         emitByte(0x0F);
3222         emitByte(0x40 | cc.getValue());
3223         emitByte(0xC0 | encode);
3224     }
3225 
3226     public final void setb(ConditionFlag cc, Register dst) {
3227         int encode = prefixAndEncode(dst.encoding, true);
3228         emitByte(0x0F);
3229         emitByte(0x90 | cc.getValue());
3230         emitByte(0xC0 | encode);
3231     }
3232 
3233     public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
3234         prefixq(src, dst);
3235         emitByte(0x0F);
3236         emitByte(0x40 | cc.getValue());
3237         emitOperandHelper(dst, src, 0);
3238     }
3239 
3240     public final void cmpq(Register dst, int imm32) {
3241         CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3242     }
3243 
3244     public final void cmpq(Register dst, Register src) {
3245         CMP.rmOp.emit(this, QWORD, dst, src);
3246     }
3247 
3248     public final void cmpq(Register dst, AMD64Address src) {
3249         CMP.rmOp.emit(this, QWORD, dst, src);
3250     }
3251 
3252     public final void cmpxchgq(Register reg, AMD64Address adr) {
3253         prefixq(adr, reg);
3254         emitByte(0x0F);
3255         emitByte(0xB1);
3256         emitOperandHelper(reg, adr, 0);
3257     }
3258 
3259     public final void cvtdq2pd(Register dst, Register src) {
3260         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3261         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3262         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3263         emitByte(0xE6);
3264         emitByte(0xC0 | encode);
3265     }
3266 
3267     public final void cvtsi2sdq(Register dst, Register src) {
3268         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
3269         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3270         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3271         emitByte(0x2A);
3272         emitByte(0xC0 | encode);
3273     }
3274 
3275     public final void cvttsd2siq(Register dst, Register src) {
3276         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
3277         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3278         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3279         emitByte(0x2C);
3280         emitByte(0xC0 | encode);
3281     }
3282 
3283     public final void cvttpd2dq(Register dst, Register src) {
3284         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3285         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3286         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3287         emitByte(0xE6);
3288         emitByte(0xC0 | encode);
3289     }
3290 
3291     protected final void decq(Register dst) {
3292         // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3293         int encode = prefixqAndEncode(dst.encoding);
3294         emitByte(0xFF);
3295         emitByte(0xC8 | encode);
3296     }
3297 
3298     public final void decq(AMD64Address dst) {
3299         DEC.emit(this, QWORD, dst);
3300     }
3301 
3302     public final void imulq(Register dst, Register src) {
3303         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3304         emitByte(0x0F);
3305         emitByte(0xAF);
3306         emitByte(0xC0 | encode);
3307     }
3308 
3309     public final void incq(Register dst) {
3310         // Don't use it directly. Use Macroincrementq() instead.
3311         // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3312         int encode = prefixqAndEncode(dst.encoding);
3313         emitByte(0xFF);
3314         emitByte(0xC0 | encode);
3315     }
3316 
3317     public final void incq(AMD64Address dst) {
3318         INC.emit(this, QWORD, dst);
3319     }
3320 
3321     public final void movq(Register dst, long imm64) {
3322         int encode = prefixqAndEncode(dst.encoding);
3323         emitByte(0xB8 | encode);
3324         emitLong(imm64);
3325     }
3326 
3327     public final void movslq(Register dst, int imm32) {
3328         int encode = prefixqAndEncode(dst.encoding);
3329         emitByte(0xC7);
3330         emitByte(0xC0 | encode);
3331         emitInt(imm32);
3332     }
3333 
3334     public final void movdq(Register dst, AMD64Address src) {
3335         assert dst.getRegisterCategory().equals(AMD64.XMM);
3336         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3337         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3338         emitByte(0x6E);
3339         emitOperandHelper(dst, src, 0);
3340     }
3341 
3342     public final void movdq(AMD64Address dst, Register src) {
3343         assert src.getRegisterCategory().equals(AMD64.XMM);
3344         // swap src/dst to get correct prefix
3345         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3346         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3347         emitByte(0x7E);
3348         emitOperandHelper(src, dst, 0);
3349     }
3350 
3351     public final void movdq(Register dst, Register src) {
3352         if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
3353             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3354             int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3355             emitByte(0x6E);
3356             emitByte(0xC0 | encode);
3357         } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
3358             // swap src/dst to get correct prefix
3359             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3360             int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3361             emitByte(0x7E);
3362             emitByte(0xC0 | encode);
3363         } else {
3364             throw new InternalError("should not reach here");
3365         }
3366     }
3367 
3368     public final void movdl(Register dst, Register src) {
3369         if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
3370             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3371             int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3372             emitByte(0x6E);
3373             emitByte(0xC0 | encode);
3374         } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
3375             // swap src/dst to get correct prefix
3376             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3377             int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3378             emitByte(0x7E);
3379             emitByte(0xC0 | encode);
3380         } else {
3381             throw new InternalError("should not reach here");
3382         }
3383     }
3384 
3385     public final void movdl(Register dst, AMD64Address src) {
3386         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3387         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3388         emitByte(0x6E);
3389         emitOperandHelper(dst, src, 0);
3390     }
3391 
3392     public final void movddup(Register dst, Register src) {
3393         assert supports(CPUFeature.SSE3);
3394         assert dst.getRegisterCategory().equals(AMD64.XMM);
3395         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3396         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3397         emitByte(0x12);
3398         emitByte(0xC0 | encode);
3399     }
3400 
3401     public final void movdqu(Register dst, AMD64Address src) {
3402         assert dst.getRegisterCategory().equals(AMD64.XMM);
3403         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3404         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3405         emitByte(0x6F);
3406         emitOperandHelper(dst, src, 0);
3407     }
3408 
3409     public final void movdqu(Register dst, Register src) {
3410         assert dst.getRegisterCategory().equals(AMD64.XMM);
3411         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3412         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3413         emitByte(0x6F);
3414         emitByte(0xC0 | encode);
3415     }
3416 
3417     public final void vmovdqu(Register dst, AMD64Address src) {
3418         assert supports(CPUFeature.AVX);
3419         assert dst.getRegisterCategory().equals(AMD64.XMM);
3420         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3421         vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3422         emitByte(0x6F);
3423         emitOperandHelper(dst, src, 0);
3424     }
3425 
3426     public final void vzeroupper() {
3427         assert supports(CPUFeature.AVX);
3428         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3429         vexPrefixAndEncode(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
3430         emitByte(0x77);
3431     }
3432 
3433     public final void movslq(AMD64Address dst, int imm32) {
3434         prefixq(dst);
3435         emitByte(0xC7);
3436         emitOperandHelper(0, dst, 4);
3437         emitInt(imm32);
3438     }
3439 
3440     public final void movslq(Register dst, AMD64Address src) {
3441         prefixq(src, dst);
3442         emitByte(0x63);
3443         emitOperandHelper(dst, src, 0);
3444     }
3445 
3446     public final void movslq(Register dst, Register src) {
3447         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3448         emitByte(0x63);
3449         emitByte(0xC0 | encode);
3450     }
3451 
3452     public final void negq(Register dst) {
3453         int encode = prefixqAndEncode(dst.encoding);
3454         emitByte(0xF7);
3455         emitByte(0xD8 | encode);
3456     }
3457 
3458     public final void orq(Register dst, Register src) {
3459         OR.rmOp.emit(this, QWORD, dst, src);
3460     }
3461 
3462     public final void shlq(Register dst, int imm8) {
3463         assert isShiftCount(imm8 >> 1) : "illegal shift count";
3464         int encode = prefixqAndEncode(dst.encoding);
3465         if (imm8 == 1) {
3466             emitByte(0xD1);
3467             emitByte(0xE0 | encode);
3468         } else {
3469             emitByte(0xC1);
3470             emitByte(0xE0 | encode);
3471             emitByte(imm8);
3472         }
3473     }
3474 
3475     public final void shlq(Register dst) {
3476         int encode = prefixqAndEncode(dst.encoding);
3477         emitByte(0xD3);
3478         emitByte(0xE0 | encode);
3479     }
3480 
3481     public final void shrq(Register dst, int imm8) {
3482         assert isShiftCount(imm8 >> 1) : "illegal shift count";
3483         int encode = prefixqAndEncode(dst.encoding);
3484         if (imm8 == 1) {
3485             emitByte(0xD1);
3486             emitByte(0xE8 | encode);
3487         } else {
3488             emitByte(0xC1);
3489             emitByte(0xE8 | encode);
3490             emitByte(imm8);
3491         }
3492     }
3493 
3494     public final void shrq(Register dst) {
3495         int encode = prefixqAndEncode(dst.encoding);
3496         emitByte(0xD3);
3497         emitByte(0xE8 | encode);
3498     }
3499 
3500     public final void sbbq(Register dst, Register src) {
3501         SBB.rmOp.emit(this, QWORD, dst, src);
3502     }
3503 
3504     public final void subq(Register dst, int imm32) {
3505         SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3506     }
3507 
3508     public final void subq(AMD64Address dst, int imm32) {
3509         SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3510     }
3511 
3512     public final void subqWide(Register dst, int imm32) {
3513         // don't use the sign-extending version, forcing a 32-bit immediate
3514         SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
3515     }
3516 
3517     public final void subq(Register dst, Register src) {
3518         SUB.rmOp.emit(this, QWORD, dst, src);
3519     }
3520 
3521     public final void testq(Register dst, Register src) {
3522         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3523         emitByte(0x85);
3524         emitByte(0xC0 | encode);
3525     }
3526 
3527     public final void btrq(Register src, int imm8) {
3528         int encode = prefixqAndEncode(src.encoding);
3529         emitByte(0x0F);
3530         emitByte(0xBA);
3531         emitByte(0xF0 | encode);
3532         emitByte(imm8);
3533     }
3534 
3535     public final void xaddl(AMD64Address dst, Register src) {
3536         prefix(dst, src);
3537         emitByte(0x0F);
3538         emitByte(0xC1);
3539         emitOperandHelper(src, dst, 0);
3540     }
3541 
3542     public final void xaddq(AMD64Address dst, Register src) {
3543         prefixq(dst, src);
3544         emitByte(0x0F);
3545         emitByte(0xC1);
3546         emitOperandHelper(src, dst, 0);
3547     }
3548 
3549     public final void xchgl(Register dst, AMD64Address src) {
3550         prefix(src, dst);
3551         emitByte(0x87);
3552         emitOperandHelper(dst, src, 0);
3553     }
3554 
3555     public final void xchgq(Register dst, AMD64Address src) {
3556         prefixq(src, dst);
3557         emitByte(0x87);
3558         emitOperandHelper(dst, src, 0);
3559     }
3560 
3561     public final void membar(int barriers) {
3562         if (target.isMP) {
3563             // We only have to handle StoreLoad
3564             if ((barriers & STORE_LOAD) != 0) {
3565                 // All usable chips support "locked" instructions which suffice
3566                 // as barriers, and are much faster than the alternative of
3567                 // using cpuid instruction. We use here a locked add [rsp],0.
3568                 // This is conveniently otherwise a no-op except for blowing
3569                 // flags.
3570                 // Any change to this code may need to revisit other places in
3571                 // the code where this idiom is used, in particular the
3572                 // orderAccess code.
3573                 lock();
3574                 addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
3575             }
3576         }
3577     }
3578 
3579     @Override
3580     protected final void patchJumpTarget(int branch, int branchTarget) {
3581         int op = getByte(branch);
3582         assert op == 0xE8 // call
3583                         ||
3584                         op == 0x00 // jump table entry
3585                         || op == 0xE9 // jmp
3586                         || op == 0xEB // short jmp
3587                         || (op & 0xF0) == 0x70 // short jcc
3588                         || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
3589         : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
3590 
3591         if (op == 0x00) {
3592             int offsetToJumpTableBase = getShort(branch + 1);
3593             int jumpTableBase = branch - offsetToJumpTableBase;
3594             int imm32 = branchTarget - jumpTableBase;
3595             emitInt(imm32, branch);
3596         } else if (op == 0xEB || (op & 0xF0) == 0x70) {
3597 
3598             // short offset operators (jmp and jcc)
3599             final int imm8 = branchTarget - (branch + 2);
3600             /*
3601              * Since a wrongly patched short branch can potentially lead to working but really bad
3602              * behaving code we should always fail with an exception instead of having an assert.
3603              */
3604             if (!NumUtil.isByte(imm8)) {
3605                 throw new InternalError("branch displacement out of range: " + imm8);
3606             }
3607             emitByte(imm8, branch + 1);
3608 
3609         } else {
3610 
3611             int off = 1;
3612             if (op == 0x0F) {
3613                 off = 2;
3614             }
3615 
3616             int imm32 = branchTarget - (branch + 4 + off);
3617             emitInt(imm32, branch + off);
3618         }
3619     }
3620 
3621     public void nullCheck(AMD64Address address) {
3622         testl(AMD64.rax, address);
3623     }
3624 
3625     @Override
3626     public void align(int modulus) {
3627         if (position() % modulus != 0) {
3628             nop(modulus - (position() % modulus));
3629         }
3630     }
3631 
3632     /**
3633      * Emits a direct call instruction. Note that the actual call target is not specified, because
3634      * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
3635      * responsible to add the call address to the appropriate patching tables.
3636      */
3637     public final void call() {
3638         if (codePatchingAnnotationConsumer != null) {
3639             int pos = position();
3640             codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + 1, 4, pos + 5));
3641         }
3642         emitByte(0xE8);
3643         emitInt(0);
3644     }
3645 
3646     public final void call(Register src) {
3647         int encode = prefixAndEncode(src.encoding);
3648         emitByte(0xFF);
3649         emitByte(0xD0 | encode);
3650     }
3651 
3652     public final void int3() {
3653         emitByte(0xCC);
3654     }
3655 
3656     public final void pause() {
3657         emitByte(0xF3);
3658         emitByte(0x90);
3659     }
3660 
3661     private void emitx87(int b1, int b2, int i) {
3662         assert 0 <= i && i < 8 : "illegal stack offset";
3663         emitByte(b1);
3664         emitByte(b2 + i);
3665     }
3666 
3667     public final void fldd(AMD64Address src) {
3668         emitByte(0xDD);
3669         emitOperandHelper(0, src, 0);
3670     }
3671 
3672     public final void flds(AMD64Address src) {
3673         emitByte(0xD9);
3674         emitOperandHelper(0, src, 0);
3675     }
3676 
3677     public final void fldln2() {
3678         emitByte(0xD9);
3679         emitByte(0xED);
3680     }
3681 
3682     public final void fldlg2() {
3683         emitByte(0xD9);
3684         emitByte(0xEC);
3685     }
3686 
3687     public final void fyl2x() {
3688         emitByte(0xD9);
3689         emitByte(0xF1);
3690     }
3691 
3692     public final void fstps(AMD64Address src) {
3693         emitByte(0xD9);
3694         emitOperandHelper(3, src, 0);
3695     }
3696 
3697     public final void fstpd(AMD64Address src) {
3698         emitByte(0xDD);
3699         emitOperandHelper(3, src, 0);
3700     }
3701 
3702     private void emitFPUArith(int b1, int b2, int i) {
3703         assert 0 <= i && i < 8 : "illegal FPU register: " + i;
3704         emitByte(b1);
3705         emitByte(b2 + i);
3706     }
3707 
3708     public void ffree(int i) {
3709         emitFPUArith(0xDD, 0xC0, i);
3710     }
3711 
3712     public void fincstp() {
3713         emitByte(0xD9);
3714         emitByte(0xF7);
3715     }
3716 
3717     public void fxch(int i) {
3718         emitFPUArith(0xD9, 0xC8, i);
3719     }
3720 
3721     public void fnstswAX() {
3722         emitByte(0xDF);
3723         emitByte(0xE0);
3724     }
3725 
3726     public void fwait() {
3727         emitByte(0x9B);
3728     }
3729 
3730     public void fprem() {
3731         emitByte(0xD9);
3732         emitByte(0xF8);
3733     }
3734 
3735     public final void fsin() {
3736         emitByte(0xD9);
3737         emitByte(0xFE);
3738     }
3739 
3740     public final void fcos() {
3741         emitByte(0xD9);
3742         emitByte(0xFF);
3743     }
3744 
3745     public final void fptan() {
3746         emitByte(0xD9);
3747         emitByte(0xF2);
3748     }
3749 
3750     public final void fstp(int i) {
3751         emitx87(0xDD, 0xD8, i);
3752     }
3753 
3754     @Override
3755     public AMD64Address makeAddress(Register base, int displacement) {
3756         return new AMD64Address(base, displacement);
3757     }
3758 
3759     @Override
3760     public AMD64Address getPlaceholder(int instructionStartPosition) {
3761         return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition);
3762     }
3763 
3764     private void prefetchPrefix(AMD64Address src) {
3765         prefix(src);
3766         emitByte(0x0F);
3767     }
3768 
3769     public void prefetchnta(AMD64Address src) {
3770         prefetchPrefix(src);
3771         emitByte(0x18);
3772         emitOperandHelper(0, src, 0);
3773     }
3774 
3775     void prefetchr(AMD64Address src) {
3776         assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
3777         prefetchPrefix(src);
3778         emitByte(0x0D);
3779         emitOperandHelper(0, src, 0);
3780     }
3781 
3782     public void prefetcht0(AMD64Address src) {
3783         assert supports(CPUFeature.SSE);
3784         prefetchPrefix(src);
3785         emitByte(0x18);
3786         emitOperandHelper(1, src, 0);
3787     }
3788 
3789     public void prefetcht1(AMD64Address src) {
3790         assert supports(CPUFeature.SSE);
3791         prefetchPrefix(src);
3792         emitByte(0x18);
3793         emitOperandHelper(2, src, 0);
3794     }
3795 
3796     public void prefetcht2(AMD64Address src) {
3797         assert supports(CPUFeature.SSE);
3798         prefix(src);
3799         emitByte(0x0f);
3800         emitByte(0x18);
3801         emitOperandHelper(3, src, 0);
3802     }
3803 
3804     public void prefetchw(AMD64Address src) {
3805         assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
3806         prefix(src);
3807         emitByte(0x0f);
3808         emitByte(0x0D);
3809         emitOperandHelper(1, src, 0);
3810     }
3811 
3812     public void rdtsc() {
3813         emitByte(0x0F);
3814         emitByte(0x31);
3815     }
3816 
3817     /**
3818      * Emits an instruction which is considered to be illegal. This is used if we deliberately want
3819      * to crash the program (debugging etc.).
3820      */
3821     public void illegal() {
3822         emitByte(0x0f);
3823         emitByte(0x0b);
3824     }
3825 }