1 /*
   2  * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package org.graalvm.compiler.asm.amd64;
  24 
  25 import static org.graalvm.compiler.asm.NumUtil.isByte;
  26 import static org.graalvm.compiler.asm.NumUtil.isInt;
  27 import static org.graalvm.compiler.asm.NumUtil.isShiftCount;
  28 import static org.graalvm.compiler.asm.NumUtil.isUByte;
  29 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
  30 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
  31 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
  32 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
  33 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
  34 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
  35 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
  36 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
  37 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
  38 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
  39 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
  40 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
  41 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
  42 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.BYTE;
  43 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.DWORD;
  44 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PD;
  45 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PS;
  46 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.QWORD;
  47 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SD;
  48 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SS;
  49 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.WORD;
  50 import static jdk.vm.ci.amd64.AMD64.CPU;
  51 import static jdk.vm.ci.amd64.AMD64.XMM;
  52 import static jdk.vm.ci.amd64.AMD64.r12;
  53 import static jdk.vm.ci.amd64.AMD64.r13;
  54 import static jdk.vm.ci.amd64.AMD64.rbp;
  55 import static jdk.vm.ci.amd64.AMD64.rip;
  56 import static jdk.vm.ci.amd64.AMD64.rsp;
  57 import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
  58 
  59 import org.graalvm.compiler.asm.Assembler;
  60 import org.graalvm.compiler.asm.Label;
  61 import org.graalvm.compiler.asm.NumUtil;
  62 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
  63 
  64 import jdk.vm.ci.amd64.AMD64;
  65 import jdk.vm.ci.amd64.AMD64.CPUFeature;
  66 import jdk.vm.ci.code.Register;
  67 import jdk.vm.ci.code.Register.RegisterCategory;
  68 import jdk.vm.ci.code.TargetDescription;
  69 
  70 /**
  71  * This class implements an assembler that can encode most X86 instructions.
  72  */
  73 public class AMD64Assembler extends Assembler {
  74 
  75     private static final int MinEncodingNeedsRex = 8;
  76 
  77     /**
  78      * The x86 condition codes used for conditional jumps/moves.
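           * <p>
           * The 4-bit {@code value} of each flag is the condition-code nibble that gets combined with an
           * opcode base; for illustration, {@link #Equal} (0x4) together with the short-jump base 0x70
           * yields the JE opcode 0x74.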
  79      */
  80     public enum ConditionFlag {
  81         Zero(0x4, "|zero|"),
  82         NotZero(0x5, "|nzero|"),
  83         Equal(0x4, "="),
  84         NotEqual(0x5, "!="),
  85         Less(0xc, "<"),
  86         LessEqual(0xe, "<="),
  87         Greater(0xf, ">"),
  88         GreaterEqual(0xd, ">="),
  89         Below(0x2, "|<|"),
  90         BelowEqual(0x6, "|<=|"),
  91         Above(0x7, "|>|"),
  92         AboveEqual(0x3, "|>=|"),
  93         Overflow(0x0, "|of|"),
  94         NoOverflow(0x1, "|nof|"),
  95         CarrySet(0x2, "|carry|"),
  96         CarryClear(0x3, "|ncarry|"),
  97         Negative(0x8, "|neg|"),
  98         Positive(0x9, "|pos|"),
  99         Parity(0xa, "|par|"),
 100         NoParity(0xb, "|npar|");
 101 
 102         private final int value;
 103         private final String operator;
 104 
 105         ConditionFlag(int value, String operator) {
 106             this.value = value;
 107             this.operator = operator;
 108         }
 109 
 110         public ConditionFlag negate() {
 111             switch (this) {
 112                 case Zero:
 113                     return NotZero;
 114                 case NotZero:
 115                     return Zero;
 116                 case Equal:
 117                     return NotEqual;
 118                 case NotEqual:
 119                     return Equal;
 120                 case Less:
 121                     return GreaterEqual;
 122                 case LessEqual:
 123                     return Greater;
 124                 case Greater:
 125                     return LessEqual;
 126                 case GreaterEqual:
 127                     return Less;
 128                 case Below:
 129                     return AboveEqual;
 130                 case BelowEqual:
 131                     return Above;
 132                 case Above:
 133                     return BelowEqual;
 134                 case AboveEqual:
 135                     return Below;
 136                 case Overflow:
 137                     return NoOverflow;
 138                 case NoOverflow:
 139                     return Overflow;
 140                 case CarrySet:
 141                     return CarryClear;
 142                 case CarryClear:
 143                     return CarrySet;
 144                 case Negative:
 145                     return Positive;
 146                 case Positive:
 147                     return Negative;
 148                 case Parity:
 149                     return NoParity;
 150                 case NoParity:
 151                     return Parity;
 152             }
 153             throw new IllegalArgumentException();
 154         }
 155 
 156         public int getValue() {
 157             return value;
 158         }
 159 
 160         @Override
 161         public String toString() {
 162             return operator;
 163         }
 164     }
 165 
 166     /**
 167      * Constants for X86 prefix bytes.
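           * <p>
           * A REX prefix has the binary form {@code 0100WRXB}: W selects 64-bit operand size, R extends
           * ModRM.reg, X extends SIB.index, and B extends ModRM.rm/SIB.base. For illustration,
           * {@code REXWB} (0x49) has W and B set.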
 168      */
 169     private static class Prefix {
 170         private static final int REX = 0x40;
 171         private static final int REXB = 0x41;
 172         private static final int REXX = 0x42;
 173         private static final int REXXB = 0x43;
 174         private static final int REXR = 0x44;
 175         private static final int REXRB = 0x45;
 176         private static final int REXRX = 0x46;
 177         private static final int REXRXB = 0x47;
 178         private static final int REXW = 0x48;
 179         private static final int REXWB = 0x49;
 180         private static final int REXWX = 0x4A;
 181         private static final int REXWXB = 0x4B;
 182         private static final int REXWR = 0x4C;
 183         private static final int REXWRB = 0x4D;
 184         private static final int REXWRX = 0x4E;
 185         private static final int REXWRXB = 0x4F;
 186         private static final int VEX_3BYTES = 0xC4;
 187         private static final int VEX_2BYTES = 0xC5;
 188     }
 189 
 190     private static class VexPrefix {
 191         private static final int VEX_R = 0x80;
 192         private static final int VEX_W = 0x80;
 193     }
 194 
 195     private static class AvxVectorLen {
 196         private static final int AVX_128bit = 0x0;
 197         private static final int AVX_256bit = 0x1;
 198     }
 199 
 200     private static class VexSimdPrefix {
 201         private static final int VEX_SIMD_NONE = 0x0;
 202         private static final int VEX_SIMD_66 = 0x1;
 203         private static final int VEX_SIMD_F3 = 0x2;
 204         private static final int VEX_SIMD_F2 = 0x3;
 205     }
 206 
 207     private static class VexOpcode {
 208         private static final int VEX_OPCODE_NONE = 0x0;
 209         private static final int VEX_OPCODE_0F = 0x1;
 210         private static final int VEX_OPCODE_0F_38 = 0x2;
 211         private static final int VEX_OPCODE_0F_3A = 0x3;
 212     }
 213 
 214     private AMD64InstructionAttr curAttributes;
 215 
 216     AMD64InstructionAttr getCurAttributes() {
 217         return curAttributes;
 218     }
 219 
 220     void setCurAttributes(AMD64InstructionAttr attributes) {
 221         curAttributes = attributes;
 222     }
 223 
 224     /**
 225      * The x86 operand sizes.
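           * <p>
           * The optional size prefix is emitted before the REX prefix and opcode (0x66 for WORD and PD,
           * 0xF3 for SS, 0xF2 for SD); QWORD has no prefix byte because 64-bit operand size is selected
           * via the REX.W bit instead.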
 226      */
 227     public enum OperandSize {
 228         BYTE(1) {
 229             @Override
 230             protected void emitImmediate(AMD64Assembler asm, int imm) {
 231                 assert imm == (byte) imm;
 232                 asm.emitByte(imm);
 233             }
 234 
 235             @Override
 236             protected int immediateSize() {
 237                 return 1;
 238             }
 239         },
 240 
 241         WORD(2, 0x66) {
 242             @Override
 243             protected void emitImmediate(AMD64Assembler asm, int imm) {
 244                 assert imm == (short) imm;
 245                 asm.emitShort(imm);
 246             }
 247 
 248             @Override
 249             protected int immediateSize() {
 250                 return 2;
 251             }
 252         },
 253 
 254         DWORD(4) {
 255             @Override
 256             protected void emitImmediate(AMD64Assembler asm, int imm) {
 257                 asm.emitInt(imm);
 258             }
 259 
 260             @Override
 261             protected int immediateSize() {
 262                 return 4;
 263             }
 264         },
 265 
 266         QWORD(8) {
 267             @Override
 268             protected void emitImmediate(AMD64Assembler asm, int imm) {
 269                 asm.emitInt(imm);
 270             }
 271 
 272             @Override
 273             protected int immediateSize() {
 274                 return 4;
 275             }
 276         },
 277 
 278         SS(4, 0xF3, true),
 279 
 280         SD(8, 0xF2, true),
 281 
 282         PS(16, true),
 283 
 284         PD(16, 0x66, true);
 285 
 286         private final int sizePrefix;
 287 
 288         private final int bytes;
 289         private final boolean xmm;
 290 
 291         OperandSize(int bytes) {
 292             this(bytes, 0);
 293         }
 294 
 295         OperandSize(int bytes, int sizePrefix) {
 296             this(bytes, sizePrefix, false);
 297         }
 298 
 299         OperandSize(int bytes, boolean xmm) {
 300             this(bytes, 0, xmm);
 301         }
 302 
 303         OperandSize(int bytes, int sizePrefix, boolean xmm) {
 304             this.sizePrefix = sizePrefix;
 305             this.bytes = bytes;
 306             this.xmm = xmm;
 307         }
 308 
 309         public int getBytes() {
 310             return bytes;
 311         }
 312 
 313         public boolean isXmmType() {
 314             return xmm;
 315         }
 316 
 317         /**
 318          * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
 319          * as sign-extended 32-bit values.
 320          *
  321          * @param asm the assembler used to emit the immediate
  322          * @param imm the immediate value to emit
 323          */
 324         protected void emitImmediate(AMD64Assembler asm, int imm) {
 325             throw new UnsupportedOperationException();
 326         }
 327 
 328         protected int immediateSize() {
 329             throw new UnsupportedOperationException();
 330         }
 331     }
 332 
 333     /**
 334      * Operand size and register type constraints.
 335      */
 336     private enum OpAssertion {
 337         ByteAssertion(CPU, CPU, BYTE),
 338         IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD),
 339         No16BitAssertion(CPU, CPU, DWORD, QWORD),
 340         No32BitAssertion(CPU, CPU, WORD, QWORD),
 341         QwordOnlyAssertion(CPU, CPU, QWORD),
 342         FloatingAssertion(XMM, XMM, SS, SD, PS, PD),
 343         PackedFloatingAssertion(XMM, XMM, PS, PD),
 344         SingleAssertion(XMM, XMM, SS),
 345         DoubleAssertion(XMM, XMM, SD),
 346         PackedDoubleAssertion(XMM, XMM, PD),
 347         IntToFloatingAssertion(XMM, CPU, DWORD, QWORD),
 348         FloatingToIntAssertion(CPU, XMM, DWORD, QWORD);
 349 
 350         private final RegisterCategory resultCategory;
 351         private final RegisterCategory inputCategory;
 352         private final OperandSize[] allowedSizes;
 353 
 354         OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
 355             this.resultCategory = resultCategory;
 356             this.inputCategory = inputCategory;
 357             this.allowedSizes = allowedSizes;
 358         }
 359 
 360         protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
 361             assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
 362             assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;
 363 
 364             for (OperandSize s : allowedSizes) {
 365                 if (size == s) {
 366                     return true;
 367                 }
 368             }
 369 
 370             assert false : "invalid operand size " + size + " used in " + op;
 371             return false;
 372         }
 373     }
 374 
 375     public abstract static class OperandDataAnnotation extends CodeAnnotation {
 376         /**
 377          * The position (bytes from the beginning of the method) of the operand.
 378          */
 379         public final int operandPosition;
 380         /**
 381          * The size of the operand, in bytes.
 382          */
 383         public final int operandSize;
 384         /**
 385          * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
 386          * RIP-relative operands are relative to this position.
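               * For a target within the same code buffer, the encoded 32-bit displacement is therefore
               * {@code target - nextInstructionPosition}.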
 387          */
 388         public final int nextInstructionPosition;
 389 
 390         OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
 391             super(instructionPosition);
 392 
 393             this.operandPosition = operandPosition;
 394             this.operandSize = operandSize;
 395             this.nextInstructionPosition = nextInstructionPosition;
 396         }
 397 
 398         @Override
 399         public String toString() {
 400             return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
 401         }
 402     }
 403 
 404     /**
 405      * Annotation that stores additional information about the displacement of a
 406      * {@link Assembler#getPlaceholder placeholder address} that needs patching.
 407      */
 408     public static class AddressDisplacementAnnotation extends OperandDataAnnotation {
  409         AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
  410             super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
 411         }
 412     }
 413 
 414     /**
 415      * Annotation that stores additional information about the immediate operand, e.g., of a call
 416      * instruction, that needs patching.
 417      */
 418     public static class ImmediateOperandAnnotation extends OperandDataAnnotation {
  419         ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
  420             super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
 421         }
 422     }
 423 
 424     /**
 425      * Constructs an assembler for the AMD64 architecture.
 426      */
 427     public AMD64Assembler(TargetDescription target) {
 428         super(target);
 429     }
 430 
 431     public boolean supports(CPUFeature feature) {
 432         return ((AMD64) target.arch).getFeatures().contains(feature);
 433     }
 434 
 435     private static int encode(Register r) {
 436         assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
 437         return r.encoding & 0x7;
 438     }
 439 
 440     /**
 441      * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
 442      * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
 443      * field. The X bit must be 0.
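           * <p>
           * For illustration: with {@code reg} = r9 (encoding 9, high bit set) and {@code rm} = rax
           * (encoding 0), the result is 0b100, i.e. only the R bit is set.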
 444      */
 445     protected static int getRXB(Register reg, Register rm) {
 446         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
 447         rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
 448         return rxb;
 449     }
 450 
 451     /**
 452      * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
 453      * are two cases for the memory operand:<br>
 454      * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
 455      * <br>
 456      * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
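           * <p>
           * For illustration: with {@code reg} = rax and the address {@code [rbx + r9*8]}, the result is
           * 0b010, i.e. only the X bit is set (r9 is an extended register used as index).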
 457      */
 458     protected static int getRXB(Register reg, AMD64Address rm) {
 459         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
 460         if (!rm.getIndex().equals(Register.None)) {
 461             rxb |= (rm.getIndex().encoding & 0x08) >> 2;
 462         }
 463         if (!rm.getBase().equals(Register.None)) {
 464             rxb |= (rm.getBase().encoding & 0x08) >> 3;
 465         }
 466         return rxb;
 467     }
 468 
 469     /**
 470      * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
 471      * <p>
 472      * Format: [ 11 reg r/m ]
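           * <p>
           * For illustration, {@code emitModRM(2, rcx)} emits {@code 0xD1} ({@code 0xC0 | 2 << 3 | 1}).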
 473      */
 474     protected void emitModRM(int reg, Register rm) {
 475         assert (reg & 0x07) == reg;
 476         emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
 477     }
 478 
 479     /**
 480      * Emit the ModR/M byte for two register operands.
 481      * <p>
 482      * Format: [ 11 reg r/m ]
 483      */
 484     protected void emitModRM(Register reg, Register rm) {
 485         emitModRM(reg.encoding & 0x07, rm);
 486     }
 487 
 488     protected void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
 489         assert !reg.equals(Register.None);
 490         emitOperandHelper(encode(reg), addr, false, additionalInstructionSize);
 491     }
 492 
 493     /**
 494      * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
 495      *
 496      * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 497      */
 498     protected void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
 499         assert !reg.equals(Register.None);
 500         emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize);
 501     }
 502 
 503     protected void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
 504         emitOperandHelper(reg, addr, false, additionalInstructionSize);
 505     }
 506 
 507     /**
 508      * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
 509      * extension in the R field.
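           * <p>
           * For illustration, encoding {@code [rbx + 16]} with {@code reg} = 0 emits the two bytes
           * {@code 0x43 0x10}, i.e. [01 000 011] followed by the 8-bit displacement.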
 510      *
 511      * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 512      * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
 513      *            so that the start position of the next instruction can be computed even though
 514      *            this instruction has not been completely emitted yet.
 515      */
 516     protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
 517         assert (reg & 0x07) == reg;
 518         int regenc = reg << 3;
 519 
 520         Register base = addr.getBase();
 521         Register index = addr.getIndex();
 522 
 523         AMD64Address.Scale scale = addr.getScale();
 524         int disp = addr.getDisplacement();
 525 
 526         if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
 527             // [00 000 101] disp32
 528             assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
 529             emitByte(0x05 | regenc);
 530             if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
 531                 codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
 532             }
 533             emitInt(disp);
 534         } else if (base.isValid()) {
  535             int baseenc = encode(base);
 536             if (index.isValid()) {
 537                 int indexenc = encode(index) << 3;
 538                 // [base + indexscale + disp]
 539                 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
 540                     // [base + indexscale]
 541                     // [00 reg 100][ss index base]
 542                     assert !index.equals(rsp) : "illegal addressing mode";
 543                     emitByte(0x04 | regenc);
 544                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 545                 } else if (isByte(disp) && !force4Byte) {
 546                     // [base + indexscale + imm8]
 547                     // [01 reg 100][ss index base] imm8
 548                     assert !index.equals(rsp) : "illegal addressing mode";
 549                     emitByte(0x44 | regenc);
 550                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 551                     emitByte(disp & 0xFF);
 552                 } else {
 553                     // [base + indexscale + disp32]
 554                     // [10 reg 100][ss index base] disp32
 555                     assert !index.equals(rsp) : "illegal addressing mode";
 556                     emitByte(0x84 | regenc);
 557                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 558                     emitInt(disp);
 559                 }
 560             } else if (base.equals(rsp) || base.equals(r12)) {
 561                 // [rsp + disp]
 562                 if (disp == 0) {
 563                     // [rsp]
 564                     // [00 reg 100][00 100 100]
 565                     emitByte(0x04 | regenc);
 566                     emitByte(0x24);
 567                 } else if (isByte(disp) && !force4Byte) {
 568                     // [rsp + imm8]
 569                     // [01 reg 100][00 100 100] disp8
 570                     emitByte(0x44 | regenc);
 571                     emitByte(0x24);
 572                     emitByte(disp & 0xFF);
 573                 } else {
 574                     // [rsp + imm32]
 575                     // [10 reg 100][00 100 100] disp32
 576                     emitByte(0x84 | regenc);
 577                     emitByte(0x24);
 578                     emitInt(disp);
 579                 }
 580             } else {
 581                 // [base + disp]
 582                 assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
 583                 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
 584                     // [base]
 585                     // [00 reg base]
 586                     emitByte(0x00 | regenc | baseenc);
 587                 } else if (isByte(disp) && !force4Byte) {
 588                     // [base + disp8]
 589                     // [01 reg base] disp8
 590                     emitByte(0x40 | regenc | baseenc);
 591                     emitByte(disp & 0xFF);
 592                 } else {
 593                     // [base + disp32]
 594                     // [10 reg base] disp32
 595                     emitByte(0x80 | regenc | baseenc);
 596                     emitInt(disp);
 597                 }
 598             }
 599         } else {
 600             if (index.isValid()) {
 601                 int indexenc = encode(index) << 3;
 602                 // [indexscale + disp]
 603                 // [00 reg 100][ss index 101] disp32
 604                 assert !index.equals(rsp) : "illegal addressing mode";
 605                 emitByte(0x04 | regenc);
 606                 emitByte(scale.log2 << 6 | indexenc | 0x05);
 607                 emitInt(disp);
 608             } else {
 609                 // [disp] ABSOLUTE
 610                 // [00 reg 100][00 100 101] disp32
 611                 emitByte(0x04 | regenc);
 612                 emitByte(0x25);
 613                 emitInt(disp);
 614             }
 615         }
 616         setCurAttributes(null);
 617     }
 618 
 619     /**
 620      * Base class for AMD64 opcodes.
 621      */
 622     public static class AMD64Op {
 623 
 624         protected static final int P_0F = 0x0F;
 625         protected static final int P_0F38 = 0x380F;
 626         protected static final int P_0F3A = 0x3A0F;
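              // The two-byte opcode maps are stored with their escape bytes swapped (0x0F in the low
              // byte) so that emitOpcode, which writes them with the little-endian emitShort, emits
              // them in instruction order as 0F 38 and 0F 3A.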
 627 
 628         private final String opcode;
 629 
 630         protected final int prefix1;
 631         protected final int prefix2;
 632         protected final int op;
 633 
 634         private final boolean dstIsByte;
 635         private final boolean srcIsByte;
 636 
 637         private final OpAssertion assertion;
 638         private final CPUFeature feature;
 639 
 640         protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 641             this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
 642         }
 643 
 644         protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
 645             this.opcode = opcode;
 646             this.prefix1 = prefix1;
 647             this.prefix2 = prefix2;
 648             this.op = op;
 649 
 650             this.dstIsByte = dstIsByte;
 651             this.srcIsByte = srcIsByte;
 652 
 653             this.assertion = assertion;
 654             this.feature = feature;
 655         }
 656 
 657         protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
 658             if (prefix1 != 0) {
 659                 asm.emitByte(prefix1);
 660             }
 661             if (size.sizePrefix != 0) {
 662                 asm.emitByte(size.sizePrefix);
 663             }
 664             int rexPrefix = 0x40 | rxb;
 665             if (size == QWORD) {
 666                 rexPrefix |= 0x08;
 667             }
 668             if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
 669                 asm.emitByte(rexPrefix);
 670             }
 671             if (prefix2 > 0xFF) {
 672                 asm.emitShort(prefix2);
 673             } else if (prefix2 > 0) {
 674                 asm.emitByte(prefix2);
 675             }
 676             asm.emitByte(op);
 677         }
 678 
 679         protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
 680             assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
 681             assert assertion.checkOperands(this, size, resultReg, inputReg);
 682             return true;
 683         }
 684 
 685         @Override
 686         public String toString() {
 687             return opcode;
 688         }
 689     }
 690 
 691     /**
 692      * Base class for AMD64 opcodes with immediate operands.
 693      */
 694     public static class AMD64ImmOp extends AMD64Op {
 695 
 696         private final boolean immIsByte;
 697 
 698         protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
 699             super(opcode, 0, prefix, op, assertion, null);
 700             this.immIsByte = immIsByte;
 701         }
 702 
 703         protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
 704             if (immIsByte) {
 705                 assert imm == (byte) imm;
 706                 asm.emitByte(imm);
 707             } else {
 708                 size.emitImmediate(asm, imm);
 709             }
 710         }
 711 
 712         protected final int immediateSize(OperandSize size) {
 713             if (immIsByte) {
 714                 return 1;
 715             } else {
 716                 return size.bytes;
 717             }
 718         }
 719     }
 720 
 721     /**
  722      * Opcode with operand order of either RM or MR, for two-address forms.
 723      */
 724     public abstract static class AMD64RROp extends AMD64Op {
 725 
 726         protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 727             super(opcode, prefix1, prefix2, op, assertion, feature);
 728         }
 729 
 730         protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
 731             super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
 732         }
 733 
 734         public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
 735     }
 736 
 737     /**
  738      * Opcode with operand order of either RM or MR, for three-address forms.
 739      */
 740     public abstract static class AMD64RRROp extends AMD64Op {
 741 
 742         protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 743             super(opcode, prefix1, prefix2, op, assertion, feature);
 744         }
 745 
 746         protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
 747             super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
 748         }
 749 
 750         public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src);
 751     }
 752 
 753     /**
 754      * Opcode with operand order of RM.
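           * <p>
           * For illustration, {@code MOV.emit(asm, DWORD, rax, rbx)} emits {@code 8B C3}, i.e.
           * {@code mov eax, ebx}.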
 755      */
 756     public static class AMD64RMOp extends AMD64RROp {
 757         // @formatter:off
 758         public static final AMD64RMOp IMUL   = new AMD64RMOp("IMUL",         P_0F, 0xAF);
 759         public static final AMD64RMOp BSF    = new AMD64RMOp("BSF",          P_0F, 0xBC);
 760         public static final AMD64RMOp BSR    = new AMD64RMOp("BSR",          P_0F, 0xBD);
 761         public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
 762         public static final AMD64RMOp TZCNT  = new AMD64RMOp("TZCNT",  0xF3, P_0F, 0xBC, CPUFeature.BMI1);
 763         public static final AMD64RMOp LZCNT  = new AMD64RMOp("LZCNT",  0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
 764         public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB",       P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion);
 765         public static final AMD64RMOp MOVZX  = new AMD64RMOp("MOVZX",        P_0F, 0xB7, OpAssertion.No16BitAssertion);
 766         public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB",       P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion);
 767         public static final AMD64RMOp MOVSX  = new AMD64RMOp("MOVSX",        P_0F, 0xBF, OpAssertion.No16BitAssertion);
 768         public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD",             0x63, OpAssertion.QwordOnlyAssertion);
 769         public static final AMD64RMOp MOVB   = new AMD64RMOp("MOVB",               0x8A, OpAssertion.ByteAssertion);
 770         public static final AMD64RMOp MOV    = new AMD64RMOp("MOV",                0x8B);
 771 
 772         // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
 773         public static final AMD64RMOp MOVD   = new AMD64RMOp("MOVD",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
 774         public static final AMD64RMOp MOVQ   = new AMD64RMOp("MOVQ",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
 775         public static final AMD64RMOp MOVSS  = new AMD64RMOp("MOVSS",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
 776         public static final AMD64RMOp MOVSD  = new AMD64RMOp("MOVSD",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
 777 
  778         // TEST is documented as an MR operation, but it is symmetric, and using it as an RM operation is more convenient.
 779         public static final AMD64RMOp TESTB  = new AMD64RMOp("TEST",               0x84, OpAssertion.ByteAssertion);
 780         public static final AMD64RMOp TEST   = new AMD64RMOp("TEST",               0x85);
 781         // @formatter:on
 782 
 783         protected AMD64RMOp(String opcode, int op) {
 784             this(opcode, 0, op);
 785         }
 786 
 787         protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
 788             this(opcode, 0, op, assertion);
 789         }
 790 
 791         protected AMD64RMOp(String opcode, int prefix, int op) {
 792             this(opcode, 0, prefix, op, null);
 793         }
 794 
 795         protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
 796             this(opcode, 0, prefix, op, assertion, null);
 797         }
 798 
 799         protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
 800             this(opcode, 0, prefix, op, assertion, feature);
 801         }
 802 
 803         protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
 804             super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
 805         }
 806 
 807         protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
 808             this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
 809         }
 810 
 811         protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 812             super(opcode, prefix1, prefix2, op, assertion, feature);
 813         }
 814 
 815         @Override
 816         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
 817             assert verify(asm, size, dst, src);
 818             boolean isSimd = false;
 819             boolean noNds = false;
 820 
 821             switch (op) {
 822                 case 0x2A:
 823                 case 0x2C:
 824                 case 0x2E:
 825                 case 0x5A:
 826                 case 0x6E:
 827                     isSimd = true;
 828                     noNds = true;
 829                     break;
 830                 case 0x10:
 831                 case 0x51:
 832                 case 0x54:
 833                 case 0x55:
 834                 case 0x56:
 835                 case 0x57:
 836                 case 0x58:
 837                 case 0x59:
 838                 case 0x5C:
 839                 case 0x5D:
 840                 case 0x5E:
 841                 case 0x5F:
 842                     isSimd = true;
 843                     break;
 844             }
 845 
 846             if (isSimd) {
 847                 int pre;
 848                 int opc;
  849                 boolean rexVexW = size == QWORD;
 850                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
 851                 int curPrefix = size.sizePrefix | prefix1;
 852                 switch (curPrefix) {
 853                     case 0x66:
 854                         pre = VexSimdPrefix.VEX_SIMD_66;
 855                         break;
 856                     case 0xF2:
 857                         pre = VexSimdPrefix.VEX_SIMD_F2;
 858                         break;
 859                     case 0xF3:
 860                         pre = VexSimdPrefix.VEX_SIMD_F3;
 861                         break;
 862                     default:
 863                         pre = VexSimdPrefix.VEX_SIMD_NONE;
 864                         break;
 865                 }
 866                 switch (prefix2) {
 867                     case P_0F:
 868                         opc = VexOpcode.VEX_OPCODE_0F;
 869                         break;
 870                     case P_0F38:
 871                         opc = VexOpcode.VEX_OPCODE_0F_38;
 872                         break;
 873                     case P_0F3A:
 874                         opc = VexOpcode.VEX_OPCODE_0F_3A;
 875                         break;
 876                     default:
 877                         opc = VexOpcode.VEX_OPCODE_NONE;
 878                         break;
 879                 }
 880                 int encode;
 881                 if (noNds) {
 882                     encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
 883                 } else {
 884                     encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
 885                 }
 886                 asm.emitByte(op);
 887                 asm.emitByte(0xC0 | encode);
 888             } else {
 889                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
 890                 asm.emitModRM(dst, src);
 891             }
 892         }
 893 
 894         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
 895             assert verify(asm, size, dst, null);
 896             boolean isSimd = false;
 897             boolean noNds = false;
 898 
 899             switch (op) {
 900                 case 0x10:
 901                 case 0x2A:
 902                 case 0x2C:
 903                 case 0x2E:
 904                 case 0x6E:
 905                     isSimd = true;
 906                     noNds = true;
 907                     break;
 908                 case 0x51:
 909                 case 0x54:
 910                 case 0x55:
 911                 case 0x56:
 912                 case 0x57:
 913                 case 0x58:
 914                 case 0x59:
 915                 case 0x5C:
 916                 case 0x5D:
 917                 case 0x5E:
 918                 case 0x5F:
 919                     isSimd = true;
 920                     break;
 921             }
 922 
 923             if (isSimd) {
 924                 int pre;
 925                 int opc;
  926                 boolean rexVexW = size == QWORD;
 927                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
 928                 int curPrefix = size.sizePrefix | prefix1;
 929                 switch (curPrefix) {
 930                     case 0x66:
 931                         pre = VexSimdPrefix.VEX_SIMD_66;
 932                         break;
 933                     case 0xF2:
 934                         pre = VexSimdPrefix.VEX_SIMD_F2;
 935                         break;
 936                     case 0xF3:
 937                         pre = VexSimdPrefix.VEX_SIMD_F3;
 938                         break;
 939                     default:
 940                         pre = VexSimdPrefix.VEX_SIMD_NONE;
 941                         break;
 942                 }
 943                 switch (prefix2) {
 944                     case P_0F:
 945                         opc = VexOpcode.VEX_OPCODE_0F;
 946                         break;
 947                     case P_0F38:
 948                         opc = VexOpcode.VEX_OPCODE_0F_38;
 949                         break;
 950                     case P_0F3A:
 951                         opc = VexOpcode.VEX_OPCODE_0F_3A;
 952                         break;
 953                     default:
 954                         opc = VexOpcode.VEX_OPCODE_NONE;
 955                         break;
 956                 }
 957                 if (noNds) {
 958                     asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
 959                 } else {
 960                     asm.simdPrefix(dst, dst, src, pre, opc, attributes);
 961                 }
 962                 asm.emitByte(op);
 963                 asm.emitOperandHelper(dst, src, 0);
 964             } else {
 965                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
 966                 asm.emitOperandHelper(dst, src, 0);
 967             }
 968         }
 969     }
 970 
 971     /**
  972      * Opcode with operand order of RRM; the middle operand ({@code nds}) is encoded in VEX.vvvv.
 973      */
 974     public static class AMD64RRMOp extends AMD64RRROp {
 975         protected AMD64RRMOp(String opcode, int op) {
 976             this(opcode, 0, op);
 977         }
 978 
 979         protected AMD64RRMOp(String opcode, int op, OpAssertion assertion) {
 980             this(opcode, 0, op, assertion);
 981         }
 982 
 983         protected AMD64RRMOp(String opcode, int prefix, int op) {
 984             this(opcode, 0, prefix, op, null);
 985         }
 986 
 987         protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion) {
 988             this(opcode, 0, prefix, op, assertion, null);
 989         }
 990 
 991         protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
 992             this(opcode, 0, prefix, op, assertion, feature);
 993         }
 994 
 995         protected AMD64RRMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
 996             super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
 997         }
 998 
 999         protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
1000             this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
1001         }
1002 
1003         protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
1004             super(opcode, prefix1, prefix2, op, assertion, feature);
1005         }
1006 
1007         @Override
1008         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src) {
1009             assert verify(asm, size, dst, src);
1010             int pre;
1011             int opc;
 1012             boolean rexVexW = size == QWORD;
1013             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1014             int curPrefix = size.sizePrefix | prefix1;
1015             switch (curPrefix) {
1016                 case 0x66:
1017                     pre = VexSimdPrefix.VEX_SIMD_66;
1018                     break;
1019                 case 0xF2:
1020                     pre = VexSimdPrefix.VEX_SIMD_F2;
1021                     break;
1022                 case 0xF3:
1023                     pre = VexSimdPrefix.VEX_SIMD_F3;
1024                     break;
1025                 default:
1026                     pre = VexSimdPrefix.VEX_SIMD_NONE;
1027                     break;
1028             }
1029             switch (prefix2) {
1030                 case P_0F:
1031                     opc = VexOpcode.VEX_OPCODE_0F;
1032                     break;
1033                 case P_0F38:
1034                     opc = VexOpcode.VEX_OPCODE_0F_38;
1035                     break;
1036                 case P_0F3A:
1037                     opc = VexOpcode.VEX_OPCODE_0F_3A;
1038                     break;
1039                 default:
1040                     opc = VexOpcode.VEX_OPCODE_NONE;
1041                     break;
1042             }
1043             int encode;
1044             encode = asm.simdPrefixAndEncode(dst, nds, src, pre, opc, attributes);
1045             asm.emitByte(op);
1046             asm.emitByte(0xC0 | encode);
1047         }
1048 
1049         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, AMD64Address src) {
1050             assert verify(asm, size, dst, null);
1051             int pre;
1052             int opc;
 1053             boolean rexVexW = size == QWORD;
1054             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1055             int curPrefix = size.sizePrefix | prefix1;
1056             switch (curPrefix) {
1057                 case 0x66:
1058                     pre = VexSimdPrefix.VEX_SIMD_66;
1059                     break;
1060                 case 0xF2:
1061                     pre = VexSimdPrefix.VEX_SIMD_F2;
1062                     break;
1063                 case 0xF3:
1064                     pre = VexSimdPrefix.VEX_SIMD_F3;
1065                     break;
1066                 default:
1067                     pre = VexSimdPrefix.VEX_SIMD_NONE;
1068                     break;
1069             }
1070             switch (prefix2) {
1071                 case P_0F:
1072                     opc = VexOpcode.VEX_OPCODE_0F;
1073                     break;
1074                 case P_0F38:
1075                     opc = VexOpcode.VEX_OPCODE_0F_38;
1076                     break;
1077                 case P_0F3A:
1078                     opc = VexOpcode.VEX_OPCODE_0F_3A;
1079                     break;
1080                 default:
1081                     opc = VexOpcode.VEX_OPCODE_NONE;
1082                     break;
1083             }
1084             asm.simdPrefix(dst, nds, src, pre, opc, attributes);
1085             asm.emitByte(op);
1086             asm.emitOperandHelper(dst, src, 0);
1087         }
1088     }
1089 
1090     /**
1091      * Opcode with operand order of MR.
1092      */
1093     public static class AMD64MROp extends AMD64RROp {
1094         // @formatter:off
1095         public static final AMD64MROp MOVB   = new AMD64MROp("MOVB",               0x88, OpAssertion.ByteAssertion);
1096         public static final AMD64MROp MOV    = new AMD64MROp("MOV",                0x89);
1097 
1098         // MOVD and MOVQ are the same opcode, just with different operand size prefix
1099         // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used.
1100         public static final AMD64MROp MOVD   = new AMD64MROp("MOVD",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
1101         public static final AMD64MROp MOVQ   = new AMD64MROp("MOVQ",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
1102 
1103         // MOVSS and MOVSD are the same opcode, just with different operand size prefix
1104         public static final AMD64MROp MOVSS  = new AMD64MROp("MOVSS",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
1105         public static final AMD64MROp MOVSD  = new AMD64MROp("MOVSD",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
1106         // @formatter:on
1107 
1108         protected AMD64MROp(String opcode, int op) {
1109             this(opcode, 0, op);
1110         }
1111 
1112         protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
1113             this(opcode, 0, op, assertion);
1114         }
1115 
1116         protected AMD64MROp(String opcode, int prefix, int op) {
1117             this(opcode, prefix, op, OpAssertion.IntegerAssertion);
1118         }
1119 
1120         protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
1121             this(opcode, prefix, op, assertion, null);
1122         }
1123 
1124         protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
1125             this(opcode, 0, prefix, op, assertion, feature);
1126         }
1127 
1128         protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
1129             super(opcode, prefix1, prefix2, op, assertion, feature);
1130         }
1131 
1132         @Override
1133         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
1134             assert verify(asm, size, src, dst);
1135             boolean isSimd = false;
1136             boolean noNds = false;
1137 
1138             switch (op) {
1139                 case 0x7E:
1140                     isSimd = true;
1141                     noNds = true;
1142                     break;
1143                 case 0x11:
1144                     isSimd = true;
1145                     break;
1146             }
1147 
1148             if (isSimd) {
1149                 int pre;
1150                 int opc;
 1151                 boolean rexVexW = size == QWORD;
1152                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1153                 int curPrefix = size.sizePrefix | prefix1;
1154                 switch (curPrefix) {
1155                     case 0x66:
1156                         pre = VexSimdPrefix.VEX_SIMD_66;
1157                         break;
1158                     case 0xF2:
1159                         pre = VexSimdPrefix.VEX_SIMD_F2;
1160                         break;
1161                     case 0xF3:
1162                         pre = VexSimdPrefix.VEX_SIMD_F3;
1163                         break;
1164                     default:
1165                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1166                         break;
1167                 }
1168                 switch (prefix2) {
1169                     case P_0F:
1170                         opc = VexOpcode.VEX_OPCODE_0F;
1171                         break;
1172                     case P_0F38:
1173                         opc = VexOpcode.VEX_OPCODE_0F_38;
1174                         break;
1175                     case P_0F3A:
1176                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1177                         break;
1178                     default:
1179                         opc = VexOpcode.VEX_OPCODE_NONE;
1180                         break;
1181                 }
1182                 int encode;
1183                 if (noNds) {
1184                     encode = asm.simdPrefixAndEncode(src, Register.None, dst, pre, opc, attributes);
1185                 } else {
1186                     encode = asm.simdPrefixAndEncode(src, src, dst, pre, opc, attributes);
1187                 }
1188                 asm.emitByte(op);
1189                 asm.emitByte(0xC0 | encode);
1190             } else {
1191                 emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
1192                 asm.emitModRM(src, dst);
1193             }
1194         }
1195 
1196         public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
1197             assert verify(asm, size, null, src);
1198             boolean isSimd = false;
1199 
1200             switch (op) {
1201                 case 0x7E:
1202                 case 0x11:
1203                     isSimd = true;
1204                     break;
1205             }
1206 
1207             if (isSimd) {
1208                 int pre;
1209                 int opc;
 1210                 boolean rexVexW = size == QWORD;
1211                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1212                 int curPrefix = size.sizePrefix | prefix1;
1213                 switch (curPrefix) {
1214                     case 0x66:
1215                         pre = VexSimdPrefix.VEX_SIMD_66;
1216                         break;
1217                     case 0xF2:
1218                         pre = VexSimdPrefix.VEX_SIMD_F2;
1219                         break;
1220                     case 0xF3:
1221                         pre = VexSimdPrefix.VEX_SIMD_F3;
1222                         break;
1223                     default:
1224                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1225                         break;
1226                 }
1227                 switch (prefix2) {
1228                     case P_0F:
1229                         opc = VexOpcode.VEX_OPCODE_0F;
1230                         break;
1231                     case P_0F38:
1232                         opc = VexOpcode.VEX_OPCODE_0F_38;
1233                         break;
1234                     case P_0F3A:
1235                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1236                         break;
1237                     default:
1238                         opc = VexOpcode.VEX_OPCODE_NONE;
1239                         break;
1240                 }
1241                 asm.simdPrefix(src, Register.None, dst, pre, opc, attributes);
1242                 asm.emitByte(op);
1243                 asm.emitOperandHelper(src, dst, 0);
1244             } else {
1245                 emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
1246                 asm.emitOperandHelper(src, dst, 0);
1247             }
1248         }
1249     }
1250 
1251     /**
1252      * Opcodes with operand order of M.
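           * <p>
           * The ModRM.reg field carries the opcode extension {@code ext}. For illustration,
           * {@code NOT.emit(asm, DWORD, rbx)} emits {@code F7 D3}, i.e. {@code not ebx}.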
1253      */
1254     public static class AMD64MOp extends AMD64Op {
1255         // @formatter:off
1256         public static final AMD64MOp NOT  = new AMD64MOp("NOT",  0xF7, 2);
1257         public static final AMD64MOp NEG  = new AMD64MOp("NEG",  0xF7, 3);
1258         public static final AMD64MOp MUL  = new AMD64MOp("MUL",  0xF7, 4);
1259         public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
1260         public static final AMD64MOp DIV  = new AMD64MOp("DIV",  0xF7, 6);
1261         public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
1262         public static final AMD64MOp INC  = new AMD64MOp("INC",  0xFF, 0);
1263         public static final AMD64MOp DEC  = new AMD64MOp("DEC",  0xFF, 1);
1264         public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
1265         public static final AMD64MOp POP  = new AMD64MOp("POP",  0x8F, 0, OpAssertion.No32BitAssertion);
1266         // @formatter:on
1267 
1268         private final int ext;
1269 
1270         protected AMD64MOp(String opcode, int op, int ext) {
1271             this(opcode, 0, op, ext);
1272         }
1273 
1274         protected AMD64MOp(String opcode, int prefix, int op, int ext) {
1275             this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion);
1276         }
1277 
1278         protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
1279             this(opcode, 0, op, ext, assertion);
1280         }
1281 
1282         protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
1283             super(opcode, 0, prefix, op, assertion, null);
1284             this.ext = ext;
1285         }
1286 
1287         public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
1288             assert verify(asm, size, dst, null);
1289             emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
1290             asm.emitModRM(ext, dst);
1291         }
1292 
1293         public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
1294             assert verify(asm, size, null, null);
1295             emitOpcode(asm, size, getRXB(null, dst), 0, 0);
1296             asm.emitOperandHelper(ext, dst, 0);
1297         }
1298     }
1299 
1300     /**
1301      * Opcodes with operand order of MI.
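     *
     * A usage sketch: {@code MOV.emit(asm, DWORD, dst, imm32)} emits {@code mov dst, imm32} as
     * opcode 0xC7 with extension 0 in the ModRM reg field, followed by the 32-bit immediate.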
1302      */
1303     public static class AMD64MIOp extends AMD64ImmOp {
1304         // @formatter:off
1305         public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true,  0xC6, 0, OpAssertion.ByteAssertion);
1306         public static final AMD64MIOp MOV  = new AMD64MIOp("MOV",  false, 0xC7, 0);
1307         public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
1308         // @formatter:on
1309 
1310         private final int ext;
1311 
1312         protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
1313             this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion);
1314         }
1315 
1316         protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
1317             this(opcode, immIsByte, 0, op, ext, assertion);
1318         }
1319 
1320         protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
1321             super(opcode, immIsByte, prefix, op, assertion);
1322             this.ext = ext;
1323         }
1324 
1325         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
1326             assert verify(asm, size, dst, null);
1327             emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
1328             asm.emitModRM(ext, dst);
1329             emitImmediate(asm, size, imm);
1330         }
1331 
1332         public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
1333             assert verify(asm, size, null, null);
1334             emitOpcode(asm, size, getRXB(null, dst), 0, 0);
1335             asm.emitOperandHelper(ext, dst, immediateSize(size));
1336             emitImmediate(asm, size, imm);
1337         }
1338     }
1339 
1340     /**
1341      * Opcodes with operand order of RMI.
1342      *
1343      * We only have one form of the round operations since they always operate on a single scalar
1344      * input, making an extension to 3-address forms redundant.
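     *
     * A usage sketch (mirroring {@code imull} below): {@code IMUL_SX.emit(asm, DWORD, dst, src, imm)}
     * emits {@code imul dst, src, imm} as opcode 0x6B with a sign-extended byte immediate, while
     * {@code IMUL.emit(asm, DWORD, dst, src, imm)} uses opcode 0x69 with a full 32-bit immediate.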
1345      */
1346     public static class AMD64RMIOp extends AMD64ImmOp {
1347         // @formatter:off
1348         public static final AMD64RMIOp IMUL    = new AMD64RMIOp("IMUL", false, 0x69);
1349         public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true,  0x6B);
1350         public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion);
1351         public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion);
1352         // @formatter:on
1353 
1354         protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
1355             this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion);
1356         }
1357 
1358         protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
1359             super(opcode, immIsByte, prefix, op, assertion);
1360         }
1361 
1362         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
1363             assert verify(asm, size, dst, src);
1364             boolean isSimd = false;
1365             boolean noNds = false;
1366 
1367             switch (op) {
1368                 case 0x0A:
1369                 case 0x0B:
1370                     isSimd = true;
1371                     noNds = true;
1372                     break;
1373             }
1374 
1375             if (isSimd) {
1376                 int pre;
1377                 int opc;
1378                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1379                 int curPrefix = size.sizePrefix | prefix1;
1380                 switch (curPrefix) {
1381                     case 0x66:
1382                         pre = VexSimdPrefix.VEX_SIMD_66;
1383                         break;
1384                     case 0xF2:
1385                         pre = VexSimdPrefix.VEX_SIMD_F2;
1386                         break;
1387                     case 0xF3:
1388                         pre = VexSimdPrefix.VEX_SIMD_F3;
1389                         break;
1390                     default:
1391                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1392                         break;
1393                 }
1394                 switch (prefix2) {
1395                     case P_0F:
1396                         opc = VexOpcode.VEX_OPCODE_0F;
1397                         break;
1398                     case P_0F38:
1399                         opc = VexOpcode.VEX_OPCODE_0F_38;
1400                         break;
1401                     case P_0F3A:
1402                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1403                         break;
1404                     default:
1405                         opc = VexOpcode.VEX_OPCODE_NONE;
1406                         break;
1407                 }
1408                 int encode;
1409                 if (noNds) {
1410                     encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
1411                 } else {
1412                     encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
1413                 }
1414                 asm.emitByte(op);
1415                 asm.emitByte(0xC0 | encode);
1416                 emitImmediate(asm, size, imm);
1417             } else {
1418                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
1419                 asm.emitModRM(dst, src);
1420                 emitImmediate(asm, size, imm);
1421             }
1422         }
1423 
1424         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
1425             assert verify(asm, size, dst, null);
1426 
1427             boolean isSimd = false;
1428             boolean noNds = false;
1429 
1430             switch (op) {
1431                 case 0x0A:
1432                 case 0x0B:
1433                     isSimd = true;
1434                     noNds = true;
1435                     break;
1436             }
1437 
1438             if (isSimd) {
1439                 int pre;
1440                 int opc;
1441                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1442                 int curPrefix = size.sizePrefix | prefix1;
1443                 switch (curPrefix) {
1444                     case 0x66:
1445                         pre = VexSimdPrefix.VEX_SIMD_66;
1446                         break;
1447                     case 0xF2:
1448                         pre = VexSimdPrefix.VEX_SIMD_F2;
1449                         break;
1450                     case 0xF3:
1451                         pre = VexSimdPrefix.VEX_SIMD_F3;
1452                         break;
1453                     default:
1454                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1455                         break;
1456                 }
1457                 switch (prefix2) {
1458                     case P_0F:
1459                         opc = VexOpcode.VEX_OPCODE_0F;
1460                         break;
1461                     case P_0F38:
1462                         opc = VexOpcode.VEX_OPCODE_0F_38;
1463                         break;
1464                     case P_0F3A:
1465                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1466                         break;
1467                     default:
1468                         opc = VexOpcode.VEX_OPCODE_NONE;
1469                         break;
1470                 }
1471 
1472                 if (noNds) {
1473                     asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
1474                 } else {
1475                     asm.simdPrefix(dst, dst, src, pre, opc, attributes);
1476                 }
1477                 asm.emitByte(op);
1478                 asm.emitOperandHelper(dst, src, immediateSize(size));
1479                 emitImmediate(asm, size, imm);
1480             } else {
1481                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
1482                 asm.emitOperandHelper(dst, src, immediateSize(size));
1483                 emitImmediate(asm, size, imm);
1484             }
1485         }
1486     }
1487 
1488     public static class SSEOp extends AMD64RMOp {
1489         // @formatter:off
1490         public static final SSEOp CVTSI2SS  = new SSEOp("CVTSI2SS",  0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
1491         public static final SSEOp CVTSI2SD  = new SSEOp("CVTSI2SD",  0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
1492         public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
1493         public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
1494         public static final SSEOp UCOMIS    = new SSEOp("UCOMIS",          P_0F, 0x2E, OpAssertion.PackedFloatingAssertion);
1495         public static final SSEOp SQRT      = new SSEOp("SQRT",            P_0F, 0x51);
1496         public static final SSEOp AND       = new SSEOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
1497         public static final SSEOp ANDN      = new SSEOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
1498         public static final SSEOp OR        = new SSEOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
1499         public static final SSEOp XOR       = new SSEOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
1500         public static final SSEOp ADD       = new SSEOp("ADD",             P_0F, 0x58);
1501         public static final SSEOp MUL       = new SSEOp("MUL",             P_0F, 0x59);
1502         public static final SSEOp CVTSS2SD  = new SSEOp("CVTSS2SD",        P_0F, 0x5A, OpAssertion.SingleAssertion);
1503         public static final SSEOp CVTSD2SS  = new SSEOp("CVTSD2SS",        P_0F, 0x5A, OpAssertion.DoubleAssertion);
1504         public static final SSEOp SUB       = new SSEOp("SUB",             P_0F, 0x5C);
1505         public static final SSEOp MIN       = new SSEOp("MIN",             P_0F, 0x5D);
1506         public static final SSEOp DIV       = new SSEOp("DIV",             P_0F, 0x5E);
1507         public static final SSEOp MAX       = new SSEOp("MAX",             P_0F, 0x5F);
1508         // @formatter:on
1509 
1510         protected SSEOp(String opcode, int prefix, int op) {
1511             this(opcode, prefix, op, OpAssertion.FloatingAssertion);
1512         }
1513 
1514         protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
1515             this(opcode, 0, prefix, op, assertion);
1516         }
1517 
1518         protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
1519             super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
1520         }
1521     }
1522 
1523     public static class AVXOp extends AMD64RRMOp {
1524         // @formatter:off
1525         public static final AVXOp AND       = new AVXOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
1526         public static final AVXOp ANDN      = new AVXOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
1527         public static final AVXOp OR        = new AVXOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
1528         public static final AVXOp XOR       = new AVXOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
1529         public static final AVXOp ADD       = new AVXOp("ADD",             P_0F, 0x58);
1530         public static final AVXOp MUL       = new AVXOp("MUL",             P_0F, 0x59);
1531         public static final AVXOp SUB       = new AVXOp("SUB",             P_0F, 0x5C);
1532         public static final AVXOp MIN       = new AVXOp("MIN",             P_0F, 0x5D);
1533         public static final AVXOp DIV       = new AVXOp("DIV",             P_0F, 0x5E);
1534         public static final AVXOp MAX       = new AVXOp("MAX",             P_0F, 0x5F);
1535         // @formatter:on
1536 
1537         protected AVXOp(String opcode, int prefix, int op) {
1538             this(opcode, prefix, op, OpAssertion.FloatingAssertion);
1539         }
1540 
1541         protected AVXOp(String opcode, int prefix, int op, OpAssertion assertion) {
1542             this(opcode, 0, prefix, op, assertion);
1543         }
1544 
1545         protected AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
1546             super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.AVX);
1547         }
1548     }
1549 
1550     /**
1551      * Arithmetic operation with operand order of RM, MR or MI.
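     *
     * A usage sketch (mirroring {@code addl} and {@code cmpl} below):
     * {@code ADD.getMIOpcode(DWORD, isByte(imm32)).emit(asm, DWORD, dst, imm32)} picks the
     * sign-extended byte-immediate form (0x83) when the immediate fits in a byte and the full
     * 32-bit immediate form (0x81) otherwise, while {@code ADD.rmOp.emit(asm, DWORD, dst, src)}
     * emits the plain register-register form.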
1552      */
1553     public static final class AMD64BinaryArithmetic {
1554         // @formatter:off
1555         public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
1556         public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR",  1);
1557         public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
1558         public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
1559         public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
1560         public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
1561         public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
1562         public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
1563         // @formatter:on
1564 
1565         private final AMD64MIOp byteImmOp;
1566         private final AMD64MROp byteMrOp;
1567         private final AMD64RMOp byteRmOp;
1568 
1569         private final AMD64MIOp immOp;
1570         private final AMD64MIOp immSxOp;
1571         private final AMD64MROp mrOp;
1572         private final AMD64RMOp rmOp;
1573 
1574         private AMD64BinaryArithmetic(String opcode, int code) {
1575             int baseOp = code << 3;
1576 
1577             byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
1578             byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
1579             byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);
1580 
1581             immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion);
1582             immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion);
1583             mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion);
1584             rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion);
1585         }
1586 
1587         public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
1588             if (size == BYTE) {
1589                 return byteImmOp;
1590             } else if (sx) {
1591                 return immSxOp;
1592             } else {
1593                 return immOp;
1594             }
1595         }
1596 
1597         public AMD64MROp getMROpcode(OperandSize size) {
1598             if (size == BYTE) {
1599                 return byteMrOp;
1600             } else {
1601                 return mrOp;
1602             }
1603         }
1604 
1605         public AMD64RMOp getRMOpcode(OperandSize size) {
1606             if (size == BYTE) {
1607                 return byteRmOp;
1608             } else {
1609                 return rmOp;
1610             }
1611         }
1612     }
1613 
1614     /**
1615      * Shift operation with operand order of M1, MC or MI.
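     *
     * A usage sketch: {@code SHL.miOp.emit(asm, DWORD, dst, imm8)} emits {@code shl dst, imm8}
     * (opcode 0xC1 with extension 4), {@code SHL.mcOp.emit(asm, DWORD, dst)} emits
     * {@code shl dst, cl} (0xD3 /4), and {@code SHL.m1Op.emit(asm, DWORD, dst)} emits
     * {@code shl dst, 1} (0xD1 /4).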
1616      */
1617     public static final class AMD64Shift {
1618         // @formatter:off
1619         public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
1620         public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
1621         public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
1622         public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
1623         public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
1624         public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
1625         public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
1626         // @formatter:on
1627 
1628         public final AMD64MOp m1Op;
1629         public final AMD64MOp mcOp;
1630         public final AMD64MIOp miOp;
1631 
1632         private AMD64Shift(String opcode, int code) {
1633             m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion);
1634             mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion);
1635             miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion);
1636         }
1637     }
1638 
1639     public final void addl(AMD64Address dst, int imm32) {
1640         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1641     }
1642 
1643     public final void addl(Register dst, int imm32) {
1644         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1645     }
1646 
1647     public final void addl(Register dst, Register src) {
1648         ADD.rmOp.emit(this, DWORD, dst, src);
1649     }
1650 
1651     public final void addpd(Register dst, Register src) {
1652         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1653         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1654         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1655         emitByte(0x58);
1656         emitByte(0xC0 | encode);
1657     }
1658 
1659     public final void addpd(Register dst, AMD64Address src) {
1660         assert dst.getRegisterCategory().equals(AMD64.XMM);
1661         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1662         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1663         emitByte(0x58);
1664         emitOperandHelper(dst, src, 0);
1665     }
1666 
1667     public final void addsd(Register dst, Register src) {
1668         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1669         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1670         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1671         emitByte(0x58);
1672         emitByte(0xC0 | encode);
1673     }
1674 
1675     public final void addsd(Register dst, AMD64Address src) {
1676         assert dst.getRegisterCategory().equals(AMD64.XMM);
1677         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1678         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1679         emitByte(0x58);
1680         emitOperandHelper(dst, src, 0);
1681     }
1682 
1683     private void addrNop4() {
1684         // 4 bytes: NOP DWORD PTR [EAX+0]
1685         emitByte(0x0F);
1686         emitByte(0x1F);
1687         emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
1688         emitByte(0); // 8-bit offset (1 byte)
1689     }
1690 
1691     private void addrNop5() {
1692         // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset
1693         emitByte(0x0F);
1694         emitByte(0x1F);
1695         emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
1696         emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1697         emitByte(0); // 8-bit offset (1 byte)
1698     }
1699 
1700     private void addrNop7() {
1701         // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset
1702         emitByte(0x0F);
1703         emitByte(0x1F);
1704         emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
1705         emitInt(0); // 32-bit offset (4 bytes)
1706     }
1707 
1708     private void addrNop8() {
1709         // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset
1710         emitByte(0x0F);
1711         emitByte(0x1F);
1712         emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
1713         emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1714         emitInt(0); // 32-bit offset (4 bytes)
1715     }
1716 
1717     public final void andl(Register dst, int imm32) {
1718         AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1719     }
1720 
1721     public final void andl(Register dst, Register src) {
1722         AND.rmOp.emit(this, DWORD, dst, src);
1723     }
1724 
1725     public final void andpd(Register dst, Register src) {
1726         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1727         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1728         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1729         emitByte(0x54);
1730         emitByte(0xC0 | encode);
1731     }
1732 
1733     public final void andpd(Register dst, AMD64Address src) {
1734         assert dst.getRegisterCategory().equals(AMD64.XMM);
1735         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1736         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1737         emitByte(0x54);
1738         emitOperandHelper(dst, src, 0);
1739     }
1740 
1741     public final void bsrl(Register dst, Register src) {
1742         int encode = prefixAndEncode(dst.encoding(), src.encoding());
1743         emitByte(0x0F);
1744         emitByte(0xBD);
1745         emitByte(0xC0 | encode);
1746     }
1747 
1748     public final void bswapl(Register reg) {
1749         int encode = prefixAndEncode(reg.encoding);
1750         emitByte(0x0F);
1751         emitByte(0xC8 | encode);
1752     }
1753 
1754     public final void cdql() {
1755         emitByte(0x99);
1756     }
1757 
1758     public final void cmovl(ConditionFlag cc, Register dst, Register src) {
1759         int encode = prefixAndEncode(dst.encoding, src.encoding);
1760         emitByte(0x0F);
1761         emitByte(0x40 | cc.getValue());
1762         emitByte(0xC0 | encode);
1763     }
1764 
1765     public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
1766         prefix(src, dst);
1767         emitByte(0x0F);
1768         emitByte(0x40 | cc.getValue());
1769         emitOperandHelper(dst, src, 0);
1770     }
1771 
1772     public final void cmpl(Register dst, int imm32) {
1773         CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1774     }
1775 
1776     public final void cmpl(Register dst, Register src) {
1777         CMP.rmOp.emit(this, DWORD, dst, src);
1778     }
1779 
1780     public final void cmpl(Register dst, AMD64Address src) {
1781         CMP.rmOp.emit(this, DWORD, dst, src);
1782     }
1783 
1784     public final void cmpl(AMD64Address dst, int imm32) {
1785         CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1786     }
1787 
1788     // The 32-bit cmpxchg compares the value at adr with the contents of rax:
1789     // if they are equal, reg is stored into adr; otherwise, the value at adr is loaded into rax.
1790     // The ZF flag is set if the compared values were equal, and cleared otherwise.
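    // For an atomic compare-and-swap, callers typically precede this instruction with the LOCK
    // prefix, i.e. emit lock() followed by cmpxchgl(reg, adr).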
1791     public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
1792         prefix(adr, reg);
1793         emitByte(0x0F);
1794         emitByte(0xB1);
1795         emitOperandHelper(reg, adr, 0);
1796     }
1797 
1798     public final void cvtsi2sdl(Register dst, Register src) {
1799         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
1800         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1801         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1802         emitByte(0x2A);
1803         emitByte(0xC0 | encode);
1804     }
1805 
1806     public final void cvttsd2sil(Register dst, Register src) {
1807         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
1808         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1809         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1810         emitByte(0x2C);
1811         emitByte(0xC0 | encode);
1812     }
1813 
1814     protected final void decl(AMD64Address dst) {
1815         prefix(dst);
1816         emitByte(0xFF);
1817         emitOperandHelper(1, dst, 0);
1818     }
1819 
1820     public final void divsd(Register dst, Register src) {
1821         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1822         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1823         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1824         emitByte(0x5E);
1825         emitByte(0xC0 | encode);
1826     }
1827 
1828     public final void hlt() {
1829         emitByte(0xF4);
1830     }
1831 
1832     public final void imull(Register dst, Register src, int value) {
1833         if (isByte(value)) {
1834             AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
1835         } else {
1836             AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
1837         }
1838     }
1839 
1840     protected final void incl(AMD64Address dst) {
1841         prefix(dst);
1842         emitByte(0xFF);
1843         emitOperandHelper(0, dst, 0);
1844     }
1845 
1846     public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
1847         int shortSize = 2;
1848         int longSize = 6;
1849         long disp = jumpTarget - position();
1850         if (!forceDisp32 && isByte(disp - shortSize)) {
1851             // 0111 tttn #8-bit disp
1852             emitByte(0x70 | cc.getValue());
1853             emitByte((int) ((disp - shortSize) & 0xFF));
1854         } else {
1855             // 0000 1111 1000 tttn #32-bit disp
1856             assert isInt(disp - longSize) : "must be 32bit offset (call4)";
1857             emitByte(0x0F);
1858             emitByte(0x80 | cc.getValue());
1859             emitInt((int) (disp - longSize));
1860         }
1861     }
1862 
1863     public final void jcc(ConditionFlag cc, Label l) {
1864         assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
1865         if (l.isBound()) {
1866             jcc(cc, l.position(), false);
1867         } else {
1868             // Note: conditional jumps to this jump could be eliminated if the condition
1869             // is the same; however, that seems to be a rather unlikely case.
1870             // Note: use jccb() if the label to be bound is very close, to get
1871             // an 8-bit displacement.
1872             l.addPatchAt(position());
1873             emitByte(0x0F);
1874             emitByte(0x80 | cc.getValue());
1875             emitInt(0);
1876         }
1877 
1878     }
1879 
1880     public final void jccb(ConditionFlag cc, Label l) {
1881         if (l.isBound()) {
1882             int shortSize = 2;
1883             int entry = l.position();
1884             assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
1885             long disp = entry - position();
1886             // 0111 tttn #8-bit disp
1887             emitByte(0x70 | cc.getValue());
1888             emitByte((int) ((disp - shortSize) & 0xFF));
1889         } else {
1890             l.addPatchAt(position());
1891             emitByte(0x70 | cc.getValue());
1892             emitByte(0);
1893         }
1894     }
1895 
1896     public final void jmp(int jumpTarget, boolean forceDisp32) {
1897         int shortSize = 2;
1898         int longSize = 5;
1899         long disp = jumpTarget - position();
1900         if (!forceDisp32 && isByte(disp - shortSize)) {
1901             emitByte(0xEB);
1902             emitByte((int) ((disp - shortSize) & 0xFF));
1903         } else {
1904             emitByte(0xE9);
1905             emitInt((int) (disp - longSize));
1906         }
1907     }
1908 
1909     @Override
1910     public final void jmp(Label l) {
1911         if (l.isBound()) {
1912             jmp(l.position(), false);
1913         } else {
1914             // By default, forward jumps are always 32-bit displacements, since
1915             // we can't yet know where the label will be bound. If you're sure that
1916             // the forward jump will not run beyond 256 bytes, use jmpb to
1917             // force an 8-bit displacement.
1918 
1919             l.addPatchAt(position());
1920             emitByte(0xE9);
1921             emitInt(0);
1922         }
1923     }
1924 
1925     public final void jmp(Register entry) {
1926         int encode = prefixAndEncode(entry.encoding);
1927         emitByte(0xFF);
1928         emitByte(0xE0 | encode);
1929     }
1930 
1931     public final void jmp(AMD64Address adr) {
1932         prefix(adr);
1933         emitByte(0xFF);
1934         emitOperandHelper(rsp, adr, 0);
1935     }
1936 
1937     public final void jmpb(Label l) {
1938         if (l.isBound()) {
1939             int shortSize = 2;
1940             int entry = l.position();
1941             assert isByte((entry - position()) + shortSize) : "Displacement too large for a short jmp";
1942             long offs = entry - position();
1943             emitByte(0xEB);
1944             emitByte((int) ((offs - shortSize) & 0xFF));
1945         } else {
1946 
1947             l.addPatchAt(position());
1948             emitByte(0xEB);
1949             emitByte(0);
1950         }
1951     }
1952 
1953     public final void leaq(Register dst, AMD64Address src) {
1954         prefixq(src, dst);
1955         emitByte(0x8D);
1956         emitOperandHelper(dst, src, 0);
1957     }
1958 
1959     public final void leave() {
1960         emitByte(0xC9);
1961     }
1962 
1963     public final void lock() {
1964         emitByte(0xF0);
1965     }
1966 
1967     public final void movapd(Register dst, Register src) {
1968         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1969         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1970         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1971         emitByte(0x28);
1972         emitByte(0xC0 | encode);
1973     }
1974 
1975     public final void movaps(Register dst, Register src) {
1976         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1977         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1978         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
1979         emitByte(0x28);
1980         emitByte(0xC0 | encode);
1981     }
1982 
1983     public final void movb(AMD64Address dst, int imm8) {
1984         prefix(dst);
1985         emitByte(0xC6);
1986         emitOperandHelper(0, dst, 1);
1987         emitByte(imm8);
1988     }
1989 
1990     public final void movb(AMD64Address dst, Register src) {
1991         assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
1992         prefix(dst, src, true);
1993         emitByte(0x88);
1994         emitOperandHelper(src, dst, 0);
1995     }
1996 
1997     public final void movl(Register dst, int imm32) {
1998         int encode = prefixAndEncode(dst.encoding);
1999         emitByte(0xB8 | encode);
2000         emitInt(imm32);
2001     }
2002 
2003     public final void movl(Register dst, Register src) {
2004         int encode = prefixAndEncode(dst.encoding, src.encoding);
2005         emitByte(0x8B);
2006         emitByte(0xC0 | encode);
2007     }
2008 
2009     public final void movl(Register dst, AMD64Address src) {
2010         prefix(src, dst);
2011         emitByte(0x8B);
2012         emitOperandHelper(dst, src, 0);
2013     }
2014 
2015     public final void movl(AMD64Address dst, int imm32) {
2016         prefix(dst);
2017         emitByte(0xC7);
2018         emitOperandHelper(0, dst, 4);
2019         emitInt(imm32);
2020     }
2021 
2022     public final void movl(AMD64Address dst, Register src) {
2023         prefix(dst, src);
2024         emitByte(0x89);
2025         emitOperandHelper(src, dst, 0);
2026     }
2027 
2028     /**
2029      * Newer CPUs require the use of movsd and movss to avoid a partial register stall when loading
2030      * from memory. But for old Opteron, use movlpd instead of movsd. The selection is done in
2031      * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
2032      * {@link AMD64MacroAssembler#movflt(Register, Register)}.
2033      */
2034     public final void movlpd(Register dst, AMD64Address src) {
2035         assert dst.getRegisterCategory().equals(AMD64.XMM);
2036         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2037         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2038         emitByte(0x12);
2039         emitOperandHelper(dst, src, 0);
2040     }
2041 
2042     public final void movlhps(Register dst, Register src) {
2043         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2044         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2045         int encode = simdPrefixAndEncode(dst, src, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2046         emitByte(0x16);
2047         emitByte(0xC0 | encode);
2048     }
2049 
2050     public final void movq(Register dst, AMD64Address src) {
2051         movq(dst, src, false);
2052     }
2053 
2054     public final void movq(Register dst, AMD64Address src, boolean wide) {
2055         if (dst.getRegisterCategory().equals(AMD64.XMM)) {
2056             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ wide, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2057             simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2058             emitByte(0x7E);
2059             emitOperandHelper(dst, src, wide, 0);
2060         } else {
2061             // gpr version of movq
2062             prefixq(src, dst);
2063             emitByte(0x8B);
2064             emitOperandHelper(dst, src, wide, 0);
2065         }
2066     }
2067 
2068     public final void movq(Register dst, Register src) {
2069         int encode = prefixqAndEncode(dst.encoding, src.encoding);
2070         emitByte(0x8B);
2071         emitByte(0xC0 | encode);
2072     }
2073 
2074     public final void movq(AMD64Address dst, Register src) {
2075         if (src.getRegisterCategory().equals(AMD64.XMM)) {
2076             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2077             simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2078             emitByte(0xD6);
2079             emitOperandHelper(src, dst, 0);
2080         } else {
2081             // gpr version of movq
2082             prefixq(dst, src);
2083             emitByte(0x89);
2084             emitOperandHelper(src, dst, 0);
2085         }
2086     }
2087 
2088     public final void movsbl(Register dst, AMD64Address src) {
2089         prefix(src, dst);
2090         emitByte(0x0F);
2091         emitByte(0xBE);
2092         emitOperandHelper(dst, src, 0);
2093     }
2094 
2095     public final void movsbl(Register dst, Register src) {
2096         int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
2097         emitByte(0x0F);
2098         emitByte(0xBE);
2099         emitByte(0xC0 | encode);
2100     }
2101 
2102     public final void movsbq(Register dst, AMD64Address src) {
2103         prefixq(src, dst);
2104         emitByte(0x0F);
2105         emitByte(0xBE);
2106         emitOperandHelper(dst, src, 0);
2107     }
2108 
2109     public final void movsbq(Register dst, Register src) {
2110         int encode = prefixqAndEncode(dst.encoding, src.encoding);
2111         emitByte(0x0F);
2112         emitByte(0xBE);
2113         emitByte(0xC0 | encode);
2114     }
2115 
2116     public final void movsd(Register dst, Register src) {
2117         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2118         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2119         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2120         emitByte(0x10);
2121         emitByte(0xC0 | encode);
2122     }
2123 
2124     public final void movsd(Register dst, AMD64Address src) {
2125         assert dst.getRegisterCategory().equals(AMD64.XMM);
2126         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2127         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2128         emitByte(0x10);
2129         emitOperandHelper(dst, src, 0);
2130     }
2131 
2132     public final void movsd(AMD64Address dst, Register src) {
2133         assert src.getRegisterCategory().equals(AMD64.XMM);
2134         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2135         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2136         emitByte(0x11);
2137         emitOperandHelper(src, dst, 0);
2138     }
2139 
2140     public final void movss(Register dst, Register src) {
2141         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2142         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2143         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2144         emitByte(0x10);
2145         emitByte(0xC0 | encode);
2146     }
2147 
2148     public final void movss(Register dst, AMD64Address src) {
2149         assert dst.getRegisterCategory().equals(AMD64.XMM);
2150         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2151         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2152         emitByte(0x10);
2153         emitOperandHelper(dst, src, 0);
2154     }
2155 
2156     public final void movss(AMD64Address dst, Register src) {
2157         assert src.getRegisterCategory().equals(AMD64.XMM);
2158         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2159         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2160         emitByte(0x11);
2161         emitOperandHelper(src, dst, 0);
2162     }
2163 
2164     public final void mulpd(Register dst, Register src) {
2165         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2166         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2167         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2168         emitByte(0x59);
2169         emitByte(0xC0 | encode);
2170     }
2171 
2172     public final void mulpd(Register dst, AMD64Address src) {
2173         assert dst.getRegisterCategory().equals(AMD64.XMM);
2174         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2175         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2176         emitByte(0x59);
2177         emitOperandHelper(dst, src, 0);
2178     }
2179 
2180     public final void mulsd(Register dst, Register src) {
2181         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2182         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2183         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2184         emitByte(0x59);
2185         emitByte(0xC0 | encode);
2186     }
2187 
2188     public final void mulsd(Register dst, AMD64Address src) {
2189         assert dst.getRegisterCategory().equals(AMD64.XMM);
2190         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2191         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2192         emitByte(0x59);
2193         emitOperandHelper(dst, src, 0);
2194     }
2195 
2196     public final void mulss(Register dst, Register src) {
2197         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2198         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2199         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2200         emitByte(0x59);
2201         emitByte(0xC0 | encode);
2202     }
2203 
2204     public final void movswl(Register dst, AMD64Address src) {
2205         prefix(src, dst);
2206         emitByte(0x0F);
2207         emitByte(0xBF);
2208         emitOperandHelper(dst, src, 0);
2209     }
2210 
2211     public final void movw(AMD64Address dst, int imm16) {
2212         emitByte(0x66); // switch to 16-bit mode
2213         prefix(dst);
2214         emitByte(0xC7);
2215         emitOperandHelper(0, dst, 2);
2216         emitShort(imm16);
2217     }
2218 
2219     public final void movw(AMD64Address dst, Register src) {
2220         emitByte(0x66);
2221         prefix(dst, src);
2222         emitByte(0x89);
2223         emitOperandHelper(src, dst, 0);
2224     }
2225 
2226     public final void movzbl(Register dst, AMD64Address src) {
2227         prefix(src, dst);
2228         emitByte(0x0F);
2229         emitByte(0xB6);
2230         emitOperandHelper(dst, src, 0);
2231     }
2232 
2233     public final void movzwl(Register dst, AMD64Address src) {
2234         prefix(src, dst);
2235         emitByte(0x0F);
2236         emitByte(0xB7);
2237         emitOperandHelper(dst, src, 0);
2238     }
2239 
2240     public final void negl(Register dst) {
2241         NEG.emit(this, DWORD, dst);
2242     }
2243 
2244     public final void notl(Register dst) {
2245         NOT.emit(this, DWORD, dst);
2246     }
2247 
2248     @Override
2249     public final void ensureUniquePC() {
2250         nop();
2251     }
2252 
2253     public final void nop() {
2254         nop(1);
2255     }
2256 
2257     public void nop(int count) {
2258         int i = count;
2259         if (UseNormalNop) {
2260             assert i > 0 : " ";
2261             // The fancy nops aren't currently recognized by debuggers, making it a
2262             // pain to disassemble code while debugging. If asserts are on, speed is
2263             // clearly not an issue, so simply use the traditional single-byte nop
2264             // to do alignment.
2265 
2266             for (; i > 0; i--) {
2267                 emitByte(0x90);
2268             }
2269             return;
2270         }
2271 
2272         if (UseAddressNop) {
2273             //
2274             // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
2275             // 1: 0x90
2276             // 2: 0x66 0x90
2277             // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2278             // 4: 0x0F 0x1F 0x40 0x00
2279             // 5: 0x0F 0x1F 0x44 0x00 0x00
2280             // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2281             // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2282             // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2283             // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2284             // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2285             // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2286 
2287             // The rest of the encoding is AMD-specific - use consecutive address nops
2288 
2289             // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2290             // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2291             // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2292             // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2293             // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2294             // Size prefixes (0x66) are added for larger sizes
2295 
2296             while (i >= 22) {
2297                 i -= 11;
2298                 emitByte(0x66); // size prefix
2299                 emitByte(0x66); // size prefix
2300                 emitByte(0x66); // size prefix
2301                 addrNop8();
2302             }
2303             // Generate the first nop for sizes between 12 and 21
2304             switch (i) {
2305                 case 21:
2306                     i -= 11;
2307                     emitByte(0x66); // size prefix
2308                     emitByte(0x66); // size prefix
2309                     emitByte(0x66); // size prefix
2310                     addrNop8();
2311                     break;
2312                 case 20:
2313                 case 19:
2314                     i -= 10;
2315                     emitByte(0x66); // size prefix
2316                     emitByte(0x66); // size prefix
2317                     addrNop8();
2318                     break;
2319                 case 18:
2320                 case 17:
2321                     i -= 9;
2322                     emitByte(0x66); // size prefix
2323                     addrNop8();
2324                     break;
2325                 case 16:
2326                 case 15:
2327                     i -= 8;
2328                     addrNop8();
2329                     break;
2330                 case 14:
2331                 case 13:
2332                     i -= 7;
2333                     addrNop7();
2334                     break;
2335                 case 12:
2336                     i -= 6;
2337                     emitByte(0x66); // size prefix
2338                     addrNop5();
2339                     break;
2340                 default:
2341                     assert i < 12;
2342             }
2343 
2344             // Generate the second nop for sizes between 1 and 11
2345             switch (i) {
2346                 case 11:
2347                     emitByte(0x66); // size prefix
2348                     emitByte(0x66); // size prefix
2349                     emitByte(0x66); // size prefix
2350                     addrNop8();
2351                     break;
2352                 case 10:
2353                     emitByte(0x66); // size prefix
2354                     emitByte(0x66); // size prefix
2355                     addrNop8();
2356                     break;
2357                 case 9:
2358                     emitByte(0x66); // size prefix
2359                     addrNop8();
2360                     break;
2361                 case 8:
2362                     addrNop8();
2363                     break;
2364                 case 7:
2365                     addrNop7();
2366                     break;
2367                 case 6:
2368                     emitByte(0x66); // size prefix
2369                     addrNop5();
2370                     break;
2371                 case 5:
2372                     addrNop5();
2373                     break;
2374                 case 4:
2375                     addrNop4();
2376                     break;
2377                 case 3:
2378                     // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2379                     emitByte(0x66); // size prefix
2380                     emitByte(0x66); // size prefix
2381                     emitByte(0x90); // nop
2382                     break;
2383                 case 2:
2384                     emitByte(0x66); // size prefix
2385                     emitByte(0x90); // nop
2386                     break;
2387                 case 1:
2388                     emitByte(0x90); // nop
2389                     break;
2390                 default:
2391                     assert i == 0;
2392             }
2393             return;
2394         }
2395 
2396         // Using nops with size prefixes "0x66 0x90".
2397         // From AMD Optimization Guide:
2398         // 1: 0x90
2399         // 2: 0x66 0x90
2400         // 3: 0x66 0x66 0x90
2401         // 4: 0x66 0x66 0x66 0x90
2402         // 5: 0x66 0x66 0x90 0x66 0x90
2403         // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2404         // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2405         // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2406         // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2407         // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2408         //
2409         while (i > 12) {
2410             i -= 4;
2411             emitByte(0x66); // size prefix
2412             emitByte(0x66);
2413             emitByte(0x66);
2414             emitByte(0x90); // nop
2415         }
2416         // 1 - 12 nops
2417         if (i > 8) {
2418             if (i > 9) {
2419                 i -= 1;
2420                 emitByte(0x66);
2421             }
2422             i -= 3;
2423             emitByte(0x66);
2424             emitByte(0x66);
2425             emitByte(0x90);
2426         }
2427         // 1 - 8 nops
2428         if (i > 4) {
2429             if (i > 6) {
2430                 i -= 1;
2431                 emitByte(0x66);
2432             }
2433             i -= 3;
2434             emitByte(0x66);
2435             emitByte(0x66);
2436             emitByte(0x90);
2437         }
2438         switch (i) {
2439             case 4:
2440                 emitByte(0x66);
2441                 emitByte(0x66);
2442                 emitByte(0x66);
2443                 emitByte(0x90);
2444                 break;
2445             case 3:
2446                 emitByte(0x66);
2447                 emitByte(0x66);
2448                 emitByte(0x90);
2449                 break;
2450             case 2:
2451                 emitByte(0x66);
2452                 emitByte(0x90);
2453                 break;
2454             case 1:
2455                 emitByte(0x90);
2456                 break;
2457             default:
2458                 assert i == 0;
2459         }
2460     }
2461 
2462     public final void orl(Register dst, Register src) {
2463         OR.rmOp.emit(this, DWORD, dst, src);
2464     }
2465 
2466     public final void orl(Register dst, int imm32) {
2467         OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2468     }
2469 
2470     public final void pop(Register dst) {
2471         int encode = prefixAndEncode(dst.encoding);
2472         emitByte(0x58 | encode);
2473     }
2474 
2475     public void popfq() {
2476         emitByte(0x9D);
2477     }
2478 
2479     public final void ptest(Register dst, Register src) {
2480         assert supports(CPUFeature.SSE4_1);
2481         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2482         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2483         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2484         emitByte(0x17);
2485         emitByte(0xC0 | encode);
2486     }
2487 
2488     public final void vptest(Register dst, Register src) {
2489         assert supports(CPUFeature.AVX);
2490         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2491         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2492         int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2493         emitByte(0x17);
2494         emitByte(0xC0 | encode);
2495     }
2496 
2497     public final void push(Register src) {
2498         int encode = prefixAndEncode(src.encoding);
2499         emitByte(0x50 | encode);
2500     }
2501 
2502     public void pushfq() {
2503         emitByte(0x9C);
2504     }
2505 
2506     public final void paddd(Register dst, Register src) {
2507         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2508         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2509         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2510         emitByte(0xFE);
2511         emitByte(0xC0 | encode);
2512     }
2513 
2514     public final void paddq(Register dst, Register src) {
2515         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2516         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2517         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2518         emitByte(0xD4);
2519         emitByte(0xC0 | encode);
2520     }
2521 
2522     public final void pextrw(Register dst, Register src, int imm8) {
2523         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
2524         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2525         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2526         emitByte(0xC5);
2527         emitByte(0xC0 | encode);
2528         emitByte(imm8);
2529     }
2530 
2531     public final void pinsrw(Register dst, Register src, int imm8) {
2532         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
2533         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2534         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2535         emitByte(0xC4);
2536         emitByte(0xC0 | encode);
2537         emitByte(imm8);
2538     }
2539 
2540     public final void por(Register dst, Register src) {
2541         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2542         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2543         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2544         emitByte(0xEB);
2545         emitByte(0xC0 | encode);
2546     }
2547 
2548     public final void pand(Register dst, Register src) {
2549         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2550         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2551         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2552         emitByte(0xDB);
2553         emitByte(0xC0 | encode);
2554     }
2555 
2556     public final void pxor(Register dst, Register src) {
2557         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2558         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2559         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2560         emitByte(0xEF);
2561         emitByte(0xC0 | encode);
2562     }
2563 
2564     public final void vpxor(Register dst, Register nds, Register src) {
2565         assert supports(CPUFeature.AVX);
2566         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2567         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2568         int encode = vexPrefixAndEncode(dst, nds, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2569         emitByte(0xEF);
2570         emitByte(0xC0 | encode);
2571     }
2572 
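         /*
          * The immediate forms of the packed shifts below (66 0F 72/73 /digit ib) carry an opcode
          * extension in the reg field of the ModRM byte rather than a register. The helpers encode
          * that extension by passing the XMM register whose encoding matches the /digit (xmm2 for
          * /2, xmm4 for /4, xmm6 for /6) in the reg position, while dst serves as both source and
          * r/m operand.
          */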
2573     public final void pslld(Register dst, int imm8) {
2574         assert isUByte(imm8) : "invalid value";
2575         assert dst.getRegisterCategory().equals(AMD64.XMM);
2576         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2577         // XMM6 is for /6 encoding: 66 0F 72 /6 ib
2578         int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2579         emitByte(0x72);
2580         emitByte(0xC0 | encode);
2581         emitByte(imm8 & 0xFF);
2582     }
2583 
2584     public final void psllq(Register dst, Register shift) {
2585         assert dst.getRegisterCategory().equals(AMD64.XMM) && shift.getRegisterCategory().equals(AMD64.XMM);
2586         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2587         int encode = simdPrefixAndEncode(dst, dst, shift, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2588         emitByte(0xF3);
2589         emitByte(0xC0 | encode);
2590     }
2591 
2592     public final void psllq(Register dst, int imm8) {
2593         assert isUByte(imm8) : "invalid value";
2594         assert dst.getRegisterCategory().equals(AMD64.XMM);
2595         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2596         // XMM6 is for /6 encoding: 66 0F 73 /6 ib
2597         int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2598         emitByte(0x73);
2599         emitByte(0xC0 | encode);
2600         emitByte(imm8);
2601     }
2602 
2603     public final void psrad(Register dst, int imm8) {
2604         assert isUByte(imm8) : "invalid value";
2605         assert dst.getRegisterCategory().equals(AMD64.XMM);
2606         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2607         // XMM4 is for /4 encoding: 66 0F 72 /4 ib
2608         int encode = simdPrefixAndEncode(AMD64.xmm4, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2609         emitByte(0x72);
2610         emitByte(0xC0 | encode);
2611         emitByte(imm8);
2612     }
2613 
2614     public final void psrld(Register dst, int imm8) {
2615         assert isUByte(imm8) : "invalid value";
2616         assert dst.getRegisterCategory().equals(AMD64.XMM);
2617         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2618         // XMM2 is for /2 encoding: 66 0F 72 /2 ib
2619         int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2620         emitByte(0x72);
2621         emitByte(0xC0 | encode);
2622         emitByte(imm8);
2623     }
2624 
2625     public final void psrlq(Register dst, int imm8) {
2626         assert isUByte(imm8) : "invalid value";
2627         assert dst.getRegisterCategory().equals(AMD64.XMM);
2628         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2629         // XMM2 is for /2 encoding: 66 0F 73 /2 ib
2630         int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2631         emitByte(0x73);
2632         emitByte(0xC0 | encode);
2633         emitByte(imm8);
2634     }
2635 
2636     public final void pshufd(Register dst, Register src, int imm8) {
2637         assert isUByte(imm8) : "invalid value";
2638         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2639         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2640         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2641         emitByte(0x70);
2642         emitByte(0xC0 | encode);
2643         emitByte(imm8);
2644     }
2645 
2646     public final void psubd(Register dst, Register src) {
2647         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2648         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2649         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2650         emitByte(0xFA);
2651         emitByte(0xC0 | encode);
2652     }
2653 
2654     public final void rcpps(Register dst, Register src) {
2655         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2656         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target);
2657         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2658         emitByte(0x53);
2659         emitByte(0xC0 | encode);
2660     }
2661 
2662     public final void ret(int imm16) {
2663         if (imm16 == 0) {
2664             emitByte(0xC3);
2665         } else {
2666             emitByte(0xC2);
2667             emitShort(imm16);
2668         }
2669     }
2670 
2671     public final void sarl(Register dst, int imm8) {
2672         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2673         int encode = prefixAndEncode(dst.encoding);
2674         if (imm8 == 1) {
2675             emitByte(0xD1);
2676             emitByte(0xF8 | encode);
2677         } else {
2678             emitByte(0xC1);
2679             emitByte(0xF8 | encode);
2680             emitByte(imm8);
2681         }
2682     }
2683 
2684     public final void shll(Register dst, int imm8) {
2685         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2686         int encode = prefixAndEncode(dst.encoding);
2687         if (imm8 == 1) {
2688             emitByte(0xD1);
2689             emitByte(0xE0 | encode);
2690         } else {
2691             emitByte(0xC1);
2692             emitByte(0xE0 | encode);
2693             emitByte(imm8);
2694         }
2695     }
2696 
2697     public final void shll(Register dst) {
2698         int encode = prefixAndEncode(dst.encoding);
2699         emitByte(0xD3);
2700         emitByte(0xE0 | encode);
2701     }
2702 
2703     public final void shrl(Register dst, int imm8) {
2704         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2705         int encode = prefixAndEncode(dst.encoding);
2706         emitByte(0xC1);
2707         emitByte(0xE8 | encode);
2708         emitByte(imm8);
2709     }
2710 
2711     public final void shrl(Register dst) {
2712         int encode = prefixAndEncode(dst.encoding);
2713         emitByte(0xD3);
2714         emitByte(0xE8 | encode);
2715     }
2716 
2717     public final void subl(AMD64Address dst, int imm32) {
2718         SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2719     }
2720 
2721     public final void subl(Register dst, int imm32) {
2722         SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2723     }
2724 
2725     public final void subl(Register dst, Register src) {
2726         SUB.rmOp.emit(this, DWORD, dst, src);
2727     }
2728 
2729     public final void subpd(Register dst, Register src) {
2730         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2731         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2732         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2733         emitByte(0x5C);
2734         emitByte(0xC0 | encode);
2735     }
2736 
2737     public final void subsd(Register dst, Register src) {
2738         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2739         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2740         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2741         emitByte(0x5C);
2742         emitByte(0xC0 | encode);
2743     }
2744 
2745     public final void subsd(Register dst, AMD64Address src) {
2746         assert dst.getRegisterCategory().equals(AMD64.XMM);
2747         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2748         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2749         emitByte(0x5C);
2750         emitOperandHelper(dst, src, 0);
2751     }
2752 
2753     public final void testl(Register dst, int imm32) {
2754         // Not using the generic arithmetic emitters because TEST
2755         // does not support sign-extension of 8-bit immediate operands.
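             // For example, testl(rax, 0x10) uses the short EAX form and emits A9 10 00 00 00,
             // while testl(rcx, 0x10) emits F7 C1 10 00 00 00.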
2757         int encode = dst.encoding;
2758         if (encode == 0) {
2759             emitByte(0xA9);
2760         } else {
2761             encode = prefixAndEncode(encode);
2762             emitByte(0xF7);
2763             emitByte(0xC0 | encode);
2764         }
2765         emitInt(imm32);
2766     }
2767 
2768     public final void testl(Register dst, Register src) {
2769         int encode = prefixAndEncode(dst.encoding, src.encoding);
2770         emitByte(0x85);
2771         emitByte(0xC0 | encode);
2772     }
2773 
2774     public final void testl(Register dst, AMD64Address src) {
2775         prefix(src, dst);
2776         emitByte(0x85);
2777         emitOperandHelper(dst, src, 0);
2778     }
2779 
2780     public final void unpckhpd(Register dst, Register src) {
2781         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2782         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2783         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2784         emitByte(0x15);
2785         emitByte(0xC0 | encode);
2786     }
2787 
2788     public final void unpcklpd(Register dst, Register src) {
2789         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2790         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2791         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2792         emitByte(0x14);
2793         emitByte(0xC0 | encode);
2794     }
2795 
2796     public final void xorl(Register dst, Register src) {
2797         XOR.rmOp.emit(this, DWORD, dst, src);
2798     }
2799 
2800     public final void xorpd(Register dst, Register src) {
2801         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2802         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2803         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2804         emitByte(0x57);
2805         emitByte(0xC0 | encode);
2806     }
2807 
2808     public final void xorps(Register dst, Register src) {
2809         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2810         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2811         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2812         emitByte(0x57);
2813         emitByte(0xC0 | encode);
2814     }
2815 
2816     protected final void decl(Register dst) {
2817         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2818         int encode = prefixAndEncode(dst.encoding);
2819         emitByte(0xFF);
2820         emitByte(0xC8 | encode);
2821     }
2822 
2823     protected final void incl(Register dst) {
2824         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2825         int encode = prefixAndEncode(dst.encoding);
2826         emitByte(0xFF);
2827         emitByte(0xC0 | encode);
2828     }
2829 
2830     private int prefixAndEncode(int regEnc) {
2831         return prefixAndEncode(regEnc, false);
2832     }
2833 
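         /*
          * A rough sketch of the single-operand REX logic below: encodings 0-7 go directly into
          * the opcode or ModRM byte, encodings 8-15 (r8-r15) require REX.B and are reduced by 8,
          * and for byte instructions encodings 4-7 (spl, bpl, sil, dil) need a bare REX prefix to
          * be addressable at all. For example, push(rbx) emits 53 while push(r11) emits 41 53.
          */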
2834     private int prefixAndEncode(int regEnc, boolean byteinst) {
2835         if (regEnc >= 8) {
2836             emitByte(Prefix.REXB);
2837             return regEnc - 8;
2838         } else if (byteinst && regEnc >= 4) {
2839             emitByte(Prefix.REX);
2840         }
2841         return regEnc;
2842     }
2843 
2844     private int prefixqAndEncode(int regEnc) {
2845         if (regEnc < 8) {
2846             emitByte(Prefix.REXW);
2847             return regEnc;
2848         } else {
2849             emitByte(Prefix.REXWB);
2850             return regEnc - 8;
2851         }
2852     }
2853 
2854     private int prefixAndEncode(int dstEnc, int srcEnc) {
2855         return prefixAndEncode(dstEnc, false, srcEnc, false);
2856     }
2857 
2858     private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
2859         int srcEnc = srcEncoding;
2860         int dstEnc = dstEncoding;
2861         if (dstEnc < 8) {
2862             if (srcEnc >= 8) {
2863                 emitByte(Prefix.REXB);
2864                 srcEnc -= 8;
2865             } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
2866                 emitByte(Prefix.REX);
2867             }
2868         } else {
2869             if (srcEnc < 8) {
2870                 emitByte(Prefix.REXR);
2871             } else {
2872                 emitByte(Prefix.REXRB);
2873                 srcEnc -= 8;
2874             }
2875             dstEnc -= 8;
2876         }
2877         return dstEnc << 3 | srcEnc;
2878     }
2879 
2880     /**
2881      * Creates the prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits the
2882      * required REX prefix. If a given operand encoding exceeds 3 bits, the 4th bit is encoded in the prefix.
2883      *
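          * For example (illustrative): {@code prefixqAndEncode(rax.encoding, r9.encoding)} emits
          * {@code 0x49} (REX.W + REX.B) and returns {@code 0b000001}, i.e. rax in the reg field
          * and the low three bits of r9 in the r/m field.
          *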
2884      * @param regEncoding the encoding of the register part of the ModRM-Byte
2885      * @param rmEncoding the encoding of the r/m part of the ModRM-Byte
2886      * @return the lower 6 bits of the ModRM-Byte that should be emitted
2887      */
2888     private int prefixqAndEncode(int regEncoding, int rmEncoding) {
2889         int rmEnc = rmEncoding;
2890         int regEnc = regEncoding;
2891         if (regEnc < 8) {
2892             if (rmEnc < 8) {
2893                 emitByte(Prefix.REXW);
2894             } else {
2895                 emitByte(Prefix.REXWB);
2896                 rmEnc -= 8;
2897             }
2898         } else {
2899             if (rmEnc < 8) {
2900                 emitByte(Prefix.REXWR);
2901             } else {
2902                 emitByte(Prefix.REXWRB);
2903                 rmEnc -= 8;
2904             }
2905             regEnc -= 8;
2906         }
2907         return regEnc << 3 | rmEnc;
2908     }
2909 
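         /*
          * Emits the VEX prefix. The two-byte form (C5) can be used when only the R bit, the vvvv
          * operand, the vector length and the simd prefix need to be encoded; the three-byte form
          * (C4) is required whenever X or B is set, W is set, or the 0F 38 / 0F 3A opcode maps are
          * used. For example, the 128-bit vpxor xmm1, xmm1, xmm2 is encoded as C5 F1 EF CA.
          */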
2910     private void vexPrefix(int rxb, int ndsEncoding, int pre, int opc, AMD64InstructionAttr attributes) {
2911         int vectorLen = attributes.getVectorLen();
2912         boolean vexW = attributes.isRexVexW();
2913         boolean isXorB = ((rxb & 0x3) > 0);
2914         if (isXorB || vexW || (opc == VexOpcode.VEX_OPCODE_0F_38) || (opc == VexOpcode.VEX_OPCODE_0F_3A)) {
2915             emitByte(Prefix.VEX_3BYTES);
2916 
2917             int byte1 = (rxb << 5);
2918             byte1 = ((~byte1) & 0xE0) | opc;
2919             emitByte(byte1);
2920 
2921             int byte2 = ((~ndsEncoding) & 0xf) << 3;
2922             byte2 |= (vexW ? VexPrefix.VEX_W : 0) | ((vectorLen > 0) ? 4 : 0) | pre;
2923             emitByte(byte2);
2924         } else {
2925             emitByte(Prefix.VEX_2BYTES);
2926 
2927             int byte1 = ((rxb & 0x4) > 0) ? VexPrefix.VEX_R : 0;
2928             byte1 = (~byte1) & 0x80;
2929             byte1 |= ((~ndsEncoding) & 0xf) << 3;
2930             byte1 |= ((vectorLen > 0) ? 4 : 0) | pre;
2931             emitByte(byte1);
2932         }
2933     }
2934 
2935     private void vexPrefix(AMD64Address adr, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2936         int rxb = getRXB(src, adr);
2937         int ndsEncoding = nds.isValid() ? nds.encoding : 0;
2938         vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
2939         setCurAttributes(attributes);
2940     }
2941 
2942     private int vexPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2943         int rxb = getRXB(dst, src);
2944         int ndsEncoding = nds.isValid() ? nds.encoding : 0;
2945         vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
2946         // return modrm byte components for operands
2947         return (((dst.encoding & 7) << 3) | (src.encoding & 7));
2948     }
2949 
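         /*
          * Dispatches between the legacy SSE encoding and the VEX encoding: when AVX is available
          * the instruction is emitted in VEX form, otherwise the mandatory prefix (66/F2/F3), the
          * REX prefix and the opcode escape bytes are emitted individually. For example,
          * pxor(xmm1, xmm2) produces 66 0F EF CA without AVX and C5 F1 EF CA (i.e. vpxor) with AVX.
          */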
2950     private void simdPrefix(Register xreg, Register nds, AMD64Address adr, int pre, int opc, AMD64InstructionAttr attributes) {
2951         if (supports(CPUFeature.AVX)) {
2952             vexPrefix(adr, nds, xreg, pre, opc, attributes);
2953         } else {
2954             switch (pre) {
2955                 case VexSimdPrefix.VEX_SIMD_66:
2956                     emitByte(0x66);
2957                     break;
2958                 case VexSimdPrefix.VEX_SIMD_F2:
2959                     emitByte(0xF2);
2960                     break;
2961                 case VexSimdPrefix.VEX_SIMD_F3:
2962                     emitByte(0xF3);
2963                     break;
2964             }
2965             if (attributes.isRexVexW()) {
2966                 prefixq(adr, xreg);
2967             } else {
2968                 prefix(adr, xreg);
2969             }
2970             switch (opc) {
2971                 case VexOpcode.VEX_OPCODE_0F:
2972                     emitByte(0x0F);
2973                     break;
2974                 case VexOpcode.VEX_OPCODE_0F_38:
2975                     emitByte(0x0F);
2976                     emitByte(0x38);
2977                     break;
2978                 case VexOpcode.VEX_OPCODE_0F_3A:
2979                     emitByte(0x0F);
2980                     emitByte(0x3A);
2981                     break;
2982             }
2983         }
2984     }
2985 
2986     private int simdPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2987         if (supports(CPUFeature.AVX)) {
2988             return vexPrefixAndEncode(dst, nds, src, pre, opc, attributes);
2989         } else {
2990             switch (pre) {
2991                 case VexSimdPrefix.VEX_SIMD_66:
2992                     emitByte(0x66);
2993                     break;
2994                 case VexSimdPrefix.VEX_SIMD_F2:
2995                     emitByte(0xF2);
2996                     break;
2997                 case VexSimdPrefix.VEX_SIMD_F3:
2998                     emitByte(0xF3);
2999                     break;
3000             }
3001             int encode;
3002             int dstEncoding = dst.encoding;
3003             int srcEncoding = src.encoding;
3004             if (attributes.isRexVexW()) {
3005                 encode = prefixqAndEncode(dstEncoding, srcEncoding);
3006             } else {
3007                 encode = prefixAndEncode(dstEncoding, srcEncoding);
3008             }
3009             switch (opc) {
3010                 case VexOpcode.VEX_OPCODE_0F:
3011                     emitByte(0x0F);
3012                     break;
3013                 case VexOpcode.VEX_OPCODE_0F_38:
3014                     emitByte(0x0F);
3015                     emitByte(0x38);
3016                     break;
3017                 case VexOpcode.VEX_OPCODE_0F_3A:
3018                     emitByte(0x0F);
3019                     emitByte(0x3A);
3020                     break;
3021             }
3022             return encode;
3023         }
3024     }
3025 
3026     private static boolean needsRex(Register reg) {
3027         return reg.encoding >= MinEncodingNeedsRex;
3028     }
3029 
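         /*
          * REX prefix selection for memory operands. In the REX byte (0100WRXB), W selects a
          * 64-bit operand size, R extends the ModRM reg field, X extends the SIB index and B
          * extends the ModRM r/m (or SIB base) field. The helpers below emit the combination
          * needed for the given base, index and register operands; the q variants additionally
          * set W.
          */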
3030     private void prefix(AMD64Address adr) {
3031         if (needsRex(adr.getBase())) {
3032             if (needsRex(adr.getIndex())) {
3033                 emitByte(Prefix.REXXB);
3034             } else {
3035                 emitByte(Prefix.REXB);
3036             }
3037         } else {
3038             if (needsRex(adr.getIndex())) {
3039                 emitByte(Prefix.REXX);
3040             }
3041         }
3042     }
3043 
3044     private void prefixq(AMD64Address adr) {
3045         if (needsRex(adr.getBase())) {
3046             if (needsRex(adr.getIndex())) {
3047                 emitByte(Prefix.REXWXB);
3048             } else {
3049                 emitByte(Prefix.REXWB);
3050             }
3051         } else {
3052             if (needsRex(adr.getIndex())) {
3053                 emitByte(Prefix.REXWX);
3054             } else {
3055                 emitByte(Prefix.REXW);
3056             }
3057         }
3058     }
3059 
3060     private void prefix(AMD64Address adr, Register reg) {
3061         prefix(adr, reg, false);
3062     }
3063 
3064     private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
3065         if (reg.encoding < 8) {
3066             if (needsRex(adr.getBase())) {
3067                 if (needsRex(adr.getIndex())) {
3068                     emitByte(Prefix.REXXB);
3069                 } else {
3070                     emitByte(Prefix.REXB);
3071                 }
3072             } else {
3073                 if (needsRex(adr.getIndex())) {
3074                     emitByte(Prefix.REXX);
3075                 } else if (byteinst && reg.encoding >= 4) {
3076                     emitByte(Prefix.REX);
3077                 }
3078             }
3079         } else {
3080             if (needsRex(adr.getBase())) {
3081                 if (needsRex(adr.getIndex())) {
3082                     emitByte(Prefix.REXRXB);
3083                 } else {
3084                     emitByte(Prefix.REXRB);
3085                 }
3086             } else {
3087                 if (needsRex(adr.getIndex())) {
3088                     emitByte(Prefix.REXRX);
3089                 } else {
3090                     emitByte(Prefix.REXR);
3091                 }
3092             }
3093         }
3094     }
3095 
3096     private void prefixq(AMD64Address adr, Register src) {
3097         if (src.encoding < 8) {
3098             if (needsRex(adr.getBase())) {
3099                 if (needsRex(adr.getIndex())) {
3100                     emitByte(Prefix.REXWXB);
3101                 } else {
3102                     emitByte(Prefix.REXWB);
3103                 }
3104             } else {
3105                 if (needsRex(adr.getIndex())) {
3106                     emitByte(Prefix.REXWX);
3107                 } else {
3108                     emitByte(Prefix.REXW);
3109                 }
3110             }
3111         } else {
3112             if (needsRex(adr.getBase())) {
3113                 if (needsRex(adr.getIndex())) {
3114                     emitByte(Prefix.REXWRXB);
3115                 } else {
3116                     emitByte(Prefix.REXWRB);
3117                 }
3118             } else {
3119                 if (needsRex(adr.getIndex())) {
3120                     emitByte(Prefix.REXWRX);
3121                 } else {
3122                     emitByte(Prefix.REXWR);
3123                 }
3124             }
3125         }
3126     }
3127 
3128     public final void addq(Register dst, int imm32) {
3129         ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3130     }
3131 
3132     public final void addq(AMD64Address dst, int imm32) {
3133         ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3134     }
3135 
3136     public final void addq(Register dst, Register src) {
3137         ADD.rmOp.emit(this, QWORD, dst, src);
3138     }
3139 
3140     public final void addq(AMD64Address dst, Register src) {
3141         ADD.mrOp.emit(this, QWORD, dst, src);
3142     }
3143 
3144     public final void andq(Register dst, int imm32) {
3145         AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3146     }
3147 
3148     public final void bsrq(Register dst, Register src) {
3149         int encode = prefixqAndEncode(dst.encoding(), src.encoding());
3150         emitByte(0x0F);
3151         emitByte(0xBD);
3152         emitByte(0xC0 | encode);
3153     }
3154 
3155     public final void bswapq(Register reg) {
3156         int encode = prefixqAndEncode(reg.encoding);
3157         emitByte(0x0F);
3158         emitByte(0xC8 | encode);
3159     }
3160 
3161     public final void cdqq() {
3162         emitByte(Prefix.REXW);
3163         emitByte(0x99);
3164     }
3165 
3166     public final void cmovq(ConditionFlag cc, Register dst, Register src) {
3167         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3168         emitByte(0x0F);
3169         emitByte(0x40 | cc.getValue());
3170         emitByte(0xC0 | encode);
3171     }
3172 
3173     public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
3174         prefixq(src, dst);
3175         emitByte(0x0F);
3176         emitByte(0x40 | cc.getValue());
3177         emitOperandHelper(dst, src, 0);
3178     }
3179 
3180     public final void cmpq(Register dst, int imm32) {
3181         CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3182     }
3183 
3184     public final void cmpq(Register dst, Register src) {
3185         CMP.rmOp.emit(this, QWORD, dst, src);
3186     }
3187 
3188     public final void cmpq(Register dst, AMD64Address src) {
3189         CMP.rmOp.emit(this, QWORD, dst, src);
3190     }
3191 
3192     public final void cmpxchgq(Register reg, AMD64Address adr) {
3193         prefixq(adr, reg);
3194         emitByte(0x0F);
3195         emitByte(0xB1);
3196         emitOperandHelper(reg, adr, 0);
3197     }
3198 
3199     public final void cvtdq2pd(Register dst, Register src) {
3200         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3201         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3202         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3203         emitByte(0xE6);
3204         emitByte(0xC0 | encode);
3205     }
3206 
3207     public final void cvtsi2sdq(Register dst, Register src) {
3208         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
3209         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3210         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3211         emitByte(0x2A);
3212         emitByte(0xC0 | encode);
3213     }
3214 
3215     public final void cvttsd2siq(Register dst, Register src) {
3216         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
3217         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3218         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3219         emitByte(0x2C);
3220         emitByte(0xC0 | encode);
3221     }
3222 
3223     public final void cvttpd2dq(Register dst, Register src) {
3224         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3225         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3226         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3227         emitByte(0xE6);
3228         emitByte(0xC0 | encode);
3229     }
3230 
3231     protected final void decq(Register dst) {
3232         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3233         int encode = prefixqAndEncode(dst.encoding);
3234         emitByte(0xFF);
3235         emitByte(0xC8 | encode);
3236     }
3237 
3238     public final void decq(AMD64Address dst) {
3239         DEC.emit(this, QWORD, dst);
3240     }
3241 
3242     public final void imulq(Register dst, Register src) {
3243         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3244         emitByte(0x0F);
3245         emitByte(0xAF);
3246         emitByte(0xC0 | encode);
3247     }
3248 
3249     public final void incq(Register dst) {
3250         // Don't use it directly; use the macro assembler's incrementq instead.
3251         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3252         int encode = prefixqAndEncode(dst.encoding);
3253         emitByte(0xFF);
3254         emitByte(0xC0 | encode);
3255     }
3256 
3257     public final void incq(AMD64Address dst) {
3258         INC.emit(this, QWORD, dst);
3259     }
3260 
3261     public final void movq(Register dst, long imm64) {
3262         int encode = prefixqAndEncode(dst.encoding);
3263         emitByte(0xB8 | encode);
3264         emitLong(imm64);
3265     }
3266 
3267     public final void movslq(Register dst, int imm32) {
3268         int encode = prefixqAndEncode(dst.encoding);
3269         emitByte(0xC7);
3270         emitByte(0xC0 | encode);
3271         emitInt(imm32);
3272     }
3273 
3274     public final void movdq(Register dst, AMD64Address src) {
3275         assert dst.getRegisterCategory().equals(AMD64.XMM);
3276         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3277         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3278         emitByte(0x6E);
3279         emitOperandHelper(dst, src, 0);
3280     }
3281 
3282     public final void movdq(AMD64Address dst, Register src) {
3283         assert src.getRegisterCategory().equals(AMD64.XMM);
3284         // swap src/dst to get correct prefix
3285         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3286         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3287         emitByte(0x7E);
3288         emitOperandHelper(src, dst, 0);
3289     }
3290 
3291     public final void movdq(Register dst, Register src) {
3292         if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
3293             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3294             int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3295             emitByte(0x6E);
3296             emitByte(0xC0 | encode);
3297         } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
3298             // swap src/dst to get correct prefix
3299             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3300             int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3301             emitByte(0x7E);
3302             emitByte(0xC0 | encode);
3303         } else {
3304             throw new InternalError("should not reach here");
3305         }
3306     }
3307 
3308     public final void movdl(Register dst, Register src) {
3309         if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
3310             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3311             int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3312             emitByte(0x6E);
3313             emitByte(0xC0 | encode);
3314         } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
3315             // swap src/dst to get correct prefix
3316             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3317             int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3318             emitByte(0x7E);
3319             emitByte(0xC0 | encode);
3320         } else {
3321             throw new InternalError("should not reach here");
3322         }
3323     }
3324 
3325     public final void movddup(Register dst, Register src) {
3326         assert supports(CPUFeature.SSE3);
3327         assert dst.getRegisterCategory().equals(AMD64.XMM);
3328         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3329         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3330         emitByte(0x12);
3331         emitByte(0xC0 | encode);
3332     }
3333 
3334     public final void movdqu(Register dst, AMD64Address src) {
3335         assert dst.getRegisterCategory().equals(AMD64.XMM);
3336         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3337         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3338         emitByte(0x6F);
3339         emitOperandHelper(dst, src, 0);
3340     }
3341 
3342     public final void movdqu(Register dst, Register src) {
3343         assert dst.getRegisterCategory().equals(AMD64.XMM);
3344         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3345         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3346         emitByte(0x6F);
3347         emitByte(0xC0 | encode);
3348     }
3349 
3350     public final void vmovdqu(Register dst, AMD64Address src) {
3351         assert supports(CPUFeature.AVX);
3352         assert dst.getRegisterCategory().equals(AMD64.XMM);
3353         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3354         vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3355         emitByte(0x6F);
3356         emitOperandHelper(dst, src, 0);
3357     }
3358 
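         /**
          * Emits vzeroupper, which zeroes the upper 128 bits of all YMM registers. It is commonly
          * emitted before transitioning from AVX code to legacy SSE code to avoid AVX-SSE
          * transition penalties.
          */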
3359     public final void vzeroupper() {
3360         assert supports(CPUFeature.AVX);
3361         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3362         vexPrefixAndEncode(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
3363         emitByte(0x77);
3364     }
3365 
3366     public final void movslq(AMD64Address dst, int imm32) {
3367         prefixq(dst);
3368         emitByte(0xC7);
3369         emitOperandHelper(0, dst, 4);
3370         emitInt(imm32);
3371     }
3372 
3373     public final void movslq(Register dst, AMD64Address src) {
3374         prefixq(src, dst);
3375         emitByte(0x63);
3376         emitOperandHelper(dst, src, 0);
3377     }
3378 
3379     public final void movslq(Register dst, Register src) {
3380         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3381         emitByte(0x63);
3382         emitByte(0xC0 | encode);
3383     }
3384 
3385     public final void negq(Register dst) {
3386         int encode = prefixqAndEncode(dst.encoding);
3387         emitByte(0xF7);
3388         emitByte(0xD8 | encode);
3389     }
3390 
3391     public final void orq(Register dst, Register src) {
3392         OR.rmOp.emit(this, QWORD, dst, src);
3393     }
3394 
3395     public final void shlq(Register dst, int imm8) {
3396         assert isShiftCount(imm8 >> 1) : "illegal shift count";
3397         int encode = prefixqAndEncode(dst.encoding);
3398         if (imm8 == 1) {
3399             emitByte(0xD1);
3400             emitByte(0xE0 | encode);
3401         } else {
3402             emitByte(0xC1);
3403             emitByte(0xE0 | encode);
3404             emitByte(imm8);
3405         }
3406     }
3407 
3408     public final void shlq(Register dst) {
3409         int encode = prefixqAndEncode(dst.encoding);
3410         emitByte(0xD3);
3411         emitByte(0xE0 | encode);
3412     }
3413 
3414     public final void shrq(Register dst, int imm8) {
3415         assert isShiftCount(imm8 >> 1) : "illegal shift count";
3416         int encode = prefixqAndEncode(dst.encoding);
3417         if (imm8 == 1) {
3418             emitByte(0xD1);
3419             emitByte(0xE8 | encode);
3420         } else {
3421             emitByte(0xC1);
3422             emitByte(0xE8 | encode);
3423             emitByte(imm8);
3424         }
3425     }
3426 
3427     public final void shrq(Register dst) {
3428         int encode = prefixqAndEncode(dst.encoding);
3429         emitByte(0xD3);
3430         emitByte(0xE8 | encode);
3431     }
3432 
3433     public final void sbbq(Register dst, Register src) {
3434         SBB.rmOp.emit(this, QWORD, dst, src);
3435     }
3436 
3437     public final void subq(Register dst, int imm32) {
3438         SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3439     }
3440 
3441     public final void subq(AMD64Address dst, int imm32) {
3442         SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3443     }
3444 
3445     public final void subqWide(Register dst, int imm32) {
3446         // don't use the sign-extending version, forcing a 32-bit immediate
3447         SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
3448     }
3449 
3450     public final void subq(Register dst, Register src) {
3451         SUB.rmOp.emit(this, QWORD, dst, src);
3452     }
3453 
3454     public final void testq(Register dst, Register src) {
3455         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3456         emitByte(0x85);
3457         emitByte(0xC0 | encode);
3458     }
3459 
3460     public final void xaddl(AMD64Address dst, Register src) {
3461         prefix(dst, src);
3462         emitByte(0x0F);
3463         emitByte(0xC1);
3464         emitOperandHelper(src, dst, 0);
3465     }
3466 
3467     public final void xaddq(AMD64Address dst, Register src) {
3468         prefixq(dst, src);
3469         emitByte(0x0F);
3470         emitByte(0xC1);
3471         emitOperandHelper(src, dst, 0);
3472     }
3473 
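         /*
          * Note that XCHG with a memory operand is implicitly locked, so xchgl/xchgq provide an
          * atomic exchange without an explicit LOCK prefix.
          */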
3474     public final void xchgl(Register dst, AMD64Address src) {
3475         prefix(src, dst);
3476         emitByte(0x87);
3477         emitOperandHelper(dst, src, 0);
3478     }
3479 
3480     public final void xchgq(Register dst, AMD64Address src) {
3481         prefixq(src, dst);
3482         emitByte(0x87);
3483         emitOperandHelper(dst, src, 0);
3484     }
3485 
3486     public final void membar(int barriers) {
3487         if (target.isMP) {
3488             // We only have to handle StoreLoad
3489             if ((barriers & STORE_LOAD) != 0) {
3490                 // All usable chips support "locked" instructions which suffice
3491                 // as barriers, and are much faster than the alternative of
3492                 // using cpuid instruction. We use here a locked add [rsp],0.
3493                 // This is conveniently otherwise a no-op except for blowing
3494                 // flags.
3495                 // Any change to this code may need to revisit other places in
3496                 // the code where this idiom is used, in particular the
3497                 // orderAccess code.
3498                 lock();
3499                 addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
3500             }
3501         }
3502     }
3503 
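         /*
          * Branch displacements are relative to the address of the instruction following the
          * branch. Hence, in the 32-bit case below, the displacement is
          * branchTarget - (branch + off + 4), where off is the opcode length (1 byte for jmp/call,
          * 2 bytes for the 0F 8x jcc forms) and 4 is the size of the displacement itself.
          */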
3504     @Override
3505     protected final void patchJumpTarget(int branch, int branchTarget) {
3506         int op = getByte(branch);
3507         assert op == 0xE8 // call
3508                         ||
3509                         op == 0x00 // jump table entry
3510                         || op == 0xE9 // jmp
3511                         || op == 0xEB // short jmp
3512                         || (op & 0xF0) == 0x70 // short jcc
3513                         || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
3514         : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
3515 
3516         if (op == 0x00) {
3517             int offsetToJumpTableBase = getShort(branch + 1);
3518             int jumpTableBase = branch - offsetToJumpTableBase;
3519             int imm32 = branchTarget - jumpTableBase;
3520             emitInt(imm32, branch);
3521         } else if (op == 0xEB || (op & 0xF0) == 0x70) {
3522 
3523             // short offset operators (jmp and jcc)
3524             final int imm8 = branchTarget - (branch + 2);
3525             /*
3526              * Since a wrongly patched short branch can potentially lead to working but really bad
3527              * behaving code we should always fail with an exception instead of having an assert.
3528              */
3529             if (!NumUtil.isByte(imm8)) {
3530                 throw new InternalError("branch displacement out of range: " + imm8);
3531             }
3532             emitByte(imm8, branch + 1);
3533 
3534         } else {
3535 
3536             int off = 1;
3537             if (op == 0x0F) {
3538                 off = 2;
3539             }
3540 
3541             int imm32 = branchTarget - (branch + 4 + off);
3542             emitInt(imm32, branch + off);
3543         }
3544     }
3545 
3546     public void nullCheck(AMD64Address address) {
3547         testl(AMD64.rax, address);
3548     }
3549 
3550     @Override
3551     public void align(int modulus) {
3552         if (position() % modulus != 0) {
3553             nop(modulus - (position() % modulus));
3554         }
3555     }
3556 
3557     /**
3558      * Emits a direct call instruction. Note that the actual call target is not specified, because
3559      * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
3560      * responsible to add the call address to the appropriate patching tables.
3561      */
3562     public final void call() {
3563         if (codePatchingAnnotationConsumer != null) {
3564             int pos = position();
3565             codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + 1, 4, pos + 5));
3566         }
3567         emitByte(0xE8);
3568         emitInt(0);
3569     }
3570 
3571     public final void call(Register src) {
3572         int encode = prefixAndEncode(src.encoding);
3573         emitByte(0xFF);
3574         emitByte(0xD0 | encode);
3575     }
3576 
3577     public final void int3() {
3578         emitByte(0xCC);
3579     }
3580 
3581     public final void pause() {
3582         emitByte(0xF3);
3583         emitByte(0x90);
3584     }
3585 
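         /*
          * The x87 helpers below encode the stack register operand in the low three bits of the
          * second opcode byte. For example, fstp(2) emits DD DA, i.e. fstp st(2).
          */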
3586     private void emitx87(int b1, int b2, int i) {
3587         assert 0 <= i && i < 8 : "illegal stack offset";
3588         emitByte(b1);
3589         emitByte(b2 + i);
3590     }
3591 
3592     public final void fldd(AMD64Address src) {
3593         emitByte(0xDD);
3594         emitOperandHelper(0, src, 0);
3595     }
3596 
3597     public final void flds(AMD64Address src) {
3598         emitByte(0xD9);
3599         emitOperandHelper(0, src, 0);
3600     }
3601 
3602     public final void fldln2() {
3603         emitByte(0xD9);
3604         emitByte(0xED);
3605     }
3606 
3607     public final void fldlg2() {
3608         emitByte(0xD9);
3609         emitByte(0xEC);
3610     }
3611 
3612     public final void fyl2x() {
3613         emitByte(0xD9);
3614         emitByte(0xF1);
3615     }
3616 
3617     public final void fstps(AMD64Address src) {
3618         emitByte(0xD9);
3619         emitOperandHelper(3, src, 0);
3620     }
3621 
3622     public final void fstpd(AMD64Address src) {
3623         emitByte(0xDD);
3624         emitOperandHelper(3, src, 0);
3625     }
3626 
3627     private void emitFPUArith(int b1, int b2, int i) {
3628         assert 0 <= i && i < 8 : "illegal FPU register: " + i;
3629         emitByte(b1);
3630         emitByte(b2 + i);
3631     }
3632 
3633     public void ffree(int i) {
3634         emitFPUArith(0xDD, 0xC0, i);
3635     }
3636 
3637     public void fincstp() {
3638         emitByte(0xD9);
3639         emitByte(0xF7);
3640     }
3641 
3642     public void fxch(int i) {
3643         emitFPUArith(0xD9, 0xC8, i);
3644     }
3645 
3646     public void fnstswAX() {
3647         emitByte(0xDF);
3648         emitByte(0xE0);
3649     }
3650 
3651     public void fwait() {
3652         emitByte(0x9B);
3653     }
3654 
3655     public void fprem() {
3656         emitByte(0xD9);
3657         emitByte(0xF8);
3658     }
3659 
3660     public final void fsin() {
3661         emitByte(0xD9);
3662         emitByte(0xFE);
3663     }
3664 
3665     public final void fcos() {
3666         emitByte(0xD9);
3667         emitByte(0xFF);
3668     }
3669 
3670     public final void fptan() {
3671         emitByte(0xD9);
3672         emitByte(0xF2);
3673     }
3674 
3675     public final void fstp(int i) {
3676         emitx87(0xDD, 0xD8, i);
3677     }
3678 
3679     @Override
3680     public AMD64Address makeAddress(Register base, int displacement) {
3681         return new AMD64Address(base, displacement);
3682     }
3683 
3684     @Override
3685     public AMD64Address getPlaceholder(int instructionStartPosition) {
3686         return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition);
3687     }
3688 
3689     private void prefetchPrefix(AMD64Address src) {
3690         prefix(src);
3691         emitByte(0x0F);
3692     }
3693 
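         /*
          * The 0F 18 prefetch forms share a single opcode; the locality hint is selected via the
          * reg field of the ModRM byte (/0 = prefetchnta, /1 = prefetcht0, /2 = prefetcht1,
          * /3 = prefetcht2). The 3DNow variants use 0F 0D (/0 = prefetch, /1 = prefetchw).
          */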
3694     public void prefetchnta(AMD64Address src) {
3695         prefetchPrefix(src);
3696         emitByte(0x18);
3697         emitOperandHelper(0, src, 0);
3698     }
3699 
3700     void prefetchr(AMD64Address src) {
3701         assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
3702         prefetchPrefix(src);
3703         emitByte(0x0D);
3704         emitOperandHelper(0, src, 0);
3705     }
3706 
3707     public void prefetcht0(AMD64Address src) {
3708         assert supports(CPUFeature.SSE);
3709         prefetchPrefix(src);
3710         emitByte(0x18);
3711         emitOperandHelper(1, src, 0);
3712     }
3713 
3714     public void prefetcht1(AMD64Address src) {
3715         assert supports(CPUFeature.SSE);
3716         prefetchPrefix(src);
3717         emitByte(0x18);
3718         emitOperandHelper(2, src, 0);
3719     }
3720 
3721     public void prefetcht2(AMD64Address src) {
3722         assert supports(CPUFeature.SSE);
3723         prefetchPrefix(src);
3725         emitByte(0x18);
3726         emitOperandHelper(3, src, 0);
3727     }
3728 
3729     public void prefetchw(AMD64Address src) {
3730         assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
3731         prefetchPrefix(src);
3733         emitByte(0x0D);
3734         emitOperandHelper(1, src, 0);
3735     }
3736 
3737     public void rdtsc() {
3738         emitByte(0x0F);
3739         emitByte(0x31);
3740     }
3741 
3742     /**
3743      * Emits an instruction which is considered to be illegal. This is used if we deliberately want
3744      * to crash the program (debugging etc.).
3745      */
3746     public void illegal() {
3747         emitByte(0x0F);
3748         emitByte(0x0B); // UD2
3749     }
3750 }