1 /*
   2  * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package org.graalvm.compiler.asm.amd64;
  24 
  25 import static org.graalvm.compiler.core.common.NumUtil.isByte;
  26 import static org.graalvm.compiler.core.common.NumUtil.isInt;
  27 import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
  28 import static org.graalvm.compiler.core.common.NumUtil.isUByte;
  29 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
  30 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
  31 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
  32 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
  33 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
  34 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
  35 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
  36 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
  37 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
  38 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
  39 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
  40 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
  41 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
  42 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.BYTE;
  43 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.DWORD;
  44 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PD;
  45 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PS;
  46 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.QWORD;
  47 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SD;
  48 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SS;
  49 import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.WORD;
  50 import static jdk.vm.ci.amd64.AMD64.CPU;
  51 import static jdk.vm.ci.amd64.AMD64.XMM;
  52 import static jdk.vm.ci.amd64.AMD64.r12;
  53 import static jdk.vm.ci.amd64.AMD64.r13;
  54 import static jdk.vm.ci.amd64.AMD64.rbp;
  55 import static jdk.vm.ci.amd64.AMD64.rip;
  56 import static jdk.vm.ci.amd64.AMD64.rsp;
  57 import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
  58 
  59 import org.graalvm.compiler.asm.Assembler;
  60 import org.graalvm.compiler.asm.Label;
  61 import org.graalvm.compiler.core.common.NumUtil;
  62 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
  63 
  64 import jdk.vm.ci.amd64.AMD64;
  65 import jdk.vm.ci.amd64.AMD64.CPUFeature;
  66 import jdk.vm.ci.code.Register;
  67 import jdk.vm.ci.code.Register.RegisterCategory;
  68 import jdk.vm.ci.code.TargetDescription;
  69 
  70 /**
  71  * This class implements an assembler that can encode most X86 instructions.
  72  */
  73 public class AMD64Assembler extends Assembler {
  74 
  75     private static final int MinEncodingNeedsRex = 8;
  76 
  77     /**
  78      * The x86 condition codes used for conditional jumps/moves.
  79      */
  80     public enum ConditionFlag {
  81         Zero(0x4, "|zero|"),
  82         NotZero(0x5, "|nzero|"),
  83         Equal(0x4, "="),
  84         NotEqual(0x5, "!="),
  85         Less(0xc, "<"),
  86         LessEqual(0xe, "<="),
  87         Greater(0xf, ">"),
  88         GreaterEqual(0xd, ">="),
  89         Below(0x2, "|<|"),
  90         BelowEqual(0x6, "|<=|"),
  91         Above(0x7, "|>|"),
  92         AboveEqual(0x3, "|>=|"),
  93         Overflow(0x0, "|of|"),
  94         NoOverflow(0x1, "|nof|"),
  95         CarrySet(0x2, "|carry|"),
  96         CarryClear(0x3, "|ncarry|"),
  97         Negative(0x8, "|neg|"),
  98         Positive(0x9, "|pos|"),
  99         Parity(0xa, "|par|"),
 100         NoParity(0xb, "|npar|");
 101 
 102         private final int value;
 103         private final String operator;
 104 
 105         ConditionFlag(int value, String operator) {
 106             this.value = value;
 107             this.operator = operator;
 108         }
 109 
 110         public ConditionFlag negate() {
 111             switch (this) {
 112                 case Zero:
 113                     return NotZero;
 114                 case NotZero:
 115                     return Zero;
 116                 case Equal:
 117                     return NotEqual;
 118                 case NotEqual:
 119                     return Equal;
 120                 case Less:
 121                     return GreaterEqual;
 122                 case LessEqual:
 123                     return Greater;
 124                 case Greater:
 125                     return LessEqual;
 126                 case GreaterEqual:
 127                     return Less;
 128                 case Below:
 129                     return AboveEqual;
 130                 case BelowEqual:
 131                     return Above;
 132                 case Above:
 133                     return BelowEqual;
 134                 case AboveEqual:
 135                     return Below;
 136                 case Overflow:
 137                     return NoOverflow;
 138                 case NoOverflow:
 139                     return Overflow;
 140                 case CarrySet:
 141                     return CarryClear;
 142                 case CarryClear:
 143                     return CarrySet;
 144                 case Negative:
 145                     return Positive;
 146                 case Positive:
 147                     return Negative;
 148                 case Parity:
 149                     return NoParity;
 150                 case NoParity:
 151                     return Parity;
 152             }
 153             throw new IllegalArgumentException();
 154         }
 155 
 156         public int getValue() {
 157             return value;
 158         }
 159 
 160         @Override
 161         public String toString() {
 162             return operator;
 163         }
 164     }
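    // Encoding sketch: the 4-bit values above are the x86 condition-code nibbles that are OR-ed
    // into the conditional opcodes. A short Jcc is (0x70 | cc) and a near Jcc is 0x0F, (0x80 | cc),
    // so with Equal.getValue() == 0x4 a short "je" is the single byte 0x74. Negated conditions
    // differ only in the lowest bit, which is why negate() maps Equal (0x4) to NotEqual (0x5).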
 165 
 166     /**
 167      * Constants for X86 prefix bytes.
 168      */
 169     private static class Prefix {
 170         private static final int REX = 0x40;
 171         private static final int REXB = 0x41;
 172         private static final int REXX = 0x42;
 173         private static final int REXXB = 0x43;
 174         private static final int REXR = 0x44;
 175         private static final int REXRB = 0x45;
 176         private static final int REXRX = 0x46;
 177         private static final int REXRXB = 0x47;
 178         private static final int REXW = 0x48;
 179         private static final int REXWB = 0x49;
 180         private static final int REXWX = 0x4A;
 181         private static final int REXWXB = 0x4B;
 182         private static final int REXWR = 0x4C;
 183         private static final int REXWRB = 0x4D;
 184         private static final int REXWRX = 0x4E;
 185         private static final int REXWRXB = 0x4F;
 186         private static final int VEX_3BYTES = 0xC4;
 187         private static final int VEX_2BYTES = 0xC5;
 188     }
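    // Encoding sketch: a REX prefix has the bit layout 0100WRXB, so the constants above are just
    // 0x40 with the relevant bits OR-ed in. For example REXWRB == 0x4D == 0x40 | W(0x08) | R(0x04)
    // | B(0x01), the prefix needed for a 64-bit operation whose ModRM.reg and ModRM.rm fields both
    // name extended registers (r8-r15).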
 189 
 190     private static class VexPrefix {
 191         private static final int VEX_R = 0x80;
 192         private static final int VEX_W = 0x80;
 193     }
 194 
 195     private static class AvxVectorLen {
 196         private static final int AVX_128bit = 0x0;
 197         private static final int AVX_256bit = 0x1;
 198     }
 199 
 200     private static class VexSimdPrefix {
 201         private static final int VEX_SIMD_NONE = 0x0;
 202         private static final int VEX_SIMD_66 = 0x1;
 203         private static final int VEX_SIMD_F3 = 0x2;
 204         private static final int VEX_SIMD_F2 = 0x3;
 205     }
 206 
 207     private static class VexOpcode {
 208         private static final int VEX_OPCODE_NONE = 0x0;
 209         private static final int VEX_OPCODE_0F = 0x1;
 210         private static final int VEX_OPCODE_0F_38 = 0x2;
 211         private static final int VEX_OPCODE_0F_3A = 0x3;
 212     }
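    // Encoding sketch: these constants mirror the fields of a VEX prefix. In the three-byte form
    // (0xC4 ...), the "m-mmmm" map-select field corresponds to VEX_OPCODE_0F / _0F_38 / _0F_3A, the
    // "pp" field to VexSimdPrefix (0 = none, 1 = 0x66, 2 = 0xF3, 3 = 0xF2), and the "L" bit to
    // AvxVectorLen (0 = 128-bit, 1 = 256-bit). The two-byte form (0xC5 ...) can only express the
    // 0F map and an implicit W = 0.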
 213 
 214     private AMD64InstructionAttr curAttributes;
 215 
 216     AMD64InstructionAttr getCurAttributes() {
 217         return curAttributes;
 218     }
 219 
 220     void setCurAttributes(AMD64InstructionAttr attributes) {
 221         curAttributes = attributes;
 222     }
 223 
 224     /**
 225      * The x86 operand sizes.
 226      */
 227     public enum OperandSize {
 228         BYTE(1) {
 229             @Override
 230             protected void emitImmediate(AMD64Assembler asm, int imm) {
 231                 assert imm == (byte) imm;
 232                 asm.emitByte(imm);
 233             }
 234 
 235             @Override
 236             protected int immediateSize() {
 237                 return 1;
 238             }
 239         },
 240 
 241         WORD(2, 0x66) {
 242             @Override
 243             protected void emitImmediate(AMD64Assembler asm, int imm) {
 244                 assert imm == (short) imm;
 245                 asm.emitShort(imm);
 246             }
 247 
 248             @Override
 249             protected int immediateSize() {
 250                 return 2;
 251             }
 252         },
 253 
 254         DWORD(4) {
 255             @Override
 256             protected void emitImmediate(AMD64Assembler asm, int imm) {
 257                 asm.emitInt(imm);
 258             }
 259 
 260             @Override
 261             protected int immediateSize() {
 262                 return 4;
 263             }
 264         },
 265 
 266         QWORD(8) {
 267             @Override
 268             protected void emitImmediate(AMD64Assembler asm, int imm) {
 269                 asm.emitInt(imm);
 270             }
 271 
 272             @Override
 273             protected int immediateSize() {
 274                 return 4;
 275             }
 276         },
 277 
 278         SS(4, 0xF3, true),
 279 
 280         SD(8, 0xF2, true),
 281 
 282         PS(16, true),
 283 
 284         PD(16, 0x66, true);
 285 
 286         private final int sizePrefix;
 287 
 288         private final int bytes;
 289         private final boolean xmm;
 290 
 291         OperandSize(int bytes) {
 292             this(bytes, 0);
 293         }
 294 
 295         OperandSize(int bytes, int sizePrefix) {
 296             this(bytes, sizePrefix, false);
 297         }
 298 
 299         OperandSize(int bytes, boolean xmm) {
 300             this(bytes, 0, xmm);
 301         }
 302 
 303         OperandSize(int bytes, int sizePrefix, boolean xmm) {
 304             this.sizePrefix = sizePrefix;
 305             this.bytes = bytes;
 306             this.xmm = xmm;
 307         }
 308 
 309         public int getBytes() {
 310             return bytes;
 311         }
 312 
 313         public boolean isXmmType() {
 314             return xmm;
 315         }
 316 
 317         /**
 318          * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
 319          * as sign-extended 32-bit values.
 320          *
         * @param asm the assembler used to emit the immediate
         * @param imm the immediate value to emit
 323          */
 324         protected void emitImmediate(AMD64Assembler asm, int imm) {
 325             throw new UnsupportedOperationException();
 326         }
 327 
 328         protected int immediateSize() {
 329             throw new UnsupportedOperationException();
 330         }
 331     }
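    // Usage sketch: the size both selects the operand-size prefix and drives immediate emission.
    // For example AMD64MIOp.MOV.emit(asm, WORD, reg, imm) first emits the 0x66 prefix (WORD's
    // sizePrefix) and later a 2-byte immediate, while the same call with QWORD emits a REX.W
    // prefix and, as documented above, only a sign-extended 4-byte immediate.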
 332 
 333     /**
 334      * Operand size and register type constraints.
 335      */
 336     private enum OpAssertion {
 337         ByteAssertion(CPU, CPU, BYTE),
 338         IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD),
 339         No16BitAssertion(CPU, CPU, DWORD, QWORD),
 340         No32BitAssertion(CPU, CPU, WORD, QWORD),
 341         QwordOnlyAssertion(CPU, CPU, QWORD),
 342         FloatingAssertion(XMM, XMM, SS, SD, PS, PD),
 343         PackedFloatingAssertion(XMM, XMM, PS, PD),
 344         SingleAssertion(XMM, XMM, SS),
 345         DoubleAssertion(XMM, XMM, SD),
 346         PackedDoubleAssertion(XMM, XMM, PD),
 347         IntToFloatingAssertion(XMM, CPU, DWORD, QWORD),
 348         FloatingToIntAssertion(CPU, XMM, DWORD, QWORD);
 349 
 350         private final RegisterCategory resultCategory;
 351         private final RegisterCategory inputCategory;
 352         private final OperandSize[] allowedSizes;
 353 
 354         OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
 355             this.resultCategory = resultCategory;
 356             this.inputCategory = inputCategory;
 357             this.allowedSizes = allowedSizes;
 358         }
 359 
 360         protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
 361             assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
 362             assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;
 363 
 364             for (OperandSize s : allowedSizes) {
 365                 if (size == s) {
 366                     return true;
 367                 }
 368             }
 369 
 370             assert false : "invalid operand size " + size + " used in " + op;
 371             return false;
 372         }
 373     }
 374 
 375     public abstract static class OperandDataAnnotation extends CodeAnnotation {
 376         /**
 377          * The position (bytes from the beginning of the method) of the operand.
 378          */
 379         public final int operandPosition;
 380         /**
 381          * The size of the operand, in bytes.
 382          */
 383         public final int operandSize;
 384         /**
 385          * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
 386          * RIP-relative operands are relative to this position.
 387          */
 388         public final int nextInstructionPosition;
 389 
 390         OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
 391             super(instructionPosition);
 392 
 393             this.operandPosition = operandPosition;
 394             this.operandSize = operandSize;
 395             this.nextInstructionPosition = nextInstructionPosition;
 396         }
 397 
 398         @Override
 399         public String toString() {
 400             return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
 401         }
 402     }
 403 
 404     /**
 405      * Annotation that stores additional information about the displacement of a
 406      * {@link Assembler#getPlaceholder placeholder address} that needs patching.
 407      */
 408     public static class AddressDisplacementAnnotation extends OperandDataAnnotation {
        AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
 411         }
 412     }
 413 
 414     /**
 415      * Annotation that stores additional information about the immediate operand, e.g., of a call
 416      * instruction, that needs patching.
 417      */
 418     public static class ImmediateOperandAnnotation extends OperandDataAnnotation {
        ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
 421         }
 422     }
 423 
 424     /**
 425      * Constructs an assembler for the AMD64 architecture.
 426      */
 427     public AMD64Assembler(TargetDescription target) {
 428         super(target);
 429     }
 430 
 431     public boolean supports(CPUFeature feature) {
 432         return ((AMD64) target.arch).getFeatures().contains(feature);
 433     }
 434 
 435     private static int encode(Register r) {
 436         assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
 437         return r.encoding & 0x7;
 438     }
 439 
 440     /**
     * Get RXB bits for a register-register instruction. In that encoding, ModRM.rm contains a
 442      * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
 443      * field. The X bit must be 0.
 444      */
 445     protected static int getRXB(Register reg, Register rm) {
 446         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
 447         rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
 448         return rxb;
 449     }
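    // Worked example: for reg = r9 (encoding 9) and rm = rax (encoding 0), the high bit of r9
    // contributes (8 >> 1) = 0b100, so getRXB returns 4. OR-ed into 0x40 by the opcode emitter
    // this yields 0x44, i.e. Prefix.REXR, extending only the ModRM.reg field.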
 450 
 451     /**
     * Get RXB bits for a register-memory instruction. The R bit extends the ModRM.reg field. There
 453      * are two cases for the memory operand:<br>
 454      * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
 455      * <br>
 456      * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
 457      */
 458     protected static int getRXB(Register reg, AMD64Address rm) {
 459         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
 460         if (!rm.getIndex().equals(Register.None)) {
 461             rxb |= (rm.getIndex().encoding & 0x08) >> 2;
 462         }
 463         if (!rm.getBase().equals(Register.None)) {
 464             rxb |= (rm.getBase().encoding & 0x08) >> 3;
 465         }
 466         return rxb;
 467     }
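    // Worked example: for reg = rax and an address with base = r13 and index = r14, the index
    // contributes (8 >> 2) = 0b010 (X) and the base contributes (8 >> 3) = 0b001 (B), so getRXB
    // returns 3 and the resulting prefix is 0x43, i.e. Prefix.REXXB.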
 468 
 469     /**
 470      * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
 471      * <p>
 472      * Format: [ 11 reg r/m ]
 473      */
 474     protected void emitModRM(int reg, Register rm) {
 475         assert (reg & 0x07) == reg;
 476         emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
 477     }
 478 
 479     /**
 480      * Emit the ModR/M byte for two register operands.
 481      * <p>
 482      * Format: [ 11 reg r/m ]
 483      */
 484     protected void emitModRM(Register reg, Register rm) {
 485         emitModRM(reg.encoding & 0x07, rm);
 486     }
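    // Worked example: emitModRM(rbx, rax) emits 0xC0 | (3 << 3) | 0 = 0xD8, the register-direct
    // ModRM byte with reg = rbx and r/m = rax (only the low three bits of each encoding are used;
    // the high bits travel in the REX prefix, see getRXB above).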
 487 
 488     protected void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
 489         assert !reg.equals(Register.None);
 490         emitOperandHelper(encode(reg), addr, false, additionalInstructionSize);
 491     }
 492 
 493     /**
 494      * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
 495      *
 496      * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 497      */
 498     protected void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
 499         assert !reg.equals(Register.None);
 500         emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize);
 501     }
 502 
 503     protected void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
 504         emitOperandHelper(reg, addr, false, additionalInstructionSize);
 505     }
 506 
 507     /**
 508      * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
 509      * extension in the R field.
 510      *
 511      * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 512      * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
 513      *            so that the start position of the next instruction can be computed even though
 514      *            this instruction has not been completely emitted yet.
 515      */
 516     protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
 517         assert (reg & 0x07) == reg;
 518         int regenc = reg << 3;
 519 
 520         Register base = addr.getBase();
 521         Register index = addr.getIndex();
 522 
 523         AMD64Address.Scale scale = addr.getScale();
 524         int disp = addr.getDisplacement();
 525 
 526         if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
 527             // [00 000 101] disp32
 528             assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
 529             emitByte(0x05 | regenc);
 530             if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
 531                 codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
 532             }
 533             emitInt(disp);
 534         } else if (base.isValid()) {
            int baseenc = encode(base);
 536             if (index.isValid()) {
 537                 int indexenc = encode(index) << 3;
 538                 // [base + indexscale + disp]
 539                 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
 540                     // [base + indexscale]
 541                     // [00 reg 100][ss index base]
 542                     assert !index.equals(rsp) : "illegal addressing mode";
 543                     emitByte(0x04 | regenc);
 544                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 545                 } else if (isByte(disp) && !force4Byte) {
 546                     // [base + indexscale + imm8]
 547                     // [01 reg 100][ss index base] imm8
 548                     assert !index.equals(rsp) : "illegal addressing mode";
 549                     emitByte(0x44 | regenc);
 550                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 551                     emitByte(disp & 0xFF);
 552                 } else {
 553                     // [base + indexscale + disp32]
 554                     // [10 reg 100][ss index base] disp32
 555                     assert !index.equals(rsp) : "illegal addressing mode";
 556                     emitByte(0x84 | regenc);
 557                     emitByte(scale.log2 << 6 | indexenc | baseenc);
 558                     emitInt(disp);
 559                 }
 560             } else if (base.equals(rsp) || base.equals(r12)) {
 561                 // [rsp + disp]
 562                 if (disp == 0) {
 563                     // [rsp]
 564                     // [00 reg 100][00 100 100]
 565                     emitByte(0x04 | regenc);
 566                     emitByte(0x24);
 567                 } else if (isByte(disp) && !force4Byte) {
 568                     // [rsp + imm8]
 569                     // [01 reg 100][00 100 100] disp8
 570                     emitByte(0x44 | regenc);
 571                     emitByte(0x24);
 572                     emitByte(disp & 0xFF);
 573                 } else {
 574                     // [rsp + imm32]
 575                     // [10 reg 100][00 100 100] disp32
 576                     emitByte(0x84 | regenc);
 577                     emitByte(0x24);
 578                     emitInt(disp);
 579                 }
 580             } else {
 581                 // [base + disp]
 582                 assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
 583                 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
 584                     // [base]
 585                     // [00 reg base]
 586                     emitByte(0x00 | regenc | baseenc);
 587                 } else if (isByte(disp) && !force4Byte) {
 588                     // [base + disp8]
 589                     // [01 reg base] disp8
 590                     emitByte(0x40 | regenc | baseenc);
 591                     emitByte(disp & 0xFF);
 592                 } else {
 593                     // [base + disp32]
 594                     // [10 reg base] disp32
 595                     emitByte(0x80 | regenc | baseenc);
 596                     emitInt(disp);
 597                 }
 598             }
 599         } else {
 600             if (index.isValid()) {
 601                 int indexenc = encode(index) << 3;
 602                 // [indexscale + disp]
 603                 // [00 reg 100][ss index 101] disp32
 604                 assert !index.equals(rsp) : "illegal addressing mode";
 605                 emitByte(0x04 | regenc);
 606                 emitByte(scale.log2 << 6 | indexenc | 0x05);
 607                 emitInt(disp);
 608             } else {
 609                 // [disp] ABSOLUTE
 610                 // [00 reg 100][00 100 101] disp32
 611                 emitByte(0x04 | regenc);
 612                 emitByte(0x25);
 613                 emitInt(disp);
 614             }
 615         }
 616         setCurAttributes(null);
 617     }
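    // Worked examples of the special cases above: "mov rax, [rbp]" cannot use the zero-displacement
    // form because mod = 00, r/m = 101 means RIP-relative, so it is emitted as 48 8B 45 00 (an
    // explicit disp8 of 0). Similarly "mov rax, [rsp]" needs the SIB escape, giving 48 8B 04 24,
    // because r/m = 100 always selects an SIB byte.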
 618 
 619     /**
 620      * Base class for AMD64 opcodes.
 621      */
 622     public static class AMD64Op {
 623 
 624         protected static final int P_0F = 0x0F;
 625         protected static final int P_0F38 = 0x380F;
 626         protected static final int P_0F3A = 0x3A0F;
 627 
 628         private final String opcode;
 629 
 630         protected final int prefix1;
 631         protected final int prefix2;
 632         protected final int op;
 633 
 634         private final boolean dstIsByte;
 635         private final boolean srcIsByte;
 636 
 637         private final OpAssertion assertion;
 638         private final CPUFeature feature;
 639 
 640         protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 641             this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
 642         }
 643 
 644         protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
 645             this.opcode = opcode;
 646             this.prefix1 = prefix1;
 647             this.prefix2 = prefix2;
 648             this.op = op;
 649 
 650             this.dstIsByte = dstIsByte;
 651             this.srcIsByte = srcIsByte;
 652 
 653             this.assertion = assertion;
 654             this.feature = feature;
 655         }
 656 
 657         protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
 658             if (prefix1 != 0) {
 659                 asm.emitByte(prefix1);
 660             }
 661             if (size.sizePrefix != 0) {
 662                 asm.emitByte(size.sizePrefix);
 663             }
 664             int rexPrefix = 0x40 | rxb;
 665             if (size == QWORD) {
 666                 rexPrefix |= 0x08;
 667             }
 668             if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
 669                 asm.emitByte(rexPrefix);
 670             }
 671             if (prefix2 > 0xFF) {
 672                 asm.emitShort(prefix2);
 673             } else if (prefix2 > 0) {
 674                 asm.emitByte(prefix2);
 675             }
 676             asm.emitByte(op);
 677         }
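        // Worked example: AMD64RMOp.POPCNT.emit(asm, DWORD, r9, r10) runs through this method as
        // prefix1 = 0xF3, no operand-size prefix, REX = 0x45 (R and B from the extended registers),
        // prefix2 = 0x0F, op = 0xB8, and the caller then appends the ModRM byte 0xCA - i.e. the
        // bytes F3 45 0F B8 CA for "popcnt r9d, r10d".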
 678 
 679         protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
 680             assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
 681             assert assertion.checkOperands(this, size, resultReg, inputReg);
 682             return true;
 683         }
 684 
 685         @Override
 686         public String toString() {
 687             return opcode;
 688         }
 689     }
 690 
 691     /**
 692      * Base class for AMD64 opcodes with immediate operands.
 693      */
 694     public static class AMD64ImmOp extends AMD64Op {
 695 
 696         private final boolean immIsByte;
 697 
 698         protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
 699             super(opcode, 0, prefix, op, assertion, null);
 700             this.immIsByte = immIsByte;
 701         }
 702 
 703         protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
 704             if (immIsByte) {
 705                 assert imm == (byte) imm;
 706                 asm.emitByte(imm);
 707             } else {
 708                 size.emitImmediate(asm, imm);
 709             }
 710         }
 711 
 712         protected final int immediateSize(OperandSize size) {
 713             if (immIsByte) {
 714                 return 1;
 715             } else {
 716                 return size.bytes;
 717             }
 718         }
 719     }
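    // Note: immIsByte distinguishes the sign-extended 8-bit immediate forms from the full-size
    // ones. For example AMD64RMIOp.IMUL (opcode 0x69) takes a WORD/DWORD/QWORD-sized immediate,
    // while AMD64RMIOp.IMUL_SX (opcode 0x6B) encodes the same operation with a single sign-extended
    // immediate byte, which is why immediateSize() reports 1 for it regardless of the operand size.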
 720 
 721     /**
 722      * Opcode with operand order of either RM or MR for 2 address forms.
 723      */
 724     public abstract static class AMD64RROp extends AMD64Op {
 725 
 726         protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 727             super(opcode, prefix1, prefix2, op, assertion, feature);
 728         }
 729 
 730         protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
 731             super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
 732         }
 733 
 734         public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
 735     }
 736 
 737     /**
 738      * Opcode with operand order of either RM or MR for 3 address forms.
 739      */
 740     public abstract static class AMD64RRROp extends AMD64Op {
 741 
 742         protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 743             super(opcode, prefix1, prefix2, op, assertion, feature);
 744         }
 745 
 746         protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
 747             super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
 748         }
 749 
 750         public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src);
 751     }
 752 
 753     /**
 754      * Opcode with operand order of RM.
 755      */
 756     public static class AMD64RMOp extends AMD64RROp {
 757         // @formatter:off
 758         public static final AMD64RMOp IMUL   = new AMD64RMOp("IMUL",         P_0F, 0xAF);
 759         public static final AMD64RMOp BSF    = new AMD64RMOp("BSF",          P_0F, 0xBC);
 760         public static final AMD64RMOp BSR    = new AMD64RMOp("BSR",          P_0F, 0xBD);
 761         public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
 762         public static final AMD64RMOp TZCNT  = new AMD64RMOp("TZCNT",  0xF3, P_0F, 0xBC, CPUFeature.BMI1);
 763         public static final AMD64RMOp LZCNT  = new AMD64RMOp("LZCNT",  0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
 764         public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB",       P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion);
 765         public static final AMD64RMOp MOVZX  = new AMD64RMOp("MOVZX",        P_0F, 0xB7, OpAssertion.No16BitAssertion);
 766         public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB",       P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion);
 767         public static final AMD64RMOp MOVSX  = new AMD64RMOp("MOVSX",        P_0F, 0xBF, OpAssertion.No16BitAssertion);
 768         public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD",             0x63, OpAssertion.QwordOnlyAssertion);
 769         public static final AMD64RMOp MOVB   = new AMD64RMOp("MOVB",               0x8A, OpAssertion.ByteAssertion);
 770         public static final AMD64RMOp MOV    = new AMD64RMOp("MOV",                0x8B);
 771 
 772         // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
 773         public static final AMD64RMOp MOVD   = new AMD64RMOp("MOVD",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
 774         public static final AMD64RMOp MOVQ   = new AMD64RMOp("MOVQ",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
 775         public static final AMD64RMOp MOVSS  = new AMD64RMOp("MOVSS",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
 776         public static final AMD64RMOp MOVSD  = new AMD64RMOp("MOVSD",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
 777 
        // TEST is documented as an MR operation, but it is symmetric, and using it as an RM operation is more convenient.
 779         public static final AMD64RMOp TESTB  = new AMD64RMOp("TEST",               0x84, OpAssertion.ByteAssertion);
 780         public static final AMD64RMOp TEST   = new AMD64RMOp("TEST",               0x85);
 781         // @formatter:on
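        // Usage sketch: a load is emitted as, e.g., AMD64RMOp.MOV.emit(asm, QWORD, dst, address)
        // ("mov dst, [address]"), and the register-register overload emit(asm, QWORD, dst, src)
        // produces the corresponding "mov dst, src"; in the RM order the operand named first is
        // placed in the ModRM.reg field.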
 782 
 783         protected AMD64RMOp(String opcode, int op) {
 784             this(opcode, 0, op);
 785         }
 786 
 787         protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
 788             this(opcode, 0, op, assertion);
 789         }
 790 
 791         protected AMD64RMOp(String opcode, int prefix, int op) {
 792             this(opcode, 0, prefix, op, null);
 793         }
 794 
 795         protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
 796             this(opcode, 0, prefix, op, assertion, null);
 797         }
 798 
 799         protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
 800             this(opcode, 0, prefix, op, assertion, feature);
 801         }
 802 
 803         protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
 804             super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
 805         }
 806 
 807         protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
 808             this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
 809         }
 810 
 811         protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
 812             super(opcode, prefix1, prefix2, op, assertion, feature);
 813         }
 814 
 815         @Override
 816         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
 817             assert verify(asm, size, dst, src);
 818             boolean isSimd = false;
 819             boolean noNds = false;
 820 
 821             switch (op) {
 822                 case 0x2A:
 823                 case 0x2C:
 824                 case 0x2E:
 825                 case 0x5A:
 826                 case 0x6E:
 827                     isSimd = true;
 828                     noNds = true;
 829                     break;
 830                 case 0x10:
 831                 case 0x51:
 832                 case 0x54:
 833                 case 0x55:
 834                 case 0x56:
 835                 case 0x57:
 836                 case 0x58:
 837                 case 0x59:
 838                 case 0x5C:
 839                 case 0x5D:
 840                 case 0x5E:
 841                 case 0x5F:
 842                     isSimd = true;
 843                     break;
 844             }
 845 
 846             if (isSimd) {
 847                 int pre;
 848                 int opc;
                boolean rexVexW = size == QWORD;
 850                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
 851                 int curPrefix = size.sizePrefix | prefix1;
 852                 switch (curPrefix) {
 853                     case 0x66:
 854                         pre = VexSimdPrefix.VEX_SIMD_66;
 855                         break;
 856                     case 0xF2:
 857                         pre = VexSimdPrefix.VEX_SIMD_F2;
 858                         break;
 859                     case 0xF3:
 860                         pre = VexSimdPrefix.VEX_SIMD_F3;
 861                         break;
 862                     default:
 863                         pre = VexSimdPrefix.VEX_SIMD_NONE;
 864                         break;
 865                 }
 866                 switch (prefix2) {
 867                     case P_0F:
 868                         opc = VexOpcode.VEX_OPCODE_0F;
 869                         break;
 870                     case P_0F38:
 871                         opc = VexOpcode.VEX_OPCODE_0F_38;
 872                         break;
 873                     case P_0F3A:
 874                         opc = VexOpcode.VEX_OPCODE_0F_3A;
 875                         break;
 876                     default:
 877                         opc = VexOpcode.VEX_OPCODE_NONE;
 878                         break;
 879                 }
 880                 int encode;
 881                 if (noNds) {
 882                     encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
 883                 } else {
 884                     encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
 885                 }
 886                 asm.emitByte(op);
 887                 asm.emitByte(0xC0 | encode);
 888             } else {
 889                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
 890                 asm.emitModRM(dst, src);
 891             }
 892         }
 893 
 894         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
 895             assert verify(asm, size, dst, null);
 896             boolean isSimd = false;
 897             boolean noNds = false;
 898 
 899             switch (op) {
 900                 case 0x10:
 901                 case 0x2A:
 902                 case 0x2C:
 903                 case 0x2E:
 904                 case 0x6E:
 905                     isSimd = true;
 906                     noNds = true;
 907                     break;
 908                 case 0x51:
 909                 case 0x54:
 910                 case 0x55:
 911                 case 0x56:
 912                 case 0x57:
 913                 case 0x58:
 914                 case 0x59:
 915                 case 0x5C:
 916                 case 0x5D:
 917                 case 0x5E:
 918                 case 0x5F:
 919                     isSimd = true;
 920                     break;
 921             }
 922 
 923             if (isSimd) {
 924                 int pre;
 925                 int opc;
                boolean rexVexW = size == QWORD;
 927                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
 928                 int curPrefix = size.sizePrefix | prefix1;
 929                 switch (curPrefix) {
 930                     case 0x66:
 931                         pre = VexSimdPrefix.VEX_SIMD_66;
 932                         break;
 933                     case 0xF2:
 934                         pre = VexSimdPrefix.VEX_SIMD_F2;
 935                         break;
 936                     case 0xF3:
 937                         pre = VexSimdPrefix.VEX_SIMD_F3;
 938                         break;
 939                     default:
 940                         pre = VexSimdPrefix.VEX_SIMD_NONE;
 941                         break;
 942                 }
 943                 switch (prefix2) {
 944                     case P_0F:
 945                         opc = VexOpcode.VEX_OPCODE_0F;
 946                         break;
 947                     case P_0F38:
 948                         opc = VexOpcode.VEX_OPCODE_0F_38;
 949                         break;
 950                     case P_0F3A:
 951                         opc = VexOpcode.VEX_OPCODE_0F_3A;
 952                         break;
 953                     default:
 954                         opc = VexOpcode.VEX_OPCODE_NONE;
 955                         break;
 956                 }
 957                 if (noNds) {
 958                     asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
 959                 } else {
 960                     asm.simdPrefix(dst, dst, src, pre, opc, attributes);
 961                 }
 962                 asm.emitByte(op);
 963                 asm.emitOperandHelper(dst, src, 0);
 964             } else {
 965                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
 966                 asm.emitOperandHelper(dst, src, 0);
 967             }
 968         }
 969     }
 970 
 971     /**
     * Opcode with operand order of RM for 3 address forms.
 973      */
 974     public static class AMD64RRMOp extends AMD64RRROp {
 975         protected AMD64RRMOp(String opcode, int op) {
 976             this(opcode, 0, op);
 977         }
 978 
 979         protected AMD64RRMOp(String opcode, int op, OpAssertion assertion) {
 980             this(opcode, 0, op, assertion);
 981         }
 982 
 983         protected AMD64RRMOp(String opcode, int prefix, int op) {
 984             this(opcode, 0, prefix, op, null);
 985         }
 986 
 987         protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion) {
 988             this(opcode, 0, prefix, op, assertion, null);
 989         }
 990 
 991         protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
 992             this(opcode, 0, prefix, op, assertion, feature);
 993         }
 994 
 995         protected AMD64RRMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
 996             super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
 997         }
 998 
 999         protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
1000             this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
1001         }
1002 
1003         protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
1004             super(opcode, prefix1, prefix2, op, assertion, feature);
1005         }
1006 
1007         @Override
1008         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src) {
1009             assert verify(asm, size, dst, src);
1010             int pre;
1011             int opc;
            boolean rexVexW = size == QWORD;
1013             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1014             int curPrefix = size.sizePrefix | prefix1;
1015             switch (curPrefix) {
1016                 case 0x66:
1017                     pre = VexSimdPrefix.VEX_SIMD_66;
1018                     break;
1019                 case 0xF2:
1020                     pre = VexSimdPrefix.VEX_SIMD_F2;
1021                     break;
1022                 case 0xF3:
1023                     pre = VexSimdPrefix.VEX_SIMD_F3;
1024                     break;
1025                 default:
1026                     pre = VexSimdPrefix.VEX_SIMD_NONE;
1027                     break;
1028             }
1029             switch (prefix2) {
1030                 case P_0F:
1031                     opc = VexOpcode.VEX_OPCODE_0F;
1032                     break;
1033                 case P_0F38:
1034                     opc = VexOpcode.VEX_OPCODE_0F_38;
1035                     break;
1036                 case P_0F3A:
1037                     opc = VexOpcode.VEX_OPCODE_0F_3A;
1038                     break;
1039                 default:
1040                     opc = VexOpcode.VEX_OPCODE_NONE;
1041                     break;
1042             }
            int encode = asm.simdPrefixAndEncode(dst, nds, src, pre, opc, attributes);
1045             asm.emitByte(op);
1046             asm.emitByte(0xC0 | encode);
1047         }
1048 
1049         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, AMD64Address src) {
1050             assert verify(asm, size, dst, null);
1051             int pre;
1052             int opc;
            boolean rexVexW = size == QWORD;
1054             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1055             int curPrefix = size.sizePrefix | prefix1;
1056             switch (curPrefix) {
1057                 case 0x66:
1058                     pre = VexSimdPrefix.VEX_SIMD_66;
1059                     break;
1060                 case 0xF2:
1061                     pre = VexSimdPrefix.VEX_SIMD_F2;
1062                     break;
1063                 case 0xF3:
1064                     pre = VexSimdPrefix.VEX_SIMD_F3;
1065                     break;
1066                 default:
1067                     pre = VexSimdPrefix.VEX_SIMD_NONE;
1068                     break;
1069             }
1070             switch (prefix2) {
1071                 case P_0F:
1072                     opc = VexOpcode.VEX_OPCODE_0F;
1073                     break;
1074                 case P_0F38:
1075                     opc = VexOpcode.VEX_OPCODE_0F_38;
1076                     break;
1077                 case P_0F3A:
1078                     opc = VexOpcode.VEX_OPCODE_0F_3A;
1079                     break;
1080                 default:
1081                     opc = VexOpcode.VEX_OPCODE_NONE;
1082                     break;
1083             }
1084             asm.simdPrefix(dst, nds, src, pre, opc, attributes);
1085             asm.emitByte(op);
1086             asm.emitOperandHelper(dst, src, 0);
1087         }
1088     }
1089 
1090     /**
1091      * Opcode with operand order of MR.
1092      */
1093     public static class AMD64MROp extends AMD64RROp {
1094         // @formatter:off
1095         public static final AMD64MROp MOVB   = new AMD64MROp("MOVB",               0x88, OpAssertion.ByteAssertion);
1096         public static final AMD64MROp MOV    = new AMD64MROp("MOV",                0x89);
1097 
1098         // MOVD and MOVQ are the same opcode, just with different operand size prefix
1099         // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used.
1100         public static final AMD64MROp MOVD   = new AMD64MROp("MOVD",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
1101         public static final AMD64MROp MOVQ   = new AMD64MROp("MOVQ",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
1102 
1103         // MOVSS and MOVSD are the same opcode, just with different operand size prefix
1104         public static final AMD64MROp MOVSS  = new AMD64MROp("MOVSS",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
1105         public static final AMD64MROp MOVSD  = new AMD64MROp("MOVSD",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
1106         // @formatter:on
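        // Usage sketch: the MR order is the mirror image of AMD64RMOp, so a store is emitted as,
        // e.g., AMD64MROp.MOV.emit(asm, QWORD, address, src) ("mov [address], src"); the memory or
        // register destination comes first and the source register last.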
1107 
1108         protected AMD64MROp(String opcode, int op) {
1109             this(opcode, 0, op);
1110         }
1111 
1112         protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
1113             this(opcode, 0, op, assertion);
1114         }
1115 
1116         protected AMD64MROp(String opcode, int prefix, int op) {
1117             this(opcode, prefix, op, OpAssertion.IntegerAssertion);
1118         }
1119 
1120         protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
1121             this(opcode, prefix, op, assertion, null);
1122         }
1123 
1124         protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
1125             this(opcode, 0, prefix, op, assertion, feature);
1126         }
1127 
1128         protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
1129             super(opcode, prefix1, prefix2, op, assertion, feature);
1130         }
1131 
1132         @Override
1133         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
1134             assert verify(asm, size, src, dst);
1135             boolean isSimd = false;
1136             boolean noNds = false;
1137 
1138             switch (op) {
1139                 case 0x7E:
1140                     isSimd = true;
1141                     noNds = true;
1142                     break;
1143                 case 0x11:
1144                     isSimd = true;
1145                     break;
1146             }
1147 
1148             if (isSimd) {
1149                 int pre;
1150                 int opc;
                boolean rexVexW = size == QWORD;
1152                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1153                 int curPrefix = size.sizePrefix | prefix1;
1154                 switch (curPrefix) {
1155                     case 0x66:
1156                         pre = VexSimdPrefix.VEX_SIMD_66;
1157                         break;
1158                     case 0xF2:
1159                         pre = VexSimdPrefix.VEX_SIMD_F2;
1160                         break;
1161                     case 0xF3:
1162                         pre = VexSimdPrefix.VEX_SIMD_F3;
1163                         break;
1164                     default:
1165                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1166                         break;
1167                 }
1168                 switch (prefix2) {
1169                     case P_0F:
1170                         opc = VexOpcode.VEX_OPCODE_0F;
1171                         break;
1172                     case P_0F38:
1173                         opc = VexOpcode.VEX_OPCODE_0F_38;
1174                         break;
1175                     case P_0F3A:
1176                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1177                         break;
1178                     default:
1179                         opc = VexOpcode.VEX_OPCODE_NONE;
1180                         break;
1181                 }
1182                 int encode;
1183                 if (noNds) {
1184                     encode = asm.simdPrefixAndEncode(src, Register.None, dst, pre, opc, attributes);
1185                 } else {
1186                     encode = asm.simdPrefixAndEncode(src, src, dst, pre, opc, attributes);
1187                 }
1188                 asm.emitByte(op);
1189                 asm.emitByte(0xC0 | encode);
1190             } else {
1191                 emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
1192                 asm.emitModRM(src, dst);
1193             }
1194         }
1195 
1196         public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
1197             assert verify(asm, size, null, src);
1198             boolean isSimd = false;
1199 
1200             switch (op) {
1201                 case 0x7E:
1202                 case 0x11:
1203                     isSimd = true;
1204                     break;
1205             }
1206 
1207             if (isSimd) {
1208                 int pre;
1209                 int opc;
                boolean rexVexW = size == QWORD;
1211                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1212                 int curPrefix = size.sizePrefix | prefix1;
1213                 switch (curPrefix) {
1214                     case 0x66:
1215                         pre = VexSimdPrefix.VEX_SIMD_66;
1216                         break;
1217                     case 0xF2:
1218                         pre = VexSimdPrefix.VEX_SIMD_F2;
1219                         break;
1220                     case 0xF3:
1221                         pre = VexSimdPrefix.VEX_SIMD_F3;
1222                         break;
1223                     default:
1224                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1225                         break;
1226                 }
1227                 switch (prefix2) {
1228                     case P_0F:
1229                         opc = VexOpcode.VEX_OPCODE_0F;
1230                         break;
1231                     case P_0F38:
1232                         opc = VexOpcode.VEX_OPCODE_0F_38;
1233                         break;
1234                     case P_0F3A:
1235                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1236                         break;
1237                     default:
1238                         opc = VexOpcode.VEX_OPCODE_NONE;
1239                         break;
1240                 }
1241                 asm.simdPrefix(src, Register.None, dst, pre, opc, attributes);
1242                 asm.emitByte(op);
1243                 asm.emitOperandHelper(src, dst, 0);
1244             } else {
1245                 emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
1246                 asm.emitOperandHelper(src, dst, 0);
1247             }
1248         }
1249     }
1250 
1251     /**
1252      * Opcodes with operand order of M.
1253      */
1254     public static class AMD64MOp extends AMD64Op {
1255         // @formatter:off
1256         public static final AMD64MOp NOT  = new AMD64MOp("NOT",  0xF7, 2);
1257         public static final AMD64MOp NEG  = new AMD64MOp("NEG",  0xF7, 3);
1258         public static final AMD64MOp MUL  = new AMD64MOp("MUL",  0xF7, 4);
1259         public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
1260         public static final AMD64MOp DIV  = new AMD64MOp("DIV",  0xF7, 6);
1261         public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
1262         public static final AMD64MOp INC  = new AMD64MOp("INC",  0xFF, 0);
1263         public static final AMD64MOp DEC  = new AMD64MOp("DEC",  0xFF, 1);
1264         public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
1265         public static final AMD64MOp POP  = new AMD64MOp("POP",  0x8F, 0, OpAssertion.No32BitAssertion);
1266         // @formatter:on
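        // Usage sketch: these are the single-operand forms whose ModRM.reg field carries an opcode
        // extension rather than a register, e.g. AMD64MOp.NEG.emit(asm, DWORD, reg) emits 0xF7 with
        // /3 ("neg reg"), and AMD64MOp.INC.emit(asm, QWORD, reg) emits REX.W 0xFF /0 ("inc reg").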
1267 
1268         private final int ext;
1269 
1270         protected AMD64MOp(String opcode, int op, int ext) {
1271             this(opcode, 0, op, ext);
1272         }
1273 
1274         protected AMD64MOp(String opcode, int prefix, int op, int ext) {
1275             this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion);
1276         }
1277 
1278         protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
1279             this(opcode, 0, op, ext, assertion);
1280         }
1281 
1282         protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
1283             super(opcode, 0, prefix, op, assertion, null);
1284             this.ext = ext;
1285         }
1286 
1287         public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
1288             assert verify(asm, size, dst, null);
1289             emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
1290             asm.emitModRM(ext, dst);
1291         }
1292 
1293         public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
1294             assert verify(asm, size, null, null);
1295             emitOpcode(asm, size, getRXB(null, dst), 0, 0);
1296             asm.emitOperandHelper(ext, dst, 0);
1297         }
1298     }
1299 
1300     /**
1301      * Opcodes with operand order of MI.
1302      */
1303     public static class AMD64MIOp extends AMD64ImmOp {
1304         // @formatter:off
1305         public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true,  0xC6, 0, OpAssertion.ByteAssertion);
1306         public static final AMD64MIOp MOV  = new AMD64MIOp("MOV",  false, 0xC7, 0);
1307         public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
1308         // @formatter:on
1309 
1310         private final int ext;
1311 
1312         protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
1313             this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion);
1314         }
1315 
1316         protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
1317             this(opcode, immIsByte, 0, op, ext, assertion);
1318         }
1319 
1320         protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
1321             super(opcode, immIsByte, prefix, op, assertion);
1322             this.ext = ext;
1323         }
1324 
1325         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
1326             assert verify(asm, size, dst, null);
1327             emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
1328             asm.emitModRM(ext, dst);
1329             emitImmediate(asm, size, imm);
1330         }
1331 
1332         public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
1333             assert verify(asm, size, null, null);
1334             emitOpcode(asm, size, getRXB(null, dst), 0, 0);
1335             asm.emitOperandHelper(ext, dst, immediateSize(size));
1336             emitImmediate(asm, size, imm);
1337         }
1338     }
1339 
1340     /**
1341      * Opcodes with operand order of RMI.
1342      *
     * We only have one form of round, as the operation always treats its input as a single scalar
     * variant, which makes extending it to 3-address forms redundant.
1345      */
1346     public static class AMD64RMIOp extends AMD64ImmOp {
1347         // @formatter:off
1348         public static final AMD64RMIOp IMUL    = new AMD64RMIOp("IMUL", false, 0x69);
1349         public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true,  0x6B);
1350         public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion);
1351         public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion);
1352         // @formatter:on
1353 
1354         protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
1355             this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion);
1356         }
1357 
1358         protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
1359             super(opcode, immIsByte, prefix, op, assertion);
1360         }
1361 
1362         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
1363             assert verify(asm, size, dst, src);
1364             boolean isSimd = false;
1365             boolean noNds = false;
1366 
1367             switch (op) {
1368                 case 0x0A:
1369                 case 0x0B:
1370                     isSimd = true;
1371                     noNds = true;
1372                     break;
1373             }
1374 
1375             if (isSimd) {
1376                 int pre;
1377                 int opc;
1378                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1379                 int curPrefix = size.sizePrefix | prefix1;
1380                 switch (curPrefix) {
1381                     case 0x66:
1382                         pre = VexSimdPrefix.VEX_SIMD_66;
1383                         break;
1384                     case 0xF2:
1385                         pre = VexSimdPrefix.VEX_SIMD_F2;
1386                         break;
1387                     case 0xF3:
1388                         pre = VexSimdPrefix.VEX_SIMD_F3;
1389                         break;
1390                     default:
1391                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1392                         break;
1393                 }
1394                 switch (prefix2) {
1395                     case P_0F:
1396                         opc = VexOpcode.VEX_OPCODE_0F;
1397                         break;
1398                     case P_0F38:
1399                         opc = VexOpcode.VEX_OPCODE_0F_38;
1400                         break;
1401                     case P_0F3A:
1402                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1403                         break;
1404                     default:
1405                         opc = VexOpcode.VEX_OPCODE_NONE;
1406                         break;
1407                 }
1408                 int encode;
1409                 if (noNds) {
1410                     encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
1411                 } else {
1412                     encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
1413                 }
1414                 asm.emitByte(op);
1415                 asm.emitByte(0xC0 | encode);
1416                 emitImmediate(asm, size, imm);
1417             } else {
1418                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
1419                 asm.emitModRM(dst, src);
1420                 emitImmediate(asm, size, imm);
1421             }
1422         }
1423 
1424         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
1425             assert verify(asm, size, dst, null);
1426 
1427             boolean isSimd = false;
1428             boolean noNds = false;
1429 
1430             switch (op) {
1431                 case 0x0A:
1432                 case 0x0B:
1433                     isSimd = true;
1434                     noNds = true;
1435                     break;
1436             }
1437 
1438             if (isSimd) {
1439                 int pre;
1440                 int opc;
1441                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1442                 int curPrefix = size.sizePrefix | prefix1;
1443                 switch (curPrefix) {
1444                     case 0x66:
1445                         pre = VexSimdPrefix.VEX_SIMD_66;
1446                         break;
1447                     case 0xF2:
1448                         pre = VexSimdPrefix.VEX_SIMD_F2;
1449                         break;
1450                     case 0xF3:
1451                         pre = VexSimdPrefix.VEX_SIMD_F3;
1452                         break;
1453                     default:
1454                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1455                         break;
1456                 }
1457                 switch (prefix2) {
1458                     case P_0F:
1459                         opc = VexOpcode.VEX_OPCODE_0F;
1460                         break;
1461                     case P_0F38:
1462                         opc = VexOpcode.VEX_OPCODE_0F_38;
1463                         break;
1464                     case P_0F3A:
1465                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1466                         break;
1467                     default:
1468                         opc = VexOpcode.VEX_OPCODE_NONE;
1469                         break;
1470                 }
1471 
1472                 if (noNds) {
1473                     asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
1474                 } else {
1475                     asm.simdPrefix(dst, dst, src, pre, opc, attributes);
1476                 }
1477                 asm.emitByte(op);
1478                 asm.emitOperandHelper(dst, src, immediateSize(size));
1479                 emitImmediate(asm, size, imm);
1480             } else {
1481                 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
1482                 asm.emitOperandHelper(dst, src, immediateSize(size));
1483                 emitImmediate(asm, size, imm);
1484             }
1485         }
1486     }
1487 
1488     public static class SSEOp extends AMD64RMOp {
1489         // @formatter:off
1490         public static final SSEOp CVTSI2SS  = new SSEOp("CVTSI2SS",  0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
        public static final SSEOp CVTSI2SD  = new SSEOp("CVTSI2SD",  0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
1492         public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
1493         public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
1494         public static final SSEOp UCOMIS    = new SSEOp("UCOMIS",          P_0F, 0x2E, OpAssertion.PackedFloatingAssertion);
1495         public static final SSEOp SQRT      = new SSEOp("SQRT",            P_0F, 0x51);
1496         public static final SSEOp AND       = new SSEOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
1497         public static final SSEOp ANDN      = new SSEOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
1498         public static final SSEOp OR        = new SSEOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
1499         public static final SSEOp XOR       = new SSEOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
1500         public static final SSEOp ADD       = new SSEOp("ADD",             P_0F, 0x58);
1501         public static final SSEOp MUL       = new SSEOp("MUL",             P_0F, 0x59);
1502         public static final SSEOp CVTSS2SD  = new SSEOp("CVTSS2SD",        P_0F, 0x5A, OpAssertion.SingleAssertion);
1503         public static final SSEOp CVTSD2SS  = new SSEOp("CVTSD2SS",        P_0F, 0x5A, OpAssertion.DoubleAssertion);
1504         public static final SSEOp SUB       = new SSEOp("SUB",             P_0F, 0x5C);
1505         public static final SSEOp MIN       = new SSEOp("MIN",             P_0F, 0x5D);
1506         public static final SSEOp DIV       = new SSEOp("DIV",             P_0F, 0x5E);
1507         public static final SSEOp MAX       = new SSEOp("MAX",             P_0F, 0x5F);
1508         // @formatter:on
1509 
1510         protected SSEOp(String opcode, int prefix, int op) {
1511             this(opcode, prefix, op, OpAssertion.FloatingAssertion);
1512         }
1513 
1514         protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
1515             this(opcode, 0, prefix, op, assertion);
1516         }
1517 
1518         protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
1519             super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
1520         }
1521     }
1522 
1523     public static class AVXOp extends AMD64RRMOp {
1524         // @formatter:off
1525         public static final AVXOp AND       = new AVXOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
1526         public static final AVXOp ANDN      = new AVXOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
1527         public static final AVXOp OR        = new AVXOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
1528         public static final AVXOp XOR       = new AVXOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
1529         public static final AVXOp ADD       = new AVXOp("ADD",             P_0F, 0x58);
1530         public static final AVXOp MUL       = new AVXOp("MUL",             P_0F, 0x59);
1531         public static final AVXOp SUB       = new AVXOp("SUB",             P_0F, 0x5C);
1532         public static final AVXOp MIN       = new AVXOp("MIN",             P_0F, 0x5D);
1533         public static final AVXOp DIV       = new AVXOp("DIV",             P_0F, 0x5E);
1534         public static final AVXOp MAX       = new AVXOp("MAX",             P_0F, 0x5F);
1535         // @formatter:on
1536 
1537         protected AVXOp(String opcode, int prefix, int op) {
1538             this(opcode, prefix, op, OpAssertion.FloatingAssertion);
1539         }
1540 
1541         protected AVXOp(String opcode, int prefix, int op, OpAssertion assertion) {
1542             this(opcode, 0, prefix, op, assertion);
1543         }
1544 
1545         protected AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
1546             super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.AVX);
1547         }
1548     }
1549 
1550     /**
1551      * Arithmetic operation with operand order of RM, MR or MI.
1552      */
1553     public static final class AMD64BinaryArithmetic {
1554         // @formatter:off
1555         public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
1556         public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR",  1);
1557         public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
1558         public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
1559         public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
1560         public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
1561         public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
1562         public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
1563         // @formatter:on
1564 
1565         private final AMD64MIOp byteImmOp;
1566         private final AMD64MROp byteMrOp;
1567         private final AMD64RMOp byteRmOp;
1568 
1569         private final AMD64MIOp immOp;
1570         private final AMD64MIOp immSxOp;
1571         private final AMD64MROp mrOp;
1572         private final AMD64RMOp rmOp;
1573 
1574         private AMD64BinaryArithmetic(String opcode, int code) {
1575             int baseOp = code << 3;
1576 
1577             byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
1578             byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
1579             byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);
1580 
1581             immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion);
1582             immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion);
1583             mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion);
1584             rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion);
1585         }
1586 
1587         public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
1588             if (size == BYTE) {
1589                 return byteImmOp;
1590             } else if (sx) {
1591                 return immSxOp;
1592             } else {
1593                 return immOp;
1594             }
1595         }
1596 
1597         public AMD64MROp getMROpcode(OperandSize size) {
1598             if (size == BYTE) {
1599                 return byteMrOp;
1600             } else {
1601                 return mrOp;
1602             }
1603         }
1604 
1605         public AMD64RMOp getRMOpcode(OperandSize size) {
1606             if (size == BYTE) {
1607                 return byteRmOp;
1608             } else {
1609                 return rmOp;
1610             }
1611         }
1612     }
1613 
1614     /**
1615      * Shift operation with operand order of M1, MC or MI.
1616      */
1617     public static final class AMD64Shift {
1618         // @formatter:off
1619         public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
1620         public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
1621         public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
1622         public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
1623         public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
1624         public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
1625         public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
1626         // @formatter:on
1627 
1628         public final AMD64MOp m1Op;
1629         public final AMD64MOp mcOp;
1630         public final AMD64MIOp miOp;
1631 
1632         private AMD64Shift(String opcode, int code) {
1633             m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion);
1634             mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion);
1635             miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion);
1636         }
1637     }
1638 
1639     public final void addl(AMD64Address dst, int imm32) {
1640         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1641     }
1642 
1643     public final void addl(Register dst, int imm32) {
1644         ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1645     }
1646 
1647     public final void addl(Register dst, Register src) {
1648         ADD.rmOp.emit(this, DWORD, dst, src);
1649     }
1650 
1651     public final void addpd(Register dst, Register src) {
1652         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1653         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1654         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1655         emitByte(0x58);
1656         emitByte(0xC0 | encode);
1657     }
1658 
1659     public final void addpd(Register dst, AMD64Address src) {
1660         assert dst.getRegisterCategory().equals(AMD64.XMM);
1661         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1662         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1663         emitByte(0x58);
1664         emitOperandHelper(dst, src, 0);
1665     }
1666 
1667     public final void addsd(Register dst, Register src) {
1668         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1669         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1670         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1671         emitByte(0x58);
1672         emitByte(0xC0 | encode);
1673     }
1674 
1675     public final void addsd(Register dst, AMD64Address src) {
1676         assert dst.getRegisterCategory().equals(AMD64.XMM);
1677         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1678         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1679         emitByte(0x58);
1680         emitOperandHelper(dst, src, 0);
1681     }
1682 
1683     private void addrNop4() {
1684         // 4 bytes: NOP DWORD PTR [EAX+0]
1685         emitByte(0x0F);
1686         emitByte(0x1F);
1687         emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
        emitByte(0); // 8-bit offset (1 byte)
1689     }
1690 
1691     private void addrNop5() {
        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset
1693         emitByte(0x0F);
1694         emitByte(0x1F);
1695         emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
1696         emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitByte(0); // 8-bit offset (1 byte)
1698     }
1699 
1700     private void addrNop7() {
        // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset
1702         emitByte(0x0F);
1703         emitByte(0x1F);
1704         emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
        emitInt(0); // 32-bit offset (4 bytes)
1706     }
1707 
1708     private void addrNop8() {
        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset
1710         emitByte(0x0F);
1711         emitByte(0x1F);
1712         emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
1713         emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitInt(0); // 32-bit offset (4 bytes)
1715     }
1716 
1717     public final void andl(Register dst, int imm32) {
1718         AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1719     }
1720 
1721     public final void andl(Register dst, Register src) {
1722         AND.rmOp.emit(this, DWORD, dst, src);
1723     }
1724 
1725     public final void andpd(Register dst, Register src) {
1726         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1727         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1728         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1729         emitByte(0x54);
1730         emitByte(0xC0 | encode);
1731     }
1732 
1733     public final void andpd(Register dst, AMD64Address src) {
1734         assert dst.getRegisterCategory().equals(AMD64.XMM);
1735         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1736         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1737         emitByte(0x54);
1738         emitOperandHelper(dst, src, 0);
1739     }
1740 
1741     public final void bsrl(Register dst, Register src) {
1742         int encode = prefixAndEncode(dst.encoding(), src.encoding());
1743         emitByte(0x0F);
1744         emitByte(0xBD);
1745         emitByte(0xC0 | encode);
1746     }
1747 
1748     public final void bswapl(Register reg) {
1749         int encode = prefixAndEncode(reg.encoding);
1750         emitByte(0x0F);
1751         emitByte(0xC8 | encode);
1752     }
1753 
1754     public final void cdql() {
1755         emitByte(0x99);
1756     }
1757 
1758     public final void cmovl(ConditionFlag cc, Register dst, Register src) {
1759         int encode = prefixAndEncode(dst.encoding, src.encoding);
1760         emitByte(0x0F);
1761         emitByte(0x40 | cc.getValue());
1762         emitByte(0xC0 | encode);
1763     }
1764 
1765     public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
1766         prefix(src, dst);
1767         emitByte(0x0F);
1768         emitByte(0x40 | cc.getValue());
1769         emitOperandHelper(dst, src, 0);
1770     }
1771 
1772     public final void cmpl(Register dst, int imm32) {
1773         CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1774     }
1775 
1776     public final void cmpl(Register dst, Register src) {
1777         CMP.rmOp.emit(this, DWORD, dst, src);
1778     }
1779 
1780     public final void cmpl(Register dst, AMD64Address src) {
1781         CMP.rmOp.emit(this, DWORD, dst, src);
1782     }
1783 
1784     public final void cmpl(AMD64Address dst, int imm32) {
1785         CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1786     }
1787 
    // The 32-bit cmpxchg compares the value at adr with the contents of rax, and stores reg into
    // adr if the two are equal; otherwise, the value at adr is loaded into rax.
    // The ZF flag is set if the compared values were equal, and cleared otherwise.
1791     public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
1792         prefix(adr, reg);
1793         emitByte(0x0F);
1794         emitByte(0xB1);
1795         emitOperandHelper(reg, adr, 0);
1796     }
1797 
1798     public final void cvtsi2sdl(Register dst, Register src) {
1799         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
1800         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1801         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1802         emitByte(0x2A);
1803         emitByte(0xC0 | encode);
1804     }
1805 
1806     public final void cvttsd2sil(Register dst, Register src) {
1807         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
1808         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1809         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1810         emitByte(0x2C);
1811         emitByte(0xC0 | encode);
1812     }
1813 
1814     protected final void decl(AMD64Address dst) {
1815         prefix(dst);
1816         emitByte(0xFF);
1817         emitOperandHelper(1, dst, 0);
1818     }
1819 
1820     public final void divsd(Register dst, Register src) {
1821         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1822         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1823         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
1824         emitByte(0x5E);
1825         emitByte(0xC0 | encode);
1826     }
1827 
1828     public final void hlt() {
1829         emitByte(0xF4);
1830     }
1831 
1832     public final void imull(Register dst, Register src, int value) {
1833         if (isByte(value)) {
1834             AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
1835         } else {
1836             AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
1837         }
1838     }
1839 
1840     protected final void incl(AMD64Address dst) {
1841         prefix(dst);
1842         emitByte(0xFF);
1843         emitOperandHelper(0, dst, 0);
1844     }
1845 
1846     public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
1847         int shortSize = 2;
1848         int longSize = 6;
1849         long disp = jumpTarget - position();
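        // disp is measured from the start of this instruction, while the encoded displacement is
        // relative to the next instruction, hence the shortSize/longSize adjustments below.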
1850         if (!forceDisp32 && isByte(disp - shortSize)) {
1851             // 0111 tttn #8-bit disp
1852             emitByte(0x70 | cc.getValue());
1853             emitByte((int) ((disp - shortSize) & 0xFF));
1854         } else {
1855             // 0000 1111 1000 tttn #32-bit disp
1856             assert isInt(disp - longSize) : "must be 32bit offset (call4)";
1857             emitByte(0x0F);
1858             emitByte(0x80 | cc.getValue());
1859             emitInt((int) (disp - longSize));
1860         }
1861     }
1862 
1863     public final void jcc(ConditionFlag cc, Label l) {
1864         assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
1865         if (l.isBound()) {
1866             jcc(cc, l.position(), false);
1867         } else {
            // Note: we could eliminate conditional jumps to this jump if the condition is the
            // same; however, that seems to be a rather unlikely case.
            // Note: use jccb() if the label to be bound is very close, to get an 8-bit
            // displacement.
1872             l.addPatchAt(position());
1873             emitByte(0x0F);
1874             emitByte(0x80 | cc.getValue());
1875             emitInt(0);
1876         }
1877 
1878     }
1879 
1880     public final void jccb(ConditionFlag cc, Label l) {
1881         if (l.isBound()) {
1882             int shortSize = 2;
1883             int entry = l.position();
            assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
1885             long disp = entry - position();
1886             // 0111 tttn #8-bit disp
1887             emitByte(0x70 | cc.getValue());
1888             emitByte((int) ((disp - shortSize) & 0xFF));
1889         } else {
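            // Forward branch: record a patch site and emit a one-byte placeholder displacement;
            // it is fixed up once the label is bound.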
1890             l.addPatchAt(position());
1891             emitByte(0x70 | cc.getValue());
1892             emitByte(0);
1893         }
1894     }
1895 
1896     public final void jmp(int jumpTarget, boolean forceDisp32) {
1897         int shortSize = 2;
1898         int longSize = 5;
1899         long disp = jumpTarget - position();
1900         if (!forceDisp32 && isByte(disp - shortSize)) {
1901             emitByte(0xEB);
1902             emitByte((int) ((disp - shortSize) & 0xFF));
1903         } else {
1904             emitByte(0xE9);
1905             emitInt((int) (disp - longSize));
1906         }
1907     }
1908 
1909     @Override
1910     public final void jmp(Label l) {
1911         if (l.isBound()) {
1912             jmp(l.position(), false);
1913         } else {
            // By default, forward jumps always use a 32-bit displacement, since we cannot yet
            // know where the label will be bound. If you are sure that the forward jump stays
            // within the range of an 8-bit displacement, use jmpb to force the short form.
1918 
1919             l.addPatchAt(position());
1920             emitByte(0xE9);
1921             emitInt(0);
1922         }
1923     }
1924 
1925     public final void jmp(Register entry) {
1926         int encode = prefixAndEncode(entry.encoding);
1927         emitByte(0xFF);
1928         emitByte(0xE0 | encode);
1929     }
1930 
1931     public final void jmp(AMD64Address adr) {
1932         prefix(adr);
1933         emitByte(0xFF);
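        // jmp r/m is FF /4; passing rsp (register encoding 4) to emitOperandHelper supplies the
        // /4 opcode-extension digit in the ModRM reg field.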
1934         emitOperandHelper(rsp, adr, 0);
1935     }
1936 
1937     public final void jmpb(Label l) {
1938         if (l.isBound()) {
1939             int shortSize = 2;
1940             int entry = l.position();
            assert isByte((entry - position()) + shortSize) : "Displacement too large for a short jmp";
1942             long offs = entry - position();
1943             emitByte(0xEB);
1944             emitByte((int) ((offs - shortSize) & 0xFF));
1945         } else {
1946 
1947             l.addPatchAt(position());
1948             emitByte(0xEB);
1949             emitByte(0);
1950         }
1951     }
1952 
1953     public final void leaq(Register dst, AMD64Address src) {
1954         prefixq(src, dst);
1955         emitByte(0x8D);
1956         emitOperandHelper(dst, src, 0);
1957     }
1958 
1959     public final void leave() {
1960         emitByte(0xC9);
1961     }
1962 
1963     public final void lock() {
1964         emitByte(0xF0);
1965     }
1966 
1967     public final void movapd(Register dst, Register src) {
1968         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1969         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1970         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
1971         emitByte(0x28);
1972         emitByte(0xC0 | encode);
1973     }
1974 
1975     public final void movaps(Register dst, Register src) {
1976         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1977         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
1978         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
1979         emitByte(0x28);
1980         emitByte(0xC0 | encode);
1981     }
1982 
1983     public final void movb(AMD64Address dst, int imm8) {
1984         prefix(dst);
1985         emitByte(0xC6);
1986         emitOperandHelper(0, dst, 1);
1987         emitByte(imm8);
1988     }
1989 
1990     public final void movb(AMD64Address dst, Register src) {
1991         assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
1992         prefix(dst, src, true);
1993         emitByte(0x88);
1994         emitOperandHelper(src, dst, 0);
1995     }
1996 
1997     public final void movl(Register dst, int imm32) {
1998         int encode = prefixAndEncode(dst.encoding);
1999         emitByte(0xB8 | encode);
2000         emitInt(imm32);
2001     }
2002 
2003     public final void movl(Register dst, Register src) {
2004         int encode = prefixAndEncode(dst.encoding, src.encoding);
2005         emitByte(0x8B);
2006         emitByte(0xC0 | encode);
2007     }
2008 
2009     public final void movl(Register dst, AMD64Address src) {
2010         prefix(src, dst);
2011         emitByte(0x8B);
2012         emitOperandHelper(dst, src, 0);
2013     }
2014 
2015     public final void movl(AMD64Address dst, int imm32) {
2016         prefix(dst);
2017         emitByte(0xC7);
2018         emitOperandHelper(0, dst, 4);
2019         emitInt(imm32);
2020     }
2021 
2022     public final void movl(AMD64Address dst, Register src) {
2023         prefix(dst, src);
2024         emitByte(0x89);
2025         emitOperandHelper(src, dst, 0);
2026     }
2027 
2028     /**
     * New CPUs require the use of movsd and movss to avoid a partial register stall when loading
     * from memory. But for the old Opteron, use movlpd instead of movsd. The selection is done in
2031      * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
2032      * {@link AMD64MacroAssembler#movflt(Register, Register)}.
2033      */
2034     public final void movlpd(Register dst, AMD64Address src) {
2035         assert dst.getRegisterCategory().equals(AMD64.XMM);
2036         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2037         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2038         emitByte(0x12);
2039         emitOperandHelper(dst, src, 0);
2040     }
2041 
2042     public final void movlhps(Register dst, Register src) {
2043         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2044         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2045         int encode = simdPrefixAndEncode(dst, src, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2046         emitByte(0x16);
2047         emitByte(0xC0 | encode);
2048     }
2049 
2050     public final void movq(Register dst, AMD64Address src) {
2051         movq(dst, src, false);
2052     }
2053 
2054     public final void movq(Register dst, AMD64Address src, boolean wide) {
2055         if (dst.getRegisterCategory().equals(AMD64.XMM)) {
2056             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ wide, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2057             simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2058             emitByte(0x7E);
2059             emitOperandHelper(dst, src, wide, 0);
2060         } else {
2061             // gpr version of movq
2062             prefixq(src, dst);
2063             emitByte(0x8B);
2064             emitOperandHelper(dst, src, wide, 0);
2065         }
2066     }
2067 
2068     public final void movq(Register dst, Register src) {
2069         int encode = prefixqAndEncode(dst.encoding, src.encoding);
2070         emitByte(0x8B);
2071         emitByte(0xC0 | encode);
2072     }
2073 
2074     public final void movq(AMD64Address dst, Register src) {
2075         if (src.getRegisterCategory().equals(AMD64.XMM)) {
2076             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2077             simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2078             emitByte(0xD6);
2079             emitOperandHelper(src, dst, 0);
2080         } else {
2081             // gpr version of movq
2082             prefixq(dst, src);
2083             emitByte(0x89);
2084             emitOperandHelper(src, dst, 0);
2085         }
2086     }
2087 
2088     public final void movsbl(Register dst, AMD64Address src) {
2089         prefix(src, dst);
2090         emitByte(0x0F);
2091         emitByte(0xBE);
2092         emitOperandHelper(dst, src, 0);
2093     }
2094 
2095     public final void movsbl(Register dst, Register src) {
2096         int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
2097         emitByte(0x0F);
2098         emitByte(0xBE);
2099         emitByte(0xC0 | encode);
2100     }
2101 
2102     public final void movsbq(Register dst, AMD64Address src) {
2103         prefixq(src, dst);
2104         emitByte(0x0F);
2105         emitByte(0xBE);
2106         emitOperandHelper(dst, src, 0);
2107     }
2108 
2109     public final void movsbq(Register dst, Register src) {
2110         int encode = prefixqAndEncode(dst.encoding, src.encoding);
2111         emitByte(0x0F);
2112         emitByte(0xBE);
2113         emitByte(0xC0 | encode);
2114     }
2115 
2116     public final void movsd(Register dst, Register src) {
2117         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2118         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2119         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2120         emitByte(0x10);
2121         emitByte(0xC0 | encode);
2122     }
2123 
2124     public final void movsd(Register dst, AMD64Address src) {
2125         assert dst.getRegisterCategory().equals(AMD64.XMM);
2126         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2127         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2128         emitByte(0x10);
2129         emitOperandHelper(dst, src, 0);
2130     }
2131 
2132     public final void movsd(AMD64Address dst, Register src) {
2133         assert src.getRegisterCategory().equals(AMD64.XMM);
2134         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2135         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2136         emitByte(0x11);
2137         emitOperandHelper(src, dst, 0);
2138     }
2139 
2140     public final void movss(Register dst, Register src) {
2141         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2142         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2143         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2144         emitByte(0x10);
2145         emitByte(0xC0 | encode);
2146     }
2147 
2148     public final void movss(Register dst, AMD64Address src) {
2149         assert dst.getRegisterCategory().equals(AMD64.XMM);
2150         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2151         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2152         emitByte(0x10);
2153         emitOperandHelper(dst, src, 0);
2154     }
2155 
2156     public final void movss(AMD64Address dst, Register src) {
2157         assert src.getRegisterCategory().equals(AMD64.XMM);
2158         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2159         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2160         emitByte(0x11);
2161         emitOperandHelper(src, dst, 0);
2162     }
2163 
2164     public final void mulpd(Register dst, Register src) {
2165         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2166         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2167         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2168         emitByte(0x59);
2169         emitByte(0xC0 | encode);
2170     }
2171 
2172     public final void mulpd(Register dst, AMD64Address src) {
2173         assert dst.getRegisterCategory().equals(AMD64.XMM);
2174         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2175         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2176         emitByte(0x59);
2177         emitOperandHelper(dst, src, 0);
2178     }
2179 
2180     public final void mulsd(Register dst, Register src) {
2181         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2182         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2183         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2184         emitByte(0x59);
2185         emitByte(0xC0 | encode);
2186     }
2187 
2188     public final void mulsd(Register dst, AMD64Address src) {
2189         assert dst.getRegisterCategory().equals(AMD64.XMM);
2190         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2191         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2192         emitByte(0x59);
2193         emitOperandHelper(dst, src, 0);
2194     }
2195 
2196     public final void mulss(Register dst, Register src) {
2197         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2198         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2199         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2200         emitByte(0x59);
2201         emitByte(0xC0 | encode);
2202     }
2203 
2204     public final void movswl(Register dst, AMD64Address src) {
2205         prefix(src, dst);
2206         emitByte(0x0F);
2207         emitByte(0xBF);
2208         emitOperandHelper(dst, src, 0);
2209     }
2210 
2211     public final void movw(AMD64Address dst, int imm16) {
        emitByte(0x66); // operand-size prefix selecting 16-bit operands
2213         prefix(dst);
2214         emitByte(0xC7);
2215         emitOperandHelper(0, dst, 2);
2216         emitShort(imm16);
2217     }
2218 
2219     public final void movw(AMD64Address dst, Register src) {
2220         emitByte(0x66);
2221         prefix(dst, src);
2222         emitByte(0x89);
2223         emitOperandHelper(src, dst, 0);
2224     }
2225 
2226     public final void movzbl(Register dst, AMD64Address src) {
2227         prefix(src, dst);
2228         emitByte(0x0F);
2229         emitByte(0xB6);
2230         emitOperandHelper(dst, src, 0);
2231     }
2232 
2233     public final void movzwl(Register dst, AMD64Address src) {
2234         prefix(src, dst);
2235         emitByte(0x0F);
2236         emitByte(0xB7);
2237         emitOperandHelper(dst, src, 0);
2238     }
2239 
2240     public final void negl(Register dst) {
2241         NEG.emit(this, DWORD, dst);
2242     }
2243 
2244     public final void notl(Register dst) {
2245         NOT.emit(this, DWORD, dst);
2246     }
2247 
2248     @Override
2249     public final void ensureUniquePC() {
2250         nop();
2251     }
2252 
2253     public final void nop() {
2254         nop(1);
2255     }
2256 
2257     public void nop(int count) {
2258         int i = count;
2259         if (UseNormalNop) {
2260             assert i > 0 : " ";
            // The fancy nops aren't currently recognized by debuggers, making it a pain to
            // disassemble code while debugging. If asserts are on, speed is clearly not an issue,
            // so simply use the traditional single-byte nop for alignment.
2265 
2266             for (; i > 0; i--) {
2267                 emitByte(0x90);
2268             }
2269             return;
2270         }
2271 
2272         if (UseAddressNop) {
2273             //
            // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
2275             // 1: 0x90
2276             // 2: 0x66 0x90
2277             // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2278             // 4: 0x0F 0x1F 0x40 0x00
2279             // 5: 0x0F 0x1F 0x44 0x00 0x00
2280             // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2281             // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2282             // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2283             // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2284             // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2285             // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2286 
            // The rest of the encoding is AMD-specific - use consecutive address nops
2288 
2289             // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2290             // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2291             // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2292             // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2293             // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2294             // Size prefixes (0x66) are added for larger sizes
2295 
2296             while (i >= 22) {
2297                 i -= 11;
2298                 emitByte(0x66); // size prefix
2299                 emitByte(0x66); // size prefix
2300                 emitByte(0x66); // size prefix
2301                 addrNop8();
2302             }
            // Generate the first nop for sizes between 12 and 21
2304             switch (i) {
2305                 case 21:
2306                     i -= 11;
2307                     emitByte(0x66); // size prefix
2308                     emitByte(0x66); // size prefix
2309                     emitByte(0x66); // size prefix
2310                     addrNop8();
2311                     break;
2312                 case 20:
2313                 case 19:
2314                     i -= 10;
2315                     emitByte(0x66); // size prefix
2316                     emitByte(0x66); // size prefix
2317                     addrNop8();
2318                     break;
2319                 case 18:
2320                 case 17:
2321                     i -= 9;
2322                     emitByte(0x66); // size prefix
2323                     addrNop8();
2324                     break;
2325                 case 16:
2326                 case 15:
2327                     i -= 8;
2328                     addrNop8();
2329                     break;
2330                 case 14:
2331                 case 13:
2332                     i -= 7;
2333                     addrNop7();
2334                     break;
2335                 case 12:
2336                     i -= 6;
2337                     emitByte(0x66); // size prefix
2338                     addrNop5();
2339                     break;
2340                 default:
2341                     assert i < 12;
2342             }
2343 
            // Generate the second nop for sizes between 1 and 11
2345             switch (i) {
2346                 case 11:
2347                     emitByte(0x66); // size prefix
2348                     emitByte(0x66); // size prefix
2349                     emitByte(0x66); // size prefix
2350                     addrNop8();
2351                     break;
2352                 case 10:
2353                     emitByte(0x66); // size prefix
2354                     emitByte(0x66); // size prefix
2355                     addrNop8();
2356                     break;
2357                 case 9:
2358                     emitByte(0x66); // size prefix
2359                     addrNop8();
2360                     break;
2361                 case 8:
2362                     addrNop8();
2363                     break;
2364                 case 7:
2365                     addrNop7();
2366                     break;
2367                 case 6:
2368                     emitByte(0x66); // size prefix
2369                     addrNop5();
2370                     break;
2371                 case 5:
2372                     addrNop5();
2373                     break;
2374                 case 4:
2375                     addrNop4();
2376                     break;
2377                 case 3:
2378                     // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2379                     emitByte(0x66); // size prefix
2380                     emitByte(0x66); // size prefix
2381                     emitByte(0x90); // nop
2382                     break;
2383                 case 2:
2384                     emitByte(0x66); // size prefix
2385                     emitByte(0x90); // nop
2386                     break;
2387                 case 1:
2388                     emitByte(0x90); // nop
2389                     break;
2390                 default:
2391                     assert i == 0;
2392             }
2393             return;
2394         }
2395 
2396         // Using nops with size prefixes "0x66 0x90".
2397         // From AMD Optimization Guide:
2398         // 1: 0x90
2399         // 2: 0x66 0x90
2400         // 3: 0x66 0x66 0x90
2401         // 4: 0x66 0x66 0x66 0x90
2402         // 5: 0x66 0x66 0x90 0x66 0x90
2403         // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2404         // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2405         // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2406         // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2407         // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2408         //
2409         while (i > 12) {
2410             i -= 4;
2411             emitByte(0x66); // size prefix
2412             emitByte(0x66);
2413             emitByte(0x66);
2414             emitByte(0x90); // nop
2415         }
2416         // 1 - 12 nops
2417         if (i > 8) {
2418             if (i > 9) {
2419                 i -= 1;
2420                 emitByte(0x66);
2421             }
2422             i -= 3;
2423             emitByte(0x66);
2424             emitByte(0x66);
2425             emitByte(0x90);
2426         }
2427         // 1 - 8 nops
2428         if (i > 4) {
2429             if (i > 6) {
2430                 i -= 1;
2431                 emitByte(0x66);
2432             }
2433             i -= 3;
2434             emitByte(0x66);
2435             emitByte(0x66);
2436             emitByte(0x90);
2437         }
2438         switch (i) {
2439             case 4:
2440                 emitByte(0x66);
2441                 emitByte(0x66);
2442                 emitByte(0x66);
2443                 emitByte(0x90);
2444                 break;
2445             case 3:
2446                 emitByte(0x66);
2447                 emitByte(0x66);
2448                 emitByte(0x90);
2449                 break;
2450             case 2:
2451                 emitByte(0x66);
2452                 emitByte(0x90);
2453                 break;
2454             case 1:
2455                 emitByte(0x90);
2456                 break;
2457             default:
2458                 assert i == 0;
2459         }
2460     }
2461 
2462     public final void orl(Register dst, Register src) {
2463         OR.rmOp.emit(this, DWORD, dst, src);
2464     }
2465 
2466     public final void orl(Register dst, int imm32) {
2467         OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2468     }
2469 
2470     public final void pop(Register dst) {
2471         int encode = prefixAndEncode(dst.encoding);
2472         emitByte(0x58 | encode);
2473     }
2474 
2475     public void popfq() {
2476         emitByte(0x9D);
2477     }
2478 
2479     public final void ptest(Register dst, Register src) {
2480         assert supports(CPUFeature.SSE4_1);
2481         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2482         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2483         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2484         emitByte(0x17);
2485         emitByte(0xC0 | encode);
2486     }
2487 
2488     public final void vptest(Register dst, Register src) {
2489         assert supports(CPUFeature.AVX);
2490         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2491         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2492         int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2493         emitByte(0x17);
2494         emitByte(0xC0 | encode);
2495     }
2496 
2497     void pcmpestri(Register dst, AMD64Address src, int imm8) {
2498         assert supports(CPUFeature.SSE4_2);
2499         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2500         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
2501         emitByte(0x61);
2502         emitOperandHelper(dst, src, 0);
2503         emitByte(imm8);
2504     }
2505 
2506     void pcmpestri(Register dst, Register src, int imm8) {
2507         assert supports(CPUFeature.SSE4_2);
2508         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2509         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
2510         emitByte(0x61);
2511         emitByte(0xC0 | encode);
2512         emitByte(imm8);
2513     }
2514 
2515     public final void push(Register src) {
2516         int encode = prefixAndEncode(src.encoding);
2517         emitByte(0x50 | encode);
2518     }
2519 
2520     public void pushfq() {
        emitByte(0x9C);
2522     }
2523 
2524     public final void paddd(Register dst, Register src) {
2525         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2526         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2527         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2528         emitByte(0xFE);
2529         emitByte(0xC0 | encode);
2530     }
2531 
2532     public final void paddq(Register dst, Register src) {
2533         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2534         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2535         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2536         emitByte(0xD4);
2537         emitByte(0xC0 | encode);
2538     }
2539 
2540     public final void pextrw(Register dst, Register src, int imm8) {
2541         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
2542         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2543         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2544         emitByte(0xC5);
2545         emitByte(0xC0 | encode);
2546         emitByte(imm8);
2547     }
2548 
2549     public final void pinsrw(Register dst, Register src, int imm8) {
2550         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
2551         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2552         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2553         emitByte(0xC4);
2554         emitByte(0xC0 | encode);
2555         emitByte(imm8);
2556     }
2557 
2558     public final void por(Register dst, Register src) {
2559         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2560         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2561         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2562         emitByte(0xEB);
2563         emitByte(0xC0 | encode);
2564     }
2565 
2566     public final void pand(Register dst, Register src) {
2567         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2568         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2569         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2570         emitByte(0xDB);
2571         emitByte(0xC0 | encode);
2572     }
2573 
2574     public final void pxor(Register dst, Register src) {
2575         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2576         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2577         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2578         emitByte(0xEF);
2579         emitByte(0xC0 | encode);
2580     }
2581 
2582     public final void vpxor(Register dst, Register nds, Register src) {
2583         assert supports(CPUFeature.AVX);
2584         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2585         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2586         int encode = vexPrefixAndEncode(dst, nds, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2587         emitByte(0xEF);
2588         emitByte(0xC0 | encode);
2589     }
2590 
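    /**
     * The immediate forms of the packed shifts below encode the operation in the ModRM reg field;
     * the xmmN register passed as the first argument to {@code simdPrefixAndEncode} stands for the
     * opcode extension /N. For illustration, on a plain SSE2 target {@code pslld(xmm1, 5)} emits
     * {@code 66 0F 72 F1 05}.
     */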
2591     public final void pslld(Register dst, int imm8) {
2592         assert isUByte(imm8) : "invalid value";
2593         assert dst.getRegisterCategory().equals(AMD64.XMM);
2594         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2595         // XMM6 is for /6 encoding: 66 0F 72 /6 ib
2596         int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2597         emitByte(0x72);
2598         emitByte(0xC0 | encode);
2599         emitByte(imm8 & 0xFF);
2600     }
2601 
2602     public final void psllq(Register dst, Register shift) {
2603         assert dst.getRegisterCategory().equals(AMD64.XMM) && shift.getRegisterCategory().equals(AMD64.XMM);
2604         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2605         int encode = simdPrefixAndEncode(dst, dst, shift, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2606         emitByte(0xF3);
2607         emitByte(0xC0 | encode);
2608     }
2609 
2610     public final void psllq(Register dst, int imm8) {
2611         assert isUByte(imm8) : "invalid value";
2612         assert dst.getRegisterCategory().equals(AMD64.XMM);
2613         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2614         // XMM6 is for /6 encoding: 66 0F 73 /6 ib
2615         int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2616         emitByte(0x73);
2617         emitByte(0xC0 | encode);
2618         emitByte(imm8);
2619     }
2620 
2621     public final void psrad(Register dst, int imm8) {
2622         assert isUByte(imm8) : "invalid value";
2623         assert dst.getRegisterCategory().equals(AMD64.XMM);
2624         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        // XMM4 is for /4 encoding: 66 0F 72 /4 ib
2626         int encode = simdPrefixAndEncode(AMD64.xmm4, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2627         emitByte(0x72);
2628         emitByte(0xC0 | encode);
2629         emitByte(imm8);
2630     }
2631 
2632     public final void psrld(Register dst, int imm8) {
2633         assert isUByte(imm8) : "invalid value";
2634         assert dst.getRegisterCategory().equals(AMD64.XMM);
2635         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2636         // XMM2 is for /2 encoding: 66 0F 72 /2 ib
2637         int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2638         emitByte(0x72);
2639         emitByte(0xC0 | encode);
2640         emitByte(imm8);
2641     }
2642 
2643     public final void psrlq(Register dst, int imm8) {
2644         assert isUByte(imm8) : "invalid value";
2645         assert dst.getRegisterCategory().equals(AMD64.XMM);
2646         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2647         // XMM2 is for /2 encoding: 66 0F 73 /2 ib
2648         int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2649         emitByte(0x73);
2650         emitByte(0xC0 | encode);
2651         emitByte(imm8);
2652     }
2653 
2654     public final void psrldq(Register dst, int imm8) {
2655         assert isUByte(imm8) : "invalid value";
2656         assert dst.getRegisterCategory().equals(AMD64.XMM);
2657         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
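        // XMM3 is for /3 encoding: 66 0F 73 /3 ib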
2658         int encode = simdPrefixAndEncode(AMD64.xmm3, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2659         emitByte(0x73);
2660         emitByte(0xC0 | encode);
2661         emitByte(imm8);
2662     }
2663 
2664     public final void pshufd(Register dst, Register src, int imm8) {
2665         assert isUByte(imm8) : "invalid value";
2666         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2667         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2668         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2669         emitByte(0x70);
2670         emitByte(0xC0 | encode);
2671         emitByte(imm8);
2672     }
2673 
2674     public final void psubd(Register dst, Register src) {
2675         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2676         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2677         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2678         emitByte(0xFA);
2679         emitByte(0xC0 | encode);
2680     }
2681 
2682     public final void rcpps(Register dst, Register src) {
2683         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2684         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target);
2685         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2686         emitByte(0x53);
2687         emitByte(0xC0 | encode);
2688     }
2689 
2690     public final void ret(int imm16) {
2691         if (imm16 == 0) {
2692             emitByte(0xC3);
2693         } else {
2694             emitByte(0xC2);
2695             emitShort(imm16);
2696         }
2697     }
2698 
2699     public final void sarl(Register dst, int imm8) {
2700         int encode = prefixAndEncode(dst.encoding);
2701         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2702         if (imm8 == 1) {
2703             emitByte(0xD1);
2704             emitByte(0xF8 | encode);
2705         } else {
2706             emitByte(0xC1);
2707             emitByte(0xF8 | encode);
2708             emitByte(imm8);
2709         }
2710     }
2711 
2712     public final void shll(Register dst, int imm8) {
2713         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2714         int encode = prefixAndEncode(dst.encoding);
2715         if (imm8 == 1) {
2716             emitByte(0xD1);
2717             emitByte(0xE0 | encode);
2718         } else {
2719             emitByte(0xC1);
2720             emitByte(0xE0 | encode);
2721             emitByte(imm8);
2722         }
2723     }
2724 
2725     public final void shll(Register dst) {
2726         int encode = prefixAndEncode(dst.encoding);
2727         emitByte(0xD3);
2728         emitByte(0xE0 | encode);
2729     }
2730 
2731     public final void shrl(Register dst, int imm8) {
2732         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2733         int encode = prefixAndEncode(dst.encoding);
2734         emitByte(0xC1);
2735         emitByte(0xE8 | encode);
2736         emitByte(imm8);
2737     }
2738 
2739     public final void shrl(Register dst) {
2740         int encode = prefixAndEncode(dst.encoding);
2741         emitByte(0xD3);
2742         emitByte(0xE8 | encode);
2743     }
2744 
2745     public final void subl(AMD64Address dst, int imm32) {
2746         SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2747     }
2748 
2749     public final void subl(Register dst, int imm32) {
2750         SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2751     }
2752 
2753     public final void subl(Register dst, Register src) {
2754         SUB.rmOp.emit(this, DWORD, dst, src);
2755     }
2756 
2757     public final void subpd(Register dst, Register src) {
2758         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2759         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2760         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2761         emitByte(0x5C);
2762         emitByte(0xC0 | encode);
2763     }
2764 
2765     public final void subsd(Register dst, Register src) {
2766         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2767         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2768         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2769         emitByte(0x5C);
2770         emitByte(0xC0 | encode);
2771     }
2772 
2773     public final void subsd(Register dst, AMD64Address src) {
2774         assert dst.getRegisterCategory().equals(AMD64.XMM);
2775         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2776         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2777         emitByte(0x5C);
2778         emitOperandHelper(dst, src, 0);
2779     }
2780 
2781     public final void testl(Register dst, int imm32) {
        // not using emitArith because test doesn't support sign-extension of 8-bit operands
2785         int encode = dst.encoding;
2786         if (encode == 0) {
2787             emitByte(0xA9);
2788         } else {
2789             encode = prefixAndEncode(encode);
2790             emitByte(0xF7);
2791             emitByte(0xC0 | encode);
2792         }
2793         emitInt(imm32);
2794     }
2795 
2796     public final void testl(Register dst, Register src) {
2797         int encode = prefixAndEncode(dst.encoding, src.encoding);
2798         emitByte(0x85);
2799         emitByte(0xC0 | encode);
2800     }
2801 
2802     public final void testl(Register dst, AMD64Address src) {
2803         prefix(src, dst);
2804         emitByte(0x85);
2805         emitOperandHelper(dst, src, 0);
2806     }
2807 
2808     public final void unpckhpd(Register dst, Register src) {
2809         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2810         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2811         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2812         emitByte(0x15);
2813         emitByte(0xC0 | encode);
2814     }
2815 
2816     public final void unpcklpd(Register dst, Register src) {
2817         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2818         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2819         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2820         emitByte(0x14);
2821         emitByte(0xC0 | encode);
2822     }
2823 
2824     public final void xorl(Register dst, Register src) {
2825         XOR.rmOp.emit(this, DWORD, dst, src);
2826     }
2827 
2828     public final void xorpd(Register dst, Register src) {
2829         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2830         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2831         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2832         emitByte(0x57);
2833         emitByte(0xC0 | encode);
2834     }
2835 
2836     public final void xorps(Register dst, Register src) {
2837         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2838         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2839         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2840         emitByte(0x57);
2841         emitByte(0xC0 | encode);
2842     }
2843 
2844     protected final void decl(Register dst) {
2845         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2846         int encode = prefixAndEncode(dst.encoding);
2847         emitByte(0xFF);
2848         emitByte(0xC8 | encode);
2849     }
2850 
2851     protected final void incl(Register dst) {
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2853         int encode = prefixAndEncode(dst.encoding);
2854         emitByte(0xFF);
2855         emitByte(0xC0 | encode);
2856     }
2857 
2858     private int prefixAndEncode(int regEnc) {
2859         return prefixAndEncode(regEnc, false);
2860     }
2861 
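    /**
     * Emits a REX.B prefix if the register encoding needs extension and returns the adjusted
     * encoding. For byte instructions a bare REX (0x40) is emitted for encodings 4-7 so that
     * SPL/BPL/SIL/DIL are addressed instead of AH/CH/DH/BH.
     */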
2862     private int prefixAndEncode(int regEnc, boolean byteinst) {
2863         if (regEnc >= 8) {
2864             emitByte(Prefix.REXB);
2865             return regEnc - 8;
2866         } else if (byteinst && regEnc >= 4) {
2867             emitByte(Prefix.REX);
2868         }
2869         return regEnc;
2870     }
2871 
2872     private int prefixqAndEncode(int regEnc) {
2873         if (regEnc < 8) {
2874             emitByte(Prefix.REXW);
2875             return regEnc;
2876         } else {
2877             emitByte(Prefix.REXWB);
2878             return regEnc - 8;
2879         }
2880     }
2881 
2882     private int prefixAndEncode(int dstEnc, int srcEnc) {
2883         return prefixAndEncode(dstEnc, false, srcEnc, false);
2884     }
2885 
2886     private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
2887         int srcEnc = srcEncoding;
2888         int dstEnc = dstEncoding;
2889         if (dstEnc < 8) {
2890             if (srcEnc >= 8) {
2891                 emitByte(Prefix.REXB);
2892                 srcEnc -= 8;
2893             } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
2894                 emitByte(Prefix.REX);
2895             }
2896         } else {
2897             if (srcEnc < 8) {
2898                 emitByte(Prefix.REXR);
2899             } else {
2900                 emitByte(Prefix.REXRB);
2901                 srcEnc -= 8;
2902             }
2903             dstEnc -= 8;
2904         }
2905         return dstEnc << 3 | srcEnc;
2906     }
2907 
2908     /**
     * Creates the REX.W prefix and the encoding of the lower 6 bits of the ModRM byte. If a given
     * register encoding exceeds 3 bits, its 4th bit is encoded in the prefix.
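     * <p>
     * For example, {@code prefixqAndEncode(0, 9)} (as used by {@code imulq(rax, r9)}) emits the
     * REX.W+B prefix 0x49 and returns 0b000001, so the caller's {@code 0xC0 | encode} yields the
     * ModRM byte 0xC1.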
2911      *
2912      * @param regEncoding the encoding of the register part of the ModRM-Byte
2913      * @param rmEncoding the encoding of the r/m part of the ModRM-Byte
2914      * @return the lower 6 bits of the ModRM-Byte that should be emitted
2915      */
2916     private int prefixqAndEncode(int regEncoding, int rmEncoding) {
2917         int rmEnc = rmEncoding;
2918         int regEnc = regEncoding;
2919         if (regEnc < 8) {
2920             if (rmEnc < 8) {
2921                 emitByte(Prefix.REXW);
2922             } else {
2923                 emitByte(Prefix.REXWB);
2924                 rmEnc -= 8;
2925             }
2926         } else {
2927             if (rmEnc < 8) {
2928                 emitByte(Prefix.REXWR);
2929             } else {
2930                 emitByte(Prefix.REXWRB);
2931                 rmEnc -= 8;
2932             }
2933             regEnc -= 8;
2934         }
2935         return regEnc << 3 | rmEnc;
2936     }
2937 
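    /**
     * Emits a VEX prefix. The two-byte form {@code C5 [R vvvv L pp]} can only be used when the X
     * and B extension bits, VEX.W and the 0F 38 / 0F 3A opcode maps are not needed; otherwise the
     * three-byte form {@code C4 [R X B m-mmmm] [W vvvv L pp]} is emitted. The extension bits and
     * vvvv are stored inverted, which is why {@code rxb} and {@code ndsEncoding} are complemented
     * below.
     */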
2938     private void vexPrefix(int rxb, int ndsEncoding, int pre, int opc, AMD64InstructionAttr attributes) {
2939         int vectorLen = attributes.getVectorLen();
2940         boolean vexW = attributes.isRexVexW();
2941         boolean isXorB = ((rxb & 0x3) > 0);
2942         if (isXorB || vexW || (opc == VexOpcode.VEX_OPCODE_0F_38) || (opc == VexOpcode.VEX_OPCODE_0F_3A)) {
2943             emitByte(Prefix.VEX_3BYTES);
2944 
2945             int byte1 = (rxb << 5);
2946             byte1 = ((~byte1) & 0xE0) | opc;
2947             emitByte(byte1);
2948 
2949             int byte2 = ((~ndsEncoding) & 0xf) << 3;
2950             byte2 |= (vexW ? VexPrefix.VEX_W : 0) | ((vectorLen > 0) ? 4 : 0) | pre;
2951             emitByte(byte2);
2952         } else {
2953             emitByte(Prefix.VEX_2BYTES);
2954 
2955             int byte1 = ((rxb & 0x4) > 0) ? VexPrefix.VEX_R : 0;
2956             byte1 = (~byte1) & 0x80;
2957             byte1 |= ((~ndsEncoding) & 0xf) << 3;
2958             byte1 |= ((vectorLen > 0) ? 4 : 0) | pre;
2959             emitByte(byte1);
2960         }
2961     }
2962 
2963     private void vexPrefix(AMD64Address adr, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2964         int rxb = getRXB(src, adr);
2965         int ndsEncoding = nds.isValid() ? nds.encoding : 0;
2966         vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
2967         setCurAttributes(attributes);
2968     }
2969 
2970     private int vexPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
2971         int rxb = getRXB(dst, src);
2972         int ndsEncoding = nds.isValid() ? nds.encoding : 0;
2973         vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
2974         // return modrm byte components for operands
2975         return (((dst.encoding & 7) << 3) | (src.encoding & 7));
2976     }
2977 
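    /**
     * Emits the prefixes for an SSE/AVX instruction with a memory operand: a VEX prefix when AVX
     * is available, otherwise the legacy mandatory prefix (0x66, 0xF2 or 0xF3) followed by a REX
     * prefix if required and the opcode-map escape bytes.
     */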
2978     private void simdPrefix(Register xreg, Register nds, AMD64Address adr, int pre, int opc, AMD64InstructionAttr attributes) {
2979         if (supports(CPUFeature.AVX)) {
2980             vexPrefix(adr, nds, xreg, pre, opc, attributes);
2981         } else {
2982             switch (pre) {
2983                 case VexSimdPrefix.VEX_SIMD_66:
2984                     emitByte(0x66);
2985                     break;
2986                 case VexSimdPrefix.VEX_SIMD_F2:
2987                     emitByte(0xF2);
2988                     break;
2989                 case VexSimdPrefix.VEX_SIMD_F3:
2990                     emitByte(0xF3);
2991                     break;
2992             }
2993             if (attributes.isRexVexW()) {
2994                 prefixq(adr, xreg);
2995             } else {
2996                 prefix(adr, xreg);
2997             }
2998             switch (opc) {
2999                 case VexOpcode.VEX_OPCODE_0F:
3000                     emitByte(0x0F);
3001                     break;
3002                 case VexOpcode.VEX_OPCODE_0F_38:
3003                     emitByte(0x0F);
3004                     emitByte(0x38);
3005                     break;
3006                 case VexOpcode.VEX_OPCODE_0F_3A:
3007                     emitByte(0x0F);
3008                     emitByte(0x3A);
3009                     break;
3010             }
3011         }
3012     }
3013 
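    /**
     * Register-register counterpart of {@link #simdPrefix}; returns the ModRM bits for the two
     * registers. For illustration, {@code pxor(xmm1, xmm2)} encodes as {@code 66 0F EF CA} on a
     * plain SSE2 target and as the equivalent VEX form {@code C5 F1 EF CA} (vpxor xmm1, xmm1,
     * xmm2) when AVX is available.
     */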
3014     private int simdPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
3015         if (supports(CPUFeature.AVX)) {
3016             return vexPrefixAndEncode(dst, nds, src, pre, opc, attributes);
3017         } else {
3018             switch (pre) {
3019                 case VexSimdPrefix.VEX_SIMD_66:
3020                     emitByte(0x66);
3021                     break;
3022                 case VexSimdPrefix.VEX_SIMD_F2:
3023                     emitByte(0xF2);
3024                     break;
3025                 case VexSimdPrefix.VEX_SIMD_F3:
3026                     emitByte(0xF3);
3027                     break;
3028             }
3029             int encode;
3030             int dstEncoding = dst.encoding;
3031             int srcEncoding = src.encoding;
3032             if (attributes.isRexVexW()) {
3033                 encode = prefixqAndEncode(dstEncoding, srcEncoding);
3034             } else {
3035                 encode = prefixAndEncode(dstEncoding, srcEncoding);
3036             }
3037             switch (opc) {
3038                 case VexOpcode.VEX_OPCODE_0F:
3039                     emitByte(0x0F);
3040                     break;
3041                 case VexOpcode.VEX_OPCODE_0F_38:
3042                     emitByte(0x0F);
3043                     emitByte(0x38);
3044                     break;
3045                 case VexOpcode.VEX_OPCODE_0F_3A:
3046                     emitByte(0x0F);
3047                     emitByte(0x3A);
3048                     break;
3049             }
3050             return encode;
3051         }
3052     }
3053 
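    // Registers r8-r15 and xmm8-xmm15 have encodings of 8 and above and cannot be addressed
    // without a REX (or VEX) extension bit.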
3054     private static boolean needsRex(Register reg) {
3055         return reg.encoding >= MinEncodingNeedsRex;
3056     }
3057 
3058     private void prefix(AMD64Address adr) {
3059         if (needsRex(adr.getBase())) {
3060             if (needsRex(adr.getIndex())) {
3061                 emitByte(Prefix.REXXB);
3062             } else {
3063                 emitByte(Prefix.REXB);
3064             }
3065         } else {
3066             if (needsRex(adr.getIndex())) {
3067                 emitByte(Prefix.REXX);
3068             }
3069         }
3070     }
3071 
3072     private void prefixq(AMD64Address adr) {
3073         if (needsRex(adr.getBase())) {
3074             if (needsRex(adr.getIndex())) {
3075                 emitByte(Prefix.REXWXB);
3076             } else {
3077                 emitByte(Prefix.REXWB);
3078             }
3079         } else {
3080             if (needsRex(adr.getIndex())) {
3081                 emitByte(Prefix.REXWX);
3082             } else {
3083                 emitByte(Prefix.REXW);
3084             }
3085         }
3086     }
3087 
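    /**
     * For memory operands the REX bits map as follows: REX.R extends the ModRM reg field, REX.X
     * the SIB index register and REX.B the base register. The helpers in this group emit the
     * prefix constant that sets exactly the required bits.
     */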
3088     private void prefix(AMD64Address adr, Register reg) {
3089         prefix(adr, reg, false);
3090     }
3091 
3092     private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
3093         if (reg.encoding < 8) {
3094             if (needsRex(adr.getBase())) {
3095                 if (needsRex(adr.getIndex())) {
3096                     emitByte(Prefix.REXXB);
3097                 } else {
3098                     emitByte(Prefix.REXB);
3099                 }
3100             } else {
3101                 if (needsRex(adr.getIndex())) {
3102                     emitByte(Prefix.REXX);
3103                 } else if (byteinst && reg.encoding >= 4) {
3104                     emitByte(Prefix.REX);
3105                 }
3106             }
3107         } else {
3108             if (needsRex(adr.getBase())) {
3109                 if (needsRex(adr.getIndex())) {
3110                     emitByte(Prefix.REXRXB);
3111                 } else {
3112                     emitByte(Prefix.REXRB);
3113                 }
3114             } else {
3115                 if (needsRex(adr.getIndex())) {
3116                     emitByte(Prefix.REXRX);
3117                 } else {
3118                     emitByte(Prefix.REXR);
3119                 }
3120             }
3121         }
3122     }
3123 
3124     private void prefixq(AMD64Address adr, Register src) {
3125         if (src.encoding < 8) {
3126             if (needsRex(adr.getBase())) {
3127                 if (needsRex(adr.getIndex())) {
3128                     emitByte(Prefix.REXWXB);
3129                 } else {
3130                     emitByte(Prefix.REXWB);
3131                 }
3132             } else {
3133                 if (needsRex(adr.getIndex())) {
3134                     emitByte(Prefix.REXWX);
3135                 } else {
3136                     emitByte(Prefix.REXW);
3137                 }
3138             }
3139         } else {
3140             if (needsRex(adr.getBase())) {
3141                 if (needsRex(adr.getIndex())) {
3142                     emitByte(Prefix.REXWRXB);
3143                 } else {
3144                     emitByte(Prefix.REXWRB);
3145                 }
3146             } else {
3147                 if (needsRex(adr.getIndex())) {
3148                     emitByte(Prefix.REXWRX);
3149                 } else {
3150                     emitByte(Prefix.REXWR);
3151                 }
3152             }
3153         }
3154     }
3155 
3156     public final void addq(Register dst, int imm32) {
3157         ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3158     }
3159 
3160     public final void addq(AMD64Address dst, int imm32) {
3161         ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3162     }
3163 
3164     public final void addq(Register dst, Register src) {
3165         ADD.rmOp.emit(this, QWORD, dst, src);
3166     }
3167 
3168     public final void addq(AMD64Address dst, Register src) {
3169         ADD.mrOp.emit(this, QWORD, dst, src);
3170     }
3171 
3172     public final void andq(Register dst, int imm32) {
3173         AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3174     }
3175 
3176     public final void bsrq(Register dst, Register src) {
3177         int encode = prefixqAndEncode(dst.encoding(), src.encoding());
3178         emitByte(0x0F);
3179         emitByte(0xBD);
3180         emitByte(0xC0 | encode);
3181     }
3182 
3183     public final void bswapq(Register reg) {
3184         int encode = prefixqAndEncode(reg.encoding);
3185         emitByte(0x0F);
3186         emitByte(0xC8 | encode);
3187     }
3188 
3189     public final void cdqq() {
3190         emitByte(Prefix.REXW);
3191         emitByte(0x99);
3192     }
3193 
3194     public final void cmovq(ConditionFlag cc, Register dst, Register src) {
3195         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3196         emitByte(0x0F);
3197         emitByte(0x40 | cc.getValue());
3198         emitByte(0xC0 | encode);
3199     }
3200 
3201     public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
3202         prefixq(src, dst);
3203         emitByte(0x0F);
3204         emitByte(0x40 | cc.getValue());
3205         emitOperandHelper(dst, src, 0);
3206     }
3207 
3208     public final void cmpq(Register dst, int imm32) {
3209         CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3210     }
3211 
3212     public final void cmpq(Register dst, Register src) {
3213         CMP.rmOp.emit(this, QWORD, dst, src);
3214     }
3215 
3216     public final void cmpq(Register dst, AMD64Address src) {
3217         CMP.rmOp.emit(this, QWORD, dst, src);
3218     }
3219 
3220     public final void cmpxchgq(Register reg, AMD64Address adr) {
3221         prefixq(adr, reg);
3222         emitByte(0x0F);
3223         emitByte(0xB1);
3224         emitOperandHelper(reg, adr, 0);
3225     }
3226 
3227     public final void cvtdq2pd(Register dst, Register src) {
3228         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3229         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3230         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3231         emitByte(0xE6);
3232         emitByte(0xC0 | encode);
3233     }
3234 
3235     public final void cvtsi2sdq(Register dst, Register src) {
3236         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
3237         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3238         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3239         emitByte(0x2A);
3240         emitByte(0xC0 | encode);
3241     }
3242 
3243     public final void cvttsd2siq(Register dst, Register src) {
3244         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
3245         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3246         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3247         emitByte(0x2C);
3248         emitByte(0xC0 | encode);
3249     }
3250 
3251     public final void cvttpd2dq(Register dst, Register src) {
3252         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3253         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3254         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3255         emitByte(0xE6);
3256         emitByte(0xC0 | encode);
3257     }
3258 
3259     protected final void decq(Register dst) {
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3261         int encode = prefixqAndEncode(dst.encoding);
3262         emitByte(0xFF);
3263         emitByte(0xC8 | encode);
3264     }
3265 
3266     public final void decq(AMD64Address dst) {
3267         DEC.emit(this, QWORD, dst);
3268     }
3269 
3270     public final void imulq(Register dst, Register src) {
3271         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3272         emitByte(0x0F);
3273         emitByte(0xAF);
3274         emitByte(0xC0 | encode);
3275     }
3276 
3277     public final void incq(Register dst) {
        // Don't use this directly; use the macro assembler's increment operation instead.
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3280         int encode = prefixqAndEncode(dst.encoding);
3281         emitByte(0xFF);
3282         emitByte(0xC0 | encode);
3283     }
3284 
3285     public final void incq(AMD64Address dst) {
3286         INC.emit(this, QWORD, dst);
3287     }
3288 
3289     public final void movq(Register dst, long imm64) {
3290         int encode = prefixqAndEncode(dst.encoding);
3291         emitByte(0xB8 | encode);
3292         emitLong(imm64);
3293     }
3294 
3295     public final void movslq(Register dst, int imm32) {
3296         int encode = prefixqAndEncode(dst.encoding);
3297         emitByte(0xC7);
3298         emitByte(0xC0 | encode);
3299         emitInt(imm32);
3300     }
3301 
3302     public final void movdq(Register dst, AMD64Address src) {
3303         assert dst.getRegisterCategory().equals(AMD64.XMM);
3304         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3305         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3306         emitByte(0x6E);
3307         emitOperandHelper(dst, src, 0);
3308     }
3309 
3310     public final void movdq(AMD64Address dst, Register src) {
3311         assert src.getRegisterCategory().equals(AMD64.XMM);
3312         // swap src/dst to get correct prefix
3313         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3314         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3315         emitByte(0x7E);
3316         emitOperandHelper(src, dst, 0);
3317     }
3318 
3319     public final void movdq(Register dst, Register src) {
3320         if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
3321             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3322             int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3323             emitByte(0x6E);
3324             emitByte(0xC0 | encode);
3325         } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
3326             // swap src/dst to get correct prefix
3327             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3328             int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3329             emitByte(0x7E);
3330             emitByte(0xC0 | encode);
3331         } else {
3332             throw new InternalError("should not reach here");
3333         }
3334     }
3335 
3336     public final void movdl(Register dst, Register src) {
3337         if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
3338             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3339             int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3340             emitByte(0x6E);
3341             emitByte(0xC0 | encode);
3342         } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
3343             // swap src/dst to get correct prefix
3344             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3345             int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3346             emitByte(0x7E);
3347             emitByte(0xC0 | encode);
3348         } else {
3349             throw new InternalError("should not reach here");
3350         }
3351     }
3352 
3353     public final void movdl(Register dst, AMD64Address src) {
3354         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3355         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3356         emitByte(0x6E);
3357         emitOperandHelper(dst, src, 0);
3358     }
3359 
3360     public final void movddup(Register dst, Register src) {
3361         assert supports(CPUFeature.SSE3);
3362         assert dst.getRegisterCategory().equals(AMD64.XMM);
3363         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3364         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3365         emitByte(0x12);
3366         emitByte(0xC0 | encode);
3367     }
3368 
3369     public final void movdqu(Register dst, AMD64Address src) {
3370         assert dst.getRegisterCategory().equals(AMD64.XMM);
3371         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3372         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3373         emitByte(0x6F);
3374         emitOperandHelper(dst, src, 0);
3375     }
3376 
3377     public final void movdqu(Register dst, Register src) {
3378         assert dst.getRegisterCategory().equals(AMD64.XMM);
3379         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3380         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3381         emitByte(0x6F);
3382         emitByte(0xC0 | encode);
3383     }
3384 
3385     public final void vmovdqu(Register dst, AMD64Address src) {
3386         assert supports(CPUFeature.AVX);
3387         assert dst.getRegisterCategory().equals(AMD64.XMM);
3388         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3389         vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3390         emitByte(0x6F);
3391         emitOperandHelper(dst, src, 0);
3392     }
3393 
3394     public final void vzeroupper() {
3395         assert supports(CPUFeature.AVX);
3396         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3397         vexPrefixAndEncode(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
3398         emitByte(0x77);
3399     }
3400 
3401     public final void movslq(AMD64Address dst, int imm32) {
3402         prefixq(dst);
3403         emitByte(0xC7);
3404         emitOperandHelper(0, dst, 4);
3405         emitInt(imm32);
3406     }
3407 
3408     public final void movslq(Register dst, AMD64Address src) {
3409         prefixq(src, dst);
3410         emitByte(0x63);
3411         emitOperandHelper(dst, src, 0);
3412     }
3413 
3414     public final void movslq(Register dst, Register src) {
3415         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3416         emitByte(0x63);
3417         emitByte(0xC0 | encode);
3418     }
3419 
3420     public final void negq(Register dst) {
3421         int encode = prefixqAndEncode(dst.encoding);
3422         emitByte(0xF7);
3423         emitByte(0xD8 | encode);
3424     }
3425 
3426     public final void orq(Register dst, Register src) {
3427         OR.rmOp.emit(this, QWORD, dst, src);
3428     }
3429 
3430     public final void shlq(Register dst, int imm8) {
3431         assert isShiftCount(imm8 >> 1) : "illegal shift count";
3432         int encode = prefixqAndEncode(dst.encoding);
3433         if (imm8 == 1) {
3434             emitByte(0xD1);
3435             emitByte(0xE0 | encode);
3436         } else {
3437             emitByte(0xC1);
3438             emitByte(0xE0 | encode);
3439             emitByte(imm8);
3440         }
3441     }
3442 
3443     public final void shlq(Register dst) {
3444         int encode = prefixqAndEncode(dst.encoding);
3445         emitByte(0xD3);
3446         emitByte(0xE0 | encode);
3447     }
3448 
3449     public final void shrq(Register dst, int imm8) {
3450         assert isShiftCount(imm8 >> 1) : "illegal shift count";
3451         int encode = prefixqAndEncode(dst.encoding);
3452         if (imm8 == 1) {
3453             emitByte(0xD1);
3454             emitByte(0xE8 | encode);
3455         } else {
3456             emitByte(0xC1);
3457             emitByte(0xE8 | encode);
3458             emitByte(imm8);
3459         }
3460     }
3461 
3462     public final void shrq(Register dst) {
3463         int encode = prefixqAndEncode(dst.encoding);
3464         emitByte(0xD3);
3465         emitByte(0xE8 | encode);
3466     }
3467 
3468     public final void sbbq(Register dst, Register src) {
3469         SBB.rmOp.emit(this, QWORD, dst, src);
3470     }
3471 
3472     public final void subq(Register dst, int imm32) {
3473         SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3474     }
3475 
3476     public final void subq(AMD64Address dst, int imm32) {
3477         SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3478     }
3479 
3480     public final void subqWide(Register dst, int imm32) {
3481         // don't use the sign-extending version, forcing a 32-bit immediate
3482         SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
3483     }
3484 
3485     public final void subq(Register dst, Register src) {
3486         SUB.rmOp.emit(this, QWORD, dst, src);
3487     }
3488 
3489     public final void testq(Register dst, Register src) {
3490         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3491         emitByte(0x85);
3492         emitByte(0xC0 | encode);
3493     }
3494 
3495     public final void btrq(Register src, int imm8) {
3496         int encode = prefixqAndEncode(src.encoding);
3497         emitByte(0x0F);
3498         emitByte(0xBA);
3499         emitByte(0xF0 | encode);
3500         emitByte(imm8);
3501     }
3502 
3503     public final void xaddl(AMD64Address dst, Register src) {
3504         prefix(dst, src);
3505         emitByte(0x0F);
3506         emitByte(0xC1);
3507         emitOperandHelper(src, dst, 0);
3508     }
3509 
3510     public final void xaddq(AMD64Address dst, Register src) {
3511         prefixq(dst, src);
3512         emitByte(0x0F);
3513         emitByte(0xC1);
3514         emitOperandHelper(src, dst, 0);
3515     }
3516 
3517     public final void xchgl(Register dst, AMD64Address src) {
3518         prefix(src, dst);
3519         emitByte(0x87);
3520         emitOperandHelper(dst, src, 0);
3521     }
3522 
3523     public final void xchgq(Register dst, AMD64Address src) {
3524         prefixq(src, dst);
3525         emitByte(0x87);
3526         emitOperandHelper(dst, src, 0);
3527     }
3528 
3529     public final void membar(int barriers) {
3530         if (target.isMP) {
3531             // We only have to handle StoreLoad
3532             if ((barriers & STORE_LOAD) != 0) {
                // All usable chips support "locked" instructions, which suffice as barriers and
                // are much faster than the alternative of using the cpuid instruction. Here we
                // use a locked add of 0 to [rsp], which is conveniently otherwise a no-op apart
                // from clobbering the flags.
3538                 // Any change to this code may need to revisit other places in
3539                 // the code where this idiom is used, in particular the
3540                 // orderAccess code.
3541                 lock();
3542                 addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
3543             }
3544         }
3545     }
3546 
3547     @Override
3548     protected final void patchJumpTarget(int branch, int branchTarget) {
3549         int op = getByte(branch);
3550         assert op == 0xE8 // call
3551                         ||
3552                         op == 0x00 // jump table entry
3553                         || op == 0xE9 // jmp
3554                         || op == 0xEB // short jmp
3555                         || (op & 0xF0) == 0x70 // short jcc
3556                         || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
3557         : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
3558 
3559         if (op == 0x00) {
3560             int offsetToJumpTableBase = getShort(branch + 1);
3561             int jumpTableBase = branch - offsetToJumpTableBase;
3562             int imm32 = branchTarget - jumpTableBase;
3563             emitInt(imm32, branch);
3564         } else if (op == 0xEB || (op & 0xF0) == 0x70) {
3565 
3566             // short offset operators (jmp and jcc)
3567             final int imm8 = branchTarget - (branch + 2);
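            // e.g. a short jmp (0xEB) emitted at position 100 with branchTarget 120 is patched
            // with imm8 = 120 - 102 = 18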
3568             /*
             * Since a wrongly patched short branch can produce code that appears to work but
             * behaves badly, we always fail with an exception here instead of relying on an assert.
3571              */
3572             if (!NumUtil.isByte(imm8)) {
3573                 throw new InternalError("branch displacement out of range: " + imm8);
3574             }
3575             emitByte(imm8, branch + 1);
3576 
3577         } else {
3578 
3579             int off = 1;
3580             if (op == 0x0F) {
3581                 off = 2;
3582             }
3583 
3584             int imm32 = branchTarget - (branch + 4 + off);
3585             emitInt(imm32, branch + off);
3586         }
3587     }
3588 
3589     public void nullCheck(AMD64Address address) {
3590         testl(AMD64.rax, address);
3591     }
3592 
3593     @Override
3594     public void align(int modulus) {
3595         if (position() % modulus != 0) {
3596             nop(modulus - (position() % modulus));
3597         }
3598     }
3599 
3600     /**
3601      * Emits a direct call instruction. Note that the actual call target is not specified, because
     * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
     * responsible for adding the call address to the appropriate patching tables.
3604      */
3605     public final void call() {
3606         if (codePatchingAnnotationConsumer != null) {
3607             int pos = position();
3608             codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + 1, 4, pos + 5));
3609         }
3610         emitByte(0xE8);
3611         emitInt(0);
3612     }
3613 
3614     public final void call(Register src) {
3615         int encode = prefixAndEncode(src.encoding);
3616         emitByte(0xFF);
3617         emitByte(0xD0 | encode);
3618     }
3619 
3620     public final void int3() {
3621         emitByte(0xCC);
3622     }
3623 
3624     public final void pause() {
3625         emitByte(0xF3);
3626         emitByte(0x90);
3627     }
3628 
3629     private void emitx87(int b1, int b2, int i) {
3630         assert 0 <= i && i < 8 : "illegal stack offset";
3631         emitByte(b1);
3632         emitByte(b2 + i);
3633     }
3634 
3635     public final void fldd(AMD64Address src) {
3636         emitByte(0xDD);
3637         emitOperandHelper(0, src, 0);
3638     }
3639 
3640     public final void flds(AMD64Address src) {
3641         emitByte(0xD9);
3642         emitOperandHelper(0, src, 0);
3643     }
3644 
3645     public final void fldln2() {
3646         emitByte(0xD9);
3647         emitByte(0xED);
3648     }
3649 
3650     public final void fldlg2() {
3651         emitByte(0xD9);
3652         emitByte(0xEC);
3653     }
3654 
3655     public final void fyl2x() {
3656         emitByte(0xD9);
3657         emitByte(0xF1);
3658     }
3659 
3660     public final void fstps(AMD64Address src) {
3661         emitByte(0xD9);
3662         emitOperandHelper(3, src, 0);
3663     }
3664 
3665     public final void fstpd(AMD64Address src) {
3666         emitByte(0xDD);
3667         emitOperandHelper(3, src, 0);
3668     }
3669 
3670     private void emitFPUArith(int b1, int b2, int i) {
3671         assert 0 <= i && i < 8 : "illegal FPU register: " + i;
3672         emitByte(b1);
3673         emitByte(b2 + i);
3674     }
3675 
3676     public void ffree(int i) {
3677         emitFPUArith(0xDD, 0xC0, i);
3678     }
3679 
3680     public void fincstp() {
3681         emitByte(0xD9);
3682         emitByte(0xF7);
3683     }
3684 
3685     public void fxch(int i) {
3686         emitFPUArith(0xD9, 0xC8, i);
3687     }
3688 
3689     public void fnstswAX() {
3690         emitByte(0xDF);
3691         emitByte(0xE0);
3692     }
3693 
3694     public void fwait() {
3695         emitByte(0x9B);
3696     }
3697 
3698     public void fprem() {
3699         emitByte(0xD9);
3700         emitByte(0xF8);
3701     }
3702 
3703     public final void fsin() {
3704         emitByte(0xD9);
3705         emitByte(0xFE);
3706     }
3707 
3708     public final void fcos() {
3709         emitByte(0xD9);
3710         emitByte(0xFF);
3711     }
3712 
3713     public final void fptan() {
3714         emitByte(0xD9);
3715         emitByte(0xF2);
3716     }
3717 
3718     public final void fstp(int i) {
3719         emitx87(0xDD, 0xD8, i);
3720     }
3721 
3722     @Override
3723     public AMD64Address makeAddress(Register base, int displacement) {
3724         return new AMD64Address(base, displacement);
3725     }
3726 
3727     @Override
3728     public AMD64Address getPlaceholder(int instructionStartPosition) {
3729         return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition);
3730     }
3731 
3732     private void prefetchPrefix(AMD64Address src) {
3733         prefix(src);
3734         emitByte(0x0F);
3735     }
3736 
3737     public void prefetchnta(AMD64Address src) {
3738         prefetchPrefix(src);
3739         emitByte(0x18);
3740         emitOperandHelper(0, src, 0);
3741     }
3742 
3743     void prefetchr(AMD64Address src) {
3744         assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
3745         prefetchPrefix(src);
3746         emitByte(0x0D);
3747         emitOperandHelper(0, src, 0);
3748     }
3749 
3750     public void prefetcht0(AMD64Address src) {
3751         assert supports(CPUFeature.SSE);
3752         prefetchPrefix(src);
3753         emitByte(0x18);
3754         emitOperandHelper(1, src, 0);
3755     }
3756 
3757     public void prefetcht1(AMD64Address src) {
3758         assert supports(CPUFeature.SSE);
3759         prefetchPrefix(src);
3760         emitByte(0x18);
3761         emitOperandHelper(2, src, 0);
3762     }
3763 
3764     public void prefetcht2(AMD64Address src) {
3765         assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
3769         emitOperandHelper(3, src, 0);
3770     }
3771 
3772     public void prefetchw(AMD64Address src) {
3773         assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
3777         emitOperandHelper(1, src, 0);
3778     }
3779 
3780     public void rdtsc() {
3781         emitByte(0x0F);
3782         emitByte(0x31);
3783     }
3784 
3785     /**
     * Emits the UD2 instruction (0x0F 0x0B), which raises an invalid-opcode exception. This is
     * used if we deliberately want to crash the program (e.g. for debugging).
3788      */
3789     public void illegal() {
3790         emitByte(0x0f);
3791         emitByte(0x0b);
3792     }
3793 }