/*
 * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.amd64.AMD64.r12;
import static jdk.vm.ci.amd64.AMD64.r13;
import static jdk.vm.ci.amd64.AMD64.rbp;
import static jdk.vm.ci.amd64.AMD64.rip;
import static jdk.vm.ci.amd64.AMD64.rsp;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.WORD;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import org.graalvm.compiler.asm.Assembler;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.meta.PlatformKind;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends Assembler {

    private static final int MinEncodingNeedsRex = 8;

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }
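    // Illustrative sketch: the low bit of an x86 condition code selects between a
    // predicate and its negation, which is why negate() pairs Zero (0x4) with
    // NotZero (0x5), Below (0x2) with AboveEqual (0x3), and so on. A short
    // conditional jump is encoded as (0x70 | cc.getValue()) followed by a disp8;
    // emitting "je" over the next 5 bytes by hand would look like this
    // (hypothetical usage, assuming an AMD64Assembler instance asm):
    //
    //   asm.emitByte(0x70 | ConditionFlag.Equal.getValue()); // 0x74 = JE rel8
    //   asm.emitByte(5);                                     // 8-bit branch offset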
    /**
     * Constants for X86 prefix bytes.
     */
    private static class Prefix {
        private static final int REX = 0x40;
        private static final int REXB = 0x41;
        private static final int REXX = 0x42;
        private static final int REXXB = 0x43;
        private static final int REXR = 0x44;
        private static final int REXRB = 0x45;
        private static final int REXRX = 0x46;
        private static final int REXRXB = 0x47;
        private static final int REXW = 0x48;
        private static final int REXWB = 0x49;
        private static final int REXWX = 0x4A;
        private static final int REXWXB = 0x4B;
        private static final int REXWR = 0x4C;
        private static final int REXWRB = 0x4D;
        private static final int REXWRX = 0x4E;
        private static final int REXWRXB = 0x4F;
        private static final int VEX_3BYTES = 0xC4;
        private static final int VEX_2BYTES = 0xC5;
    }

    private static class VexPrefix {
        private static final int VEX_R = 0x80;
        private static final int VEX_W = 0x80;
    }

    private static class AvxVectorLen {
        private static final int AVX_128bit = 0x0;
        private static final int AVX_256bit = 0x1;
    }

    private static class VexSimdPrefix {
        private static final int VEX_SIMD_NONE = 0x0;
        private static final int VEX_SIMD_66 = 0x1;
        private static final int VEX_SIMD_F3 = 0x2;
        private static final int VEX_SIMD_F2 = 0x3;
    }

    private static class VexOpcode {
        private static final int VEX_OPCODE_NONE = 0x0;
        private static final int VEX_OPCODE_0F = 0x1;
        private static final int VEX_OPCODE_0F_38 = 0x2;
        private static final int VEX_OPCODE_0F_3A = 0x3;
    }

    private AMD64InstructionAttr curAttributes;

    AMD64InstructionAttr getCurAttributes() {
        return curAttributes;
    }

    void setCurAttributes(AMD64InstructionAttr attributes) {
        curAttributes = attributes;
    }
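    // Illustrative sketch: a REX prefix has the binary layout 0100WRXB, so the
    // Prefix constants above are just 0x40 plus the chosen extension bits, e.g.
    //
    //   REXWR == 0x4C == REX | 0x08 /* W: 64-bit operand */ | 0x04 /* R: extends ModRM.reg */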
    /**
     * The x86 operand sizes.
     */
    public enum OperandSize {
        BYTE(1, AMD64Kind.BYTE) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            }

            @Override
            protected int immediateSize() {
                return 1;
            }
        },

        WORD(2, AMD64Kind.WORD, 0x66) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                assert imm == (short) imm;
                asm.emitShort(imm);
            }

            @Override
            protected int immediateSize() {
                return 2;
            }
        },

        DWORD(4, AMD64Kind.DWORD) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        QWORD(8, AMD64Kind.QWORD) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        SS(4, AMD64Kind.SINGLE, 0xF3, true),

        SD(8, AMD64Kind.DOUBLE, 0xF2, true),

        PS(16, AMD64Kind.V128_SINGLE, true),

        PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);

        private final int sizePrefix;
        private final int bytes;
        private final boolean xmm;
        private final AMD64Kind kind;

        OperandSize(int bytes, AMD64Kind kind) {
            this(bytes, kind, 0);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
            this(bytes, kind, sizePrefix, false);
        }

        OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
            this(bytes, kind, 0, xmm);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
            this.sizePrefix = sizePrefix;
            this.bytes = bytes;
            this.kind = kind;
            this.xmm = xmm;
        }

        public int getBytes() {
            return bytes;
        }

        public boolean isXmmType() {
            return xmm;
        }

        public AMD64Kind getKind() {
            return kind;
        }

        public static OperandSize get(PlatformKind kind) {
            for (OperandSize operandSize : OperandSize.values()) {
                if (operandSize.kind.equals(kind)) {
                    return operandSize;
                }
            }
            throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
        }

        /**
         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
         * as sign-extended 32-bit values.
         *
         * @param asm
         * @param imm
         */
        protected void emitImmediate(AMD64Assembler asm, int imm) {
            throw new UnsupportedOperationException();
        }

        protected int immediateSize() {
            throw new UnsupportedOperationException();
        }
    }
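    // Illustrative sketch: immediate QWORD operands are encoded as sign-extended
    // 32-bit values, which is why QWORD.immediateSize() is 4, not 8. A caller
    // holding a 64-bit constant (hypothetical variable imm) must therefore check:
    //
    //   assert isInt(imm) : "immediate must fit in 32 bits, sign-extended";
    //   QWORD.emitImmediate(asm, (int) imm);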
    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        No16BitAssertion(CPU, CPU, DWORD, QWORD),
        No32BitAssertion(CPU, CPU, WORD, QWORD),
        QwordOnlyAssertion(CPU, CPU, QWORD),
        FloatingAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatingAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatingAssertion(XMM, CPU, DWORD, QWORD),
        FloatingToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }
    }

    public abstract static class OperandDataAnnotation extends CodeAnnotation {
        /**
         * The position (bytes from the beginning of the method) of the operand.
         */
        public final int operandPosition;
        /**
         * The size of the operand, in bytes.
         */
        public final int operandSize;
        /**
         * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
         * RIP-relative operands are relative to this position.
         */
        public final int nextInstructionPosition;

        OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition);

            this.operandPosition = operandPosition;
            this.operandSize = operandSize;
            this.nextInstructionPosition = nextInstructionPosition;
        }

        @Override
        public String toString() {
            return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
        }
    }

    /**
     * Annotation that stores additional information about the displacement of a
     * {@link Assembler#getPlaceholder placeholder address} that needs patching.
     */
    public static class AddressDisplacementAnnotation extends OperandDataAnnotation {
        AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Annotation that stores additional information about the immediate operand, e.g., of a call
     * instruction, that needs patching.
     */
    public static class ImmediateOperandAnnotation extends OperandDataAnnotation {
        ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }

    public boolean supports(CPUFeature feature) {
        return ((AMD64) target.arch).getFeatures().contains(feature);
    }

    private static int encode(Register r) {
        assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
        return r.encoding & 0x7;
    }
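    // Illustrative sketch: encode() keeps only the 3 bits that fit in a ModRM/SIB
    // field; the fourth bit of r8..r15 must travel in the REX prefix instead. For
    // r8 (encoding 8), encode(r8) == 0 while getRXB(null, r8) below sets the B bit.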
    /**
     * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
     * field. The X bit must be 0.
     */
    protected static int getRXB(Register reg, Register rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
        return rxb;
    }

    /**
     * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
     * are two cases for the memory operand:<br>
     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
     * <br>
     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
     */
    protected static int getRXB(Register reg, AMD64Address rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        if (!rm.getIndex().equals(Register.None)) {
            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
        }
        if (!rm.getBase().equals(Register.None)) {
            rxb |= (rm.getBase().encoding & 0x08) >> 3;
        }
        return rxb;
    }

    /**
     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected void emitModRM(int reg, Register rm) {
        assert (reg & 0x07) == reg;
        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
    }

    /**
     * Emit the ModR/M byte for two register operands.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected void emitModRM(Register reg, Register rm) {
        emitModRM(reg.encoding & 0x07, rm);
    }
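    // Illustrative sketch: for register-register ADD r/m32, r32 (opcode 0x01),
    // emitModRM(src, rm) yields 0xC0 | (src & 7) << 3 | (rm & 7). With src = rcx
    // (encoding 1) and rm = rax (encoding 0) the byte is 0xC8, so the two-byte
    // sequence "01 C8" disassembles to "add eax, ecx".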
    protected void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     */
    protected void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize);
    }

    protected void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
        emitOperandHelper(reg, addr, false, additionalInstructionSize);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
     * extension in the R field.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
     *            so that the start position of the next instruction can be computed even though
     *            this instruction has not been completely emitted yet.
     */
    protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert (reg & 0x07) == reg;
        int regenc = reg << 3;

        Register base = addr.getBase();
        Register index = addr.getIndex();

        AMD64Address.Scale scale = addr.getScale();
        int disp = addr.getDisplacement();

        if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
            // [00 000 101] disp32
            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
            emitByte(0x05 | regenc);
            if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
                codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
            }
            emitInt(disp);
        } else if (base.isValid()) {
            int baseenc = base.isValid() ? encode(base) : 0;
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [base + indexscale + disp]
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base + indexscale]
                    // [00 reg 100][ss index base]
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x04 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                } else if (isByte(disp) && !force4Byte) {
                    // [base + indexscale + imm8]
                    // [01 reg 100][ss index base] imm8
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x44 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + indexscale + disp32]
                    // [10 reg 100][ss index base] disp32
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x84 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitInt(disp);
                }
            } else if (base.equals(rsp) || base.equals(r12)) {
                // [rsp + disp]
                if (disp == 0) {
                    // [rsp]
                    // [00 reg 100][00 100 100]
                    emitByte(0x04 | regenc);
                    emitByte(0x24);
                } else if (isByte(disp) && !force4Byte) {
                    // [rsp + imm8]
                    // [01 reg 100][00 100 100] disp8
                    emitByte(0x44 | regenc);
                    emitByte(0x24);
                    emitByte(disp & 0xFF);
                } else {
                    // [rsp + imm32]
                    // [10 reg 100][00 100 100] disp32
                    emitByte(0x84 | regenc);
                    emitByte(0x24);
                    emitInt(disp);
                }
            } else {
                // [base + disp]
                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base]
                    // [00 reg base]
                    emitByte(0x00 | regenc | baseenc);
                } else if (isByte(disp) && !force4Byte) {
                    // [base + disp8]
                    // [01 reg base] disp8
                    emitByte(0x40 | regenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + disp32]
                    // [10 reg base] disp32
                    emitByte(0x80 | regenc | baseenc);
                    emitInt(disp);
                }
            }
        } else {
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [indexscale + disp]
                // [00 reg 100][ss index 101] disp32
                assert !index.equals(rsp) : "illegal addressing mode";
                emitByte(0x04 | regenc);
                emitByte(scale.log2 << 6 | indexenc | 0x05);
                emitInt(disp);
            } else {
                // [disp] ABSOLUTE
                // [00 reg 100][00 100 101] disp32
                emitByte(0x04 | regenc);
                emitByte(0x25);
                emitInt(disp);
            }
        }
        setCurAttributes(null);
    }
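    // Worked example (illustrative): "mov rax, [rbx + rcx*8 + 0x10]" takes the
    // [base + indexscale + imm8] path above and assembles to 48 8B 44 CB 10:
    // REX.W (0x48), opcode (0x8B), ModRM [01 000 100] = 0x44 (disp8 form, SIB
    // follows), SIB [11 001 011] = 0xCB (scale 8, index rcx, base rbx), disp8 0x10.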
    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        protected static final int P_0F = 0x0F;
        protected static final int P_0F38 = 0x380F;
        protected static final int P_0F3A = 0x3A0F;

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.sizePrefix != 0) {
                asm.emitByte(size.sizePrefix);
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }

    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.bytes;
            }
        }
    }
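    // Note (illustrative): two-byte opcode escapes are stored byte-swapped, e.g.
    // P_0F38 == 0x380F, because emitShort() writes little-endian: the 0x0F escape
    // is emitted first, then 0x38, matching the order required in the instruction
    // stream.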
    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of either RM or MR for 3 address forms.
     */
    public abstract static class AMD64RRROp extends AMD64Op {

        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL   = new AMD64RMOp("IMUL",   P_0F, 0xAF);
        public static final AMD64RMOp BSF    = new AMD64RMOp("BSF",    P_0F, 0xBC);
        public static final AMD64RMOp BSR    = new AMD64RMOp("BSR",    P_0F, 0xBD);
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT  = new AMD64RMOp("TZCNT",  0xF3, P_0F, 0xBC, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT  = new AMD64RMOp("LZCNT",  0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion);
        public static final AMD64RMOp MOVZX  = new AMD64RMOp("MOVZX",  P_0F, 0xB7, OpAssertion.No16BitAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion);
        public static final AMD64RMOp MOVSX  = new AMD64RMOp("MOVSX",  P_0F, 0xBF, OpAssertion.No16BitAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordOnlyAssertion);
        public static final AMD64RMOp MOVB   = new AMD64RMOp("MOVB",   0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV    = new AMD64RMOp("MOV",    0x8B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64RMOp MOVD   = new AMD64RMOp("MOVD",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ   = new AMD64RMOp("MOVQ",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS  = new AMD64RMOp("MOVSS",  P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD  = new AMD64RMOp("MOVSD",  P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);

        // TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient.
        public static final AMD64RMOp TESTB  = new AMD64RMOp("TEST",   0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST   = new AMD64RMOp("TEST",   0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x2A:
                case 0x2C:
                case 0x2E:
                case 0x5A:
                case 0x6E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x10:
                case 0x51:
                case 0x54:
                case 0x55:
                case 0x56:
                case 0x57:
                case 0x58:
                case 0x59:
                case 0x5C:
                case 0x5D:
                case 0x5E:
                case 0x5F:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = (size == QWORD);
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x10:
                case 0x2A:
                case 0x2C:
                case 0x2E:
                case 0x6E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x51:
                case 0x54:
                case 0x55:
                case 0x56:
                case 0x57:
                case 0x58:
                case 0x59:
                case 0x5C:
                case 0x5D:
                case 0x5E:
                case 0x5F:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = (size == QWORD);
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                if (noNds) {
                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
                } else {
                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }
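    // Usage sketch (hypothetical, assuming an AMD64Assembler instance asm):
    //
    //   AMD64RMOp.MOV.emit(asm, QWORD, rax, new AMD64Address(rsp, 8));
    //
    // takes the non-SIMD path of emit() above and produces
    // "mov rax, qword ptr [rsp + 8]".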
    /**
     * Opcode with operand order of RRM for 3 address forms.
     */
    public static class AMD64RRMOp extends AMD64RRROp {
        protected AMD64RRMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RRMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
        }

        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src) {
            assert verify(asm, size, dst, src);
            int pre;
            int opc;
            boolean rexVexW = (size == QWORD);
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
            int curPrefix = size.sizePrefix | prefix1;
            switch (curPrefix) {
                case 0x66:
                    pre = VexSimdPrefix.VEX_SIMD_66;
                    break;
                case 0xF2:
                    pre = VexSimdPrefix.VEX_SIMD_F2;
                    break;
                case 0xF3:
                    pre = VexSimdPrefix.VEX_SIMD_F3;
                    break;
                default:
                    pre = VexSimdPrefix.VEX_SIMD_NONE;
                    break;
            }
            switch (prefix2) {
                case P_0F:
                    opc = VexOpcode.VEX_OPCODE_0F;
                    break;
                case P_0F38:
                    opc = VexOpcode.VEX_OPCODE_0F_38;
                    break;
                case P_0F3A:
                    opc = VexOpcode.VEX_OPCODE_0F_3A;
                    break;
                default:
                    opc = VexOpcode.VEX_OPCODE_NONE;
                    break;
            }
            int encode = asm.simdPrefixAndEncode(dst, nds, src, pre, opc, attributes);
            asm.emitByte(op);
            asm.emitByte(0xC0 | encode);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, AMD64Address src) {
            assert verify(asm, size, dst, null);
            int pre;
            int opc;
            boolean rexVexW = (size == QWORD);
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
            int curPrefix = size.sizePrefix | prefix1;
            switch (curPrefix) {
                case 0x66:
                    pre = VexSimdPrefix.VEX_SIMD_66;
                    break;
                case 0xF2:
                    pre = VexSimdPrefix.VEX_SIMD_F2;
                    break;
                case 0xF3:
                    pre = VexSimdPrefix.VEX_SIMD_F3;
                    break;
                default:
                    pre = VexSimdPrefix.VEX_SIMD_NONE;
                    break;
            }
            switch (prefix2) {
                case P_0F:
                    opc = VexOpcode.VEX_OPCODE_0F;
                    break;
                case P_0F38:
                    opc = VexOpcode.VEX_OPCODE_0F_38;
                    break;
                case P_0F3A:
                    opc = VexOpcode.VEX_OPCODE_0F_3A;
                    break;
                default:
                    opc = VexOpcode.VEX_OPCODE_NONE;
                    break;
            }
            asm.simdPrefix(dst, nds, src, pre, opc, attributes);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }

    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB   = new AMD64MROp("MOVB",   0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV    = new AMD64MROp("MOV",    0x89);

        // MOVD and MOVQ are the same opcode, just with different operand size prefix
        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used.
        public static final AMD64MROp MOVD   = new AMD64MROp("MOVD",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ   = new AMD64MROp("MOVQ",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64MROp MOVSS  = new AMD64MROp("MOVSS",  P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD  = new AMD64MROp("MOVSD",  P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.IntegerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x7E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x11:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = (size == QWORD);
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(src, Register.None, dst, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(src, src, dst, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, null, src);
            boolean isSimd = false;

            switch (op) {
                case 0x7E:
                case 0x11:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = (size == QWORD);
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                asm.simdPrefix(src, Register.None, dst, pre, opc, attributes);
                asm.emitByte(op);
                asm.emitOperandHelper(src, dst, 0);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
                asm.emitOperandHelper(src, dst, 0);
            }
        }
    }
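    // Usage sketch (hypothetical): the MR form is the store direction, e.g.
    //
    //   AMD64MROp.MOV.emit(asm, DWORD, new AMD64Address(rbx, 0), rcx);
    //
    // emits "mov dword ptr [rbx], ecx" (opcode 0x89, operand order reversed
    // with respect to the RM form).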
    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT  = new AMD64MOp("NOT",  0xF7, 2);
        public static final AMD64MOp NEG  = new AMD64MOp("NEG",  0xF7, 3);
        public static final AMD64MOp MUL  = new AMD64MOp("MUL",  0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV  = new AMD64MOp("DIV",  0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC  = new AMD64MOp("INC",  0xFF, 0);
        public static final AMD64MOp DEC  = new AMD64MOp("DEC",  0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP  = new AMD64MOp("POP",  0x8F, 0, OpAssertion.No32BitAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }

    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true,  0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV  = new AMD64MIOp("MOV",  false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }
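    // Note (illustrative): the memory form above passes immediateSize(size) as
    // additionalInstructionSize to emitOperandHelper because the immediate is
    // emitted after the operand; a RIP-relative displacement must be computed
    // relative to the end of the whole instruction, including those trailing
    // immediate bytes.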
    /**
     * Opcodes with operand order of RMI.
     *
     * We only have one form of ROUND, since the operation always uses a single-variant input,
     * making its extension to 3 address forms redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL    = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true,  0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x0A:
                case 0x0B:
                    isSimd = true;
                    noNds = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
                emitImmediate(asm, size, imm);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
                emitImmediate(asm, size, imm);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);

            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x0A:
                case 0x0B:
                    isSimd = true;
                    noNds = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }

                if (noNds) {
                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
                } else {
                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, immediateSize(size));
                emitImmediate(asm, size, imm);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, immediateSize(size));
                emitImmediate(asm, size, imm);
            }
        }
    }

    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS  = new SSEOp("CVTSI2SS",  0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
        public static final SSEOp CVTSI2SD  = new SSEOp("CVTSI2SD",  0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
        public static final SSEOp UCOMIS    = new SSEOp("UCOMIS",          P_0F, 0x2E, OpAssertion.PackedFloatingAssertion);
        public static final SSEOp SQRT      = new SSEOp("SQRT",            P_0F, 0x51);
        public static final SSEOp AND       = new SSEOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
        public static final SSEOp ANDN      = new SSEOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
        public static final SSEOp OR        = new SSEOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
        public static final SSEOp XOR       = new SSEOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
        public static final SSEOp ADD       = new SSEOp("ADD",             P_0F, 0x58);
        public static final SSEOp MUL       = new SSEOp("MUL",             P_0F, 0x59);
        public static final SSEOp CVTSS2SD  = new SSEOp("CVTSS2SD",        P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS  = new SSEOp("CVTSD2SS",        P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB       = new SSEOp("SUB",             P_0F, 0x5C);
        public static final SSEOp MIN       = new SSEOp("MIN",             P_0F, 0x5D);
        public static final SSEOp DIV       = new SSEOp("DIV",             P_0F, 0x5E);
        public static final SSEOp MAX       = new SSEOp("MAX",             P_0F, 0x5F);
        // @formatter:on

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatingAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }
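    // Usage sketch (hypothetical): a scalar double add, xmm0 += xmm1:
    //
    //   SSEOp.ADD.emit(asm, SD, xmm0, xmm1);
    //
    // The SD operand size contributes the 0xF2 mandatory prefix, so the emitted
    // instruction is ADDSD.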
    public static class AVXOp extends AMD64RRMOp {
        // @formatter:off
        public static final AVXOp AND  = new AVXOp("AND",  P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
        public static final AVXOp ANDN = new AVXOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
        public static final AVXOp OR   = new AVXOp("OR",   P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
        public static final AVXOp XOR  = new AVXOp("XOR",  P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
        public static final AVXOp ADD  = new AVXOp("ADD",  P_0F, 0x58);
        public static final AVXOp MUL  = new AVXOp("MUL",  P_0F, 0x59);
        public static final AVXOp SUB  = new AVXOp("SUB",  P_0F, 0x5C);
        public static final AVXOp MIN  = new AVXOp("MIN",  P_0F, 0x5D);
        public static final AVXOp DIV  = new AVXOp("DIV",  P_0F, 0x5E);
        public static final AVXOp MAX  = new AVXOp("MAX",  P_0F, 0x5F);
        // @formatter:on

        protected AVXOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatingAssertion);
        }

        protected AVXOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.AVX);
        }
    }

    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR",  1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }
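    // Note (illustrative): baseOp = code << 3 reproduces the classic x86 ALU
    // opcode layout. For ADD (code 0) that yields 0x00/0x01 (MR byte/full) and
    // 0x02/0x03 (RM), plus the group-1 immediate forms 0x80/0x81/0x83 with the
    // code as ModRM reg extension; for CMP (code 7) the same pattern lands on
    // 0x38..0x3B.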
    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion);
        }
    }
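    // Usage sketch (hypothetical): a shift by constant, "shl eax, 3":
    //
    //   AMD64Shift.SHL.miOp.emit(asm, DWORD, rax, 3);
    //
    // emits C1 E0 03 (opcode 0xC1 with reg extension 4, then the imm8).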
EAXEnc, EAXEnc); 1705 emitByte(0); // 8-bit offset (1 byte) 1706 } 1707 1708 private void addrNop5() { 1709 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset 1710 emitByte(0x0F); 1711 emitByte(0x1F); 1712 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); 1713 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1714 emitByte(0); // 8-bit offset (1 byte) 1715 } 1716 1717 private void addrNop7() { 1718 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset 1719 emitByte(0x0F); 1720 emitByte(0x1F); 1721 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); 1722 emitInt(0); // 32-bit offset (4 bytes) 1723 } 1724 1725 private void addrNop8() { 1726 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset 1727 emitByte(0x0F); 1728 emitByte(0x1F); 1729 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); 1730 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1731 emitInt(0); // 32-bit offset (4 bytes) 1732 } 1733 1734 public final void andl(Register dst, int imm32) { 1735 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1736 } 1737 1738 public final void andl(Register dst, Register src) { 1739 AND.rmOp.emit(this, DWORD, dst, src); 1740 } 1741 1742 public final void andpd(Register dst, Register src) { 1743 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 1744 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1745 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 1746 emitByte(0x54); 1747 emitByte(0xC0 | encode); 1748 } 1749 1750 public final void andpd(Register dst, AMD64Address src) { 1751 assert dst.getRegisterCategory().equals(AMD64.XMM); 1752 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1753 simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 1754 emitByte(0x54); 1755 emitOperandHelper(dst, src, 0); 1756 } 1757 1758 public final void bsrl(Register dst, Register src) { 1759 int encode = prefixAndEncode(dst.encoding(), src.encoding()); 1760 emitByte(0x0F); 1761 emitByte(0xBD); 1762 emitByte(0xC0 | encode); 1763 } 1764 1765 public final void bswapl(Register reg) { 1766 int encode = prefixAndEncode(reg.encoding); 1767 emitByte(0x0F); 1768 emitByte(0xC8 | encode); 1769 } 1770 1771 public final void cdql() { 1772 emitByte(0x99); 1773 } 1774 1775 public final void cmovl(ConditionFlag cc, Register dst, Register src) { 1776 int encode = prefixAndEncode(dst.encoding, src.encoding); 1777 emitByte(0x0F); 1778 emitByte(0x40 | cc.getValue()); 1779 emitByte(0xC0 | encode); 1780 } 1781 1782 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) { 1783 prefix(src, dst); 1784 emitByte(0x0F); 1785 emitByte(0x40 | cc.getValue()); 1786 emitOperandHelper(dst, src, 0); 1787 } 1788 1789 public final void cmpl(Register dst, int imm32) { 1790 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1791 } 1792 1793 public final void cmpl(Register dst, Register src) { 1794 CMP.rmOp.emit(this, DWORD, dst, src); 1795 } 1796 1797 public final void cmpl(Register dst, AMD64Address src) { 1798 CMP.rmOp.emit(this, DWORD, dst, src); 1799 } 1800 1801 public final void cmpl(AMD64Address dst, int imm32) { 1802 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD,
dst, imm32); 1803 } 1804 1805 // The 32-bit cmpxchg compares the value at adr with the contents of rax; if they are 1806 // equal, reg is stored into adr, otherwise the value at adr is loaded into rax. 1807 // ZF is set if the compared values were equal, and cleared otherwise. 1808 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg 1809 prefix(adr, reg); 1810 emitByte(0x0F); 1811 emitByte(0xB1); 1812 emitOperandHelper(reg, adr, 0); 1813 } 1814 1815 public final void cvtsi2sdl(Register dst, Register src) { 1816 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU); 1817 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1818 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 1819 emitByte(0x2A); 1820 emitByte(0xC0 | encode); 1821 } 1822 1823 public final void cvttsd2sil(Register dst, Register src) { 1824 assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM); 1825 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1826 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 1827 emitByte(0x2C); 1828 emitByte(0xC0 | encode); 1829 } 1830 1831 protected final void decl(AMD64Address dst) { 1832 prefix(dst); 1833 emitByte(0xFF); 1834 emitOperandHelper(1, dst, 0); 1835 } 1836 1837 public final void divsd(Register dst, Register src) { 1838 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 1839 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1840 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 1841 emitByte(0x5E); 1842 emitByte(0xC0 | encode); 1843 } 1844 1845 public final void hlt() { 1846 emitByte(0xF4); 1847 } 1848 1849 public final void imull(Register dst, Register src, int value) { 1850 if (isByte(value)) { 1851 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value); 1852 } else { 1853 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value); 1854 } 1855 } 1856 1857 protected final void incl(AMD64Address dst) { 1858 prefix(dst); 1859 emitByte(0xFF); 1860 emitOperandHelper(0, dst, 0); 1861 } 1862 1863 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { 1864 int shortSize = 2; 1865 int longSize = 6; 1866 long disp = jumpTarget - position(); 1867 if (!forceDisp32 && isByte(disp - shortSize)) { 1868 // 0111 tttn #8-bit disp 1869 emitByte(0x70 | cc.getValue()); 1870 emitByte((int) ((disp - shortSize) & 0xFF)); 1871 } else { 1872 // 0000 1111 1000 tttn #32-bit disp 1873 assert isInt(disp - longSize) : "must be 32-bit offset (call4)"; 1874 emitByte(0x0F); 1875 emitByte(0x80 | cc.getValue()); 1876 emitInt((int) (disp - longSize)); 1877 } 1878 } 1879 1880 public final void jcc(ConditionFlag cc, Label l) { 1881 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; 1882 if (l.isBound()) { 1883 jcc(cc, l.position(), false); 1884 } else { 1885 // Note: we could eliminate conditional jumps to this jump if the condition 1886 // is the same, but that seems to be a rather unlikely case. 1887 // Note: use jccb() if the label to be bound is very close, to get 1888 // an 8-bit displacement 1889 l.addPatchAt(position()); 1890 emitByte(0x0F); 1891 emitByte(0x80 | cc.getValue()); 1892 emitInt(0); 1893 } 1894 1895 } 1896 1897 public final void jccb(ConditionFlag cc, Label l) { 1898 if (l.isBound()) { 1899 int shortSize = 2; 1900 int entry = l.position(); 1901 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp"; 1902 long disp = entry - position(); 1903 // 0111 tttn #8-bit disp 1904 emitByte(0x70 | cc.getValue()); 1905 emitByte((int) ((disp - shortSize) & 0xFF)); 1906 } else { 1907 l.addPatchAt(position()); 1908 emitByte(0x70 | cc.getValue()); 1909 emitByte(0); 1910 } 1911 } 1912 1913 public final void jmp(int jumpTarget, boolean forceDisp32) { 1914 int shortSize = 2; 1915 int longSize = 5; 1916 long disp = jumpTarget - position(); 1917 if (!forceDisp32 && isByte(disp - shortSize)) { 1918 emitByte(0xEB); 1919 emitByte((int) ((disp - shortSize) & 0xFF)); 1920 } else { 1921 emitByte(0xE9); 1922 emitInt((int) (disp - longSize)); 1923 } 1924 } 1925 1926 @Override 1927 public final void jmp(Label l) { 1928 if (l.isBound()) { 1929 jmp(l.position(), false); 1930 } else { 1931 // By default, forward jumps are always 32-bit displacements, since 1932 // we can't yet know where the label will be bound. If you're sure that 1933 // the forward jump will not run beyond 256 bytes, use jmpb to 1934 // force an 8-bit displacement. 1935 1936 l.addPatchAt(position()); 1937 emitByte(0xE9); 1938 emitInt(0); 1939 } 1940 } 1941 1942 public final void jmp(Register entry) { 1943 int encode = prefixAndEncode(entry.encoding); 1944 emitByte(0xFF); 1945 emitByte(0xE0 | encode); 1946 } 1947 1948 public final void jmp(AMD64Address adr) { 1949 prefix(adr); 1950 emitByte(0xFF); 1951 emitOperandHelper(rsp, adr, 0); 1952 } 1953 1954 public final void jmpb(Label l) { 1955 if (l.isBound()) { 1956 int shortSize = 2; 1957 int entry = l.position(); 1958 assert isByte((entry - position()) + shortSize) : "Displacement too large for a short jmp"; 1959 long offs = entry - position(); 1960 emitByte(0xEB); 1961 emitByte((int) ((offs - shortSize) & 0xFF)); 1962 } else { 1963 1964 l.addPatchAt(position()); 1965 emitByte(0xEB); 1966 emitByte(0); 1967 } 1968 } 1969 1970 public final void leaq(Register dst, AMD64Address src) { 1971 prefixq(src, dst); 1972 emitByte(0x8D); 1973 emitOperandHelper(dst, src, 0); 1974 } 1975 1976 public final void leave() { 1977 emitByte(0xC9); 1978 } 1979 1980 public final void lock() { 1981 emitByte(0xF0); 1982 } 1983 1984 public final void movapd(Register dst, Register src) { 1985 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 1986 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1987 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 1988 emitByte(0x28); 1989 emitByte(0xC0 | encode); 1990 } 1991 1992 public final void movaps(Register dst, Register src) { 1993 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 1994 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */
false, target); 1995 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes); 1996 emitByte(0x28); 1997 emitByte(0xC0 | encode); 1998 } 1999 2000 public final void movb(AMD64Address dst, int imm8) { 2001 prefix(dst); 2002 emitByte(0xC6); 2003 emitOperandHelper(0, dst, 1); 2004 emitByte(imm8); 2005 } 2006 2007 public final void movb(AMD64Address dst, Register src) { 2008 assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register"; 2009 prefix(dst, src, true); 2010 emitByte(0x88); 2011 emitOperandHelper(src, dst, 0); 2012 } 2013 2014 public final void movl(Register dst, int imm32) { 2015 int encode = prefixAndEncode(dst.encoding); 2016 emitByte(0xB8 | encode); 2017 emitInt(imm32); 2018 } 2019 2020 public final void movl(Register dst, Register src) { 2021 int encode = prefixAndEncode(dst.encoding, src.encoding); 2022 emitByte(0x8B); 2023 emitByte(0xC0 | encode); 2024 } 2025 2026 public final void movl(Register dst, AMD64Address src) { 2027 prefix(src, dst); 2028 emitByte(0x8B); 2029 emitOperandHelper(dst, src, 0); 2030 } 2031 2032 public final void movl(AMD64Address dst, int imm32) { 2033 prefix(dst); 2034 emitByte(0xC7); 2035 emitOperandHelper(0, dst, 4); 2036 emitInt(imm32); 2037 } 2038 2039 public final void movl(AMD64Address dst, Register src) { 2040 prefix(dst, src); 2041 emitByte(0x89); 2042 emitOperandHelper(src, dst, 0); 2043 } 2044 2045 /** 2046 * New CPUs require use of movsd and movss to avoid partial register stall when loading from 2047 * memory. But for old Opteron use movlpd instead of movsd. The selection is done in 2048 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and 2049 * {@link AMD64MacroAssembler#movflt(Register, Register)}. 2050 */ 2051 public final void movlpd(Register dst, AMD64Address src) { 2052 assert dst.getRegisterCategory().equals(AMD64.XMM); 2053 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2054 simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2055 emitByte(0x12); 2056 emitOperandHelper(dst, src, 0); 2057 } 2058 2059 public final void movlhps(Register dst, Register src) { 2060 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2061 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2062 int encode = simdPrefixAndEncode(dst, src, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes); 2063 emitByte(0x16); 2064 emitByte(0xC0 | encode); 2065 } 2066 2067 public final void movq(Register dst, AMD64Address src) { 2068 movq(dst, src, false); 2069 } 2070 2071 public final void movq(Register dst, AMD64Address src, boolean wide) { 2072 if (dst.getRegisterCategory().equals(AMD64.XMM)) { 2073 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ wide, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2074 simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 2075 emitByte(0x7E); 2076 emitOperandHelper(dst, src, wide, 0); 2077 } else { 2078 // gpr version of movq 2079 prefixq(src, dst); 2080 emitByte(0x8B); 2081 emitOperandHelper(dst, src, wide, 0); 2082 } 2083 } 2084 2085 public final void 
movq(Register dst, Register src) { 2086 int encode = prefixqAndEncode(dst.encoding, src.encoding); 2087 emitByte(0x8B); 2088 emitByte(0xC0 | encode); 2089 } 2090 2091 public final void movq(AMD64Address dst, Register src) { 2092 if (src.getRegisterCategory().equals(AMD64.XMM)) { 2093 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2094 simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2095 emitByte(0xD6); 2096 emitOperandHelper(src, dst, 0); 2097 } else { 2098 // gpr version of movq 2099 prefixq(dst, src); 2100 emitByte(0x89); 2101 emitOperandHelper(src, dst, 0); 2102 } 2103 } 2104 2105 public final void movsbl(Register dst, AMD64Address src) { 2106 prefix(src, dst); 2107 emitByte(0x0F); 2108 emitByte(0xBE); 2109 emitOperandHelper(dst, src, 0); 2110 } 2111 2112 public final void movsbl(Register dst, Register src) { 2113 int encode = prefixAndEncode(dst.encoding, false, src.encoding, true); 2114 emitByte(0x0F); 2115 emitByte(0xBE); 2116 emitByte(0xC0 | encode); 2117 } 2118 2119 public final void movsbq(Register dst, AMD64Address src) { 2120 prefixq(src, dst); 2121 emitByte(0x0F); 2122 emitByte(0xBE); 2123 emitOperandHelper(dst, src, 0); 2124 } 2125 2126 public final void movsbq(Register dst, Register src) { 2127 int encode = prefixqAndEncode(dst.encoding, src.encoding); 2128 emitByte(0x0F); 2129 emitByte(0xBE); 2130 emitByte(0xC0 | encode); 2131 } 2132 2133 public final void movsd(Register dst, Register src) { 2134 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2135 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2136 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2137 emitByte(0x10); 2138 emitByte(0xC0 | encode); 2139 } 2140 2141 public final void movsd(Register dst, AMD64Address src) { 2142 assert dst.getRegisterCategory().equals(AMD64.XMM); 2143 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2144 simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2145 emitByte(0x10); 2146 emitOperandHelper(dst, src, 0); 2147 } 2148 2149 public final void movsd(AMD64Address dst, Register src) { 2150 assert src.getRegisterCategory().equals(AMD64.XMM); 2151 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2152 simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2153 emitByte(0x11); 2154 emitOperandHelper(src, dst, 0); 2155 } 2156 2157 public final void movss(Register dst, Register src) { 2158 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2159 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2160 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 2161 emitByte(0x10); 2162 
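// ModRM byte: 0xC0 | encode sets mod = 0b11 (register-direct), with the reg/rm fields taken from the encoding computed by the prefix helper above; the same pattern is used by all reg-reg SSE moves in this file.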
emitByte(0xC0 | encode); 2163 } 2164 2165 public final void movss(Register dst, AMD64Address src) { 2166 assert dst.getRegisterCategory().equals(AMD64.XMM); 2167 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2168 simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 2169 emitByte(0x10); 2170 emitOperandHelper(dst, src, 0); 2171 } 2172 2173 public final void movss(AMD64Address dst, Register src) { 2174 assert src.getRegisterCategory().equals(AMD64.XMM); 2175 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2176 simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 2177 emitByte(0x11); 2178 emitOperandHelper(src, dst, 0); 2179 } 2180 2181 public final void mulpd(Register dst, Register src) { 2182 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2183 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2184 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2185 emitByte(0x59); 2186 emitByte(0xC0 | encode); 2187 } 2188 2189 public final void mulpd(Register dst, AMD64Address src) { 2190 assert dst.getRegisterCategory().equals(AMD64.XMM); 2191 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2192 simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2193 emitByte(0x59); 2194 emitOperandHelper(dst, src, 0); 2195 } 2196 2197 public final void mulsd(Register dst, Register src) { 2198 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2199 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2200 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2201 emitByte(0x59); 2202 emitByte(0xC0 | encode); 2203 } 2204 2205 public final void mulsd(Register dst, AMD64Address src) { 2206 assert dst.getRegisterCategory().equals(AMD64.XMM); 2207 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2208 simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2209 emitByte(0x59); 2210 emitOperandHelper(dst, src, 0); 2211 } 2212 2213 public final void mulss(Register dst, Register src) { 2214 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2215 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2216 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 2217 emitByte(0x59); 2218 emitByte(0xC0 | encode); 2219 } 2220 2221 public final 
void movswl(Register dst, AMD64Address src) { 2222 prefix(src, dst); 2223 emitByte(0x0F); 2224 emitByte(0xBF); 2225 emitOperandHelper(dst, src, 0); 2226 } 2227 2228 public final void movw(AMD64Address dst, int imm16) { 2229 emitByte(0x66); // operand-size override: switch to 16-bit mode 2230 prefix(dst); 2231 emitByte(0xC7); 2232 emitOperandHelper(0, dst, 2); 2233 emitShort(imm16); 2234 } 2235 2236 public final void movw(AMD64Address dst, Register src) { 2237 emitByte(0x66); 2238 prefix(dst, src); 2239 emitByte(0x89); 2240 emitOperandHelper(src, dst, 0); 2241 } 2242 2243 public final void movzbl(Register dst, AMD64Address src) { 2244 prefix(src, dst); 2245 emitByte(0x0F); 2246 emitByte(0xB6); 2247 emitOperandHelper(dst, src, 0); 2248 } 2249 2250 public final void movzbl(Register dst, Register src) { 2251 AMD64RMOp.MOVZXB.emit(this, OperandSize.DWORD, dst, src); 2252 } 2253 2254 public final void movzbq(Register dst, Register src) { 2255 AMD64RMOp.MOVZXB.emit(this, OperandSize.QWORD, dst, src); 2256 } 2257 2258 public final void movzwl(Register dst, AMD64Address src) { 2259 prefix(src, dst); 2260 emitByte(0x0F); 2261 emitByte(0xB7); 2262 emitOperandHelper(dst, src, 0); 2263 } 2264 2265 public final void negl(Register dst) { 2266 NEG.emit(this, DWORD, dst); 2267 } 2268 2269 public final void notl(Register dst) { 2270 NOT.emit(this, DWORD, dst); 2271 } 2272 2273 @Override 2274 public final void ensureUniquePC() { 2275 nop(); 2276 } 2277 2278 public final void nop() { 2279 nop(1); 2280 } 2281 2282 public void nop(int count) { 2283 int i = count; 2284 if (UseNormalNop) { 2285 assert i > 0 : " "; 2286 // The fancy nops aren't currently recognized by debuggers, making it a 2287 // pain to disassemble code while debugging. If asserts are on, speed is 2288 // clearly not an issue, so simply use the traditional single-byte nop 2289 // to do alignment. 2290 2291 for (; i > 0; i--) { 2292 emitByte(0x90); 2293 } 2294 return; 2295 } 2296 2297 if (UseAddressNop) { 2298 // 2299 // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
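// Each entry below gives a total padding length in bytes, followed by the recommended encoding: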
2300 // 1: 0x90 2301 // 2: 0x66 0x90 2302 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2303 // 4: 0x0F 0x1F 0x40 0x00 2304 // 5: 0x0F 0x1F 0x44 0x00 0x00 2305 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2306 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2307 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2308 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2309 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2310 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2311 2312 // The remaining encodings are AMD-specific: use consecutive address nops 2313 2314 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2315 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2316 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2317 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2318 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2319 // Size prefixes (0x66) are added for larger sizes 2320 2321 while (i >= 22) { 2322 i -= 11; 2323 emitByte(0x66); // size prefix 2324 emitByte(0x66); // size prefix 2325 emitByte(0x66); // size prefix 2326 addrNop8(); 2327 } 2328 // Generate the first nop for sizes between 12 and 21 2329 switch (i) { 2330 case 21: 2331 i -= 11; 2332 emitByte(0x66); // size prefix 2333 emitByte(0x66); // size prefix 2334 emitByte(0x66); // size prefix 2335 addrNop8(); 2336 break; 2337 case 20: 2338 case 19: 2339 i -= 10; 2340 emitByte(0x66); // size prefix 2341 emitByte(0x66); // size prefix 2342 addrNop8(); 2343 break; 2344 case 18: 2345 case 17: 2346 i -= 9; 2347 emitByte(0x66); // size prefix 2348 addrNop8(); 2349 break; 2350 case 16: 2351 case 15: 2352 i -= 8; 2353 addrNop8(); 2354 break; 2355 case 14: 2356 case 13: 2357 i -= 7; 2358 addrNop7(); 2359 break; 2360 case 12: 2361 i -= 6; 2362 emitByte(0x66); // size prefix 2363 addrNop5(); 2364 break; 2365 default: 2366 assert i < 12; 2367 } 2368 2369 // Generate the second nop for sizes between 1 and 11 2370 switch (i) { 2371 case 11: 2372 emitByte(0x66); // size prefix 2373 emitByte(0x66); // size prefix 2374 emitByte(0x66); // size prefix 2375 addrNop8(); 2376 break; 2377 case 10: 2378 emitByte(0x66); // size prefix 2379 emitByte(0x66); // size prefix 2380 addrNop8(); 2381 break; 2382 case 9: 2383 emitByte(0x66); // size prefix 2384 addrNop8(); 2385 break; 2386 case 8: 2387 addrNop8(); 2388 break; 2389 case 7: 2390 addrNop7(); 2391 break; 2392 case 6: 2393 emitByte(0x66); // size prefix 2394 addrNop5(); 2395 break; 2396 case 5: 2397 addrNop5(); 2398 break; 2399 case 4: 2400 addrNop4(); 2401 break; 2402 case 3: 2403 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2404 emitByte(0x66); // size prefix 2405 emitByte(0x66); // size prefix 2406 emitByte(0x90); // nop 2407 break; 2408 case 2: 2409 emitByte(0x66); // size prefix 2410 emitByte(0x90); // nop 2411 break; 2412 case 1: 2413 emitByte(0x90); // nop 2414 break; 2415 default: 2416 assert i == 0; 2417 } 2418 return; 2419 } 2420 2421 // Using nops with size prefixes "0x66 0x90".
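// Each nop here is a 0x90 preceded by up to three 0x66 operand-size prefixes; longer paddings chain several such nops: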
2422 // From AMD Optimization Guide: 2423 // 1: 0x90 2424 // 2: 0x66 0x90 2425 // 3: 0x66 0x66 0x90 2426 // 4: 0x66 0x66 0x66 0x90 2427 // 5: 0x66 0x66 0x90 0x66 0x90 2428 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2429 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2430 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2431 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2432 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2433 // 2434 while (i > 12) { 2435 i -= 4; 2436 emitByte(0x66); // size prefix 2437 emitByte(0x66); 2438 emitByte(0x66); 2439 emitByte(0x90); // nop 2440 } 2441 // 1 - 12 nops 2442 if (i > 8) { 2443 if (i > 9) { 2444 i -= 1; 2445 emitByte(0x66); 2446 } 2447 i -= 3; 2448 emitByte(0x66); 2449 emitByte(0x66); 2450 emitByte(0x90); 2451 } 2452 // 1 - 8 nops 2453 if (i > 4) { 2454 if (i > 6) { 2455 i -= 1; 2456 emitByte(0x66); 2457 } 2458 i -= 3; 2459 emitByte(0x66); 2460 emitByte(0x66); 2461 emitByte(0x90); 2462 } 2463 switch (i) { 2464 case 4: 2465 emitByte(0x66); 2466 emitByte(0x66); 2467 emitByte(0x66); 2468 emitByte(0x90); 2469 break; 2470 case 3: 2471 emitByte(0x66); 2472 emitByte(0x66); 2473 emitByte(0x90); 2474 break; 2475 case 2: 2476 emitByte(0x66); 2477 emitByte(0x90); 2478 break; 2479 case 1: 2480 emitByte(0x90); 2481 break; 2482 default: 2483 assert i == 0; 2484 } 2485 } 2486 2487 public final void orl(Register dst, Register src) { 2488 OR.rmOp.emit(this, DWORD, dst, src); 2489 } 2490 2491 public final void orl(Register dst, int imm32) { 2492 OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2493 } 2494 2495 public final void pop(Register dst) { 2496 int encode = prefixAndEncode(dst.encoding); 2497 emitByte(0x58 | encode); 2498 } 2499 2500 public void popfq() { 2501 emitByte(0x9D); 2502 } 2503 2504 public final void ptest(Register dst, Register src) { 2505 assert supports(CPUFeature.SSE4_1); 2506 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2507 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2508 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes); 2509 emitByte(0x17); 2510 emitByte(0xC0 | encode); 2511 } 2512 2513 public final void vptest(Register dst, Register src) { 2514 assert supports(CPUFeature.AVX); 2515 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2516 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2517 int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes); 2518 emitByte(0x17); 2519 emitByte(0xC0 | encode); 2520 } 2521 2522 void pcmpestri(Register dst, AMD64Address src, int imm8) { 2523 assert supports(CPUFeature.SSE4_2); 2524 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2525 simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes); 2526 emitByte(0x61); 2527 emitOperandHelper(dst, src, 0); 2528 emitByte(imm8); 2529 } 2530 2531 void pcmpestri(Register dst, Register src, int imm8) { 2532 assert supports(CPUFeature.SSE4_2); 2533 AMD64InstructionAttr 
attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2534 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes); 2535 emitByte(0x61); 2536 emitByte(0xC0 | encode); 2537 emitByte(imm8); 2538 } 2539 2540 public final void push(Register src) { 2541 int encode = prefixAndEncode(src.encoding); 2542 emitByte(0x50 | encode); 2543 } 2544 2545 public void pushfq() { 2546 emitByte(0x9c); 2547 } 2548 2549 public final void paddd(Register dst, Register src) { 2550 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2551 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2552 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2553 emitByte(0xFE); 2554 emitByte(0xC0 | encode); 2555 } 2556 2557 public final void paddq(Register dst, Register src) { 2558 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2559 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2560 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2561 emitByte(0xD4); 2562 emitByte(0xC0 | encode); 2563 } 2564 2565 public final void pextrw(Register dst, Register src, int imm8) { 2566 assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM); 2567 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2568 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2569 emitByte(0xC5); 2570 emitByte(0xC0 | encode); 2571 emitByte(imm8); 2572 } 2573 2574 public final void pinsrw(Register dst, Register src, int imm8) { 2575 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU); 2576 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2577 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2578 emitByte(0xC4); 2579 emitByte(0xC0 | encode); 2580 emitByte(imm8); 2581 } 2582 2583 public final void por(Register dst, Register src) { 2584 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2585 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2586 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2587 emitByte(0xEB); 2588 emitByte(0xC0 | encode); 2589 } 2590 2591 public final void pand(Register dst, Register src) { 2592 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2593 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW 
*/ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2594 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2595 emitByte(0xDB); 2596 emitByte(0xC0 | encode); 2597 } 2598 2599 public final void pxor(Register dst, Register src) { 2600 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2601 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2602 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2603 emitByte(0xEF); 2604 emitByte(0xC0 | encode); 2605 } 2606 2607 public final void vpxor(Register dst, Register nds, Register src) { 2608 assert supports(CPUFeature.AVX); 2609 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2610 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2611 int encode = vexPrefixAndEncode(dst, nds, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2612 emitByte(0xEF); 2613 emitByte(0xC0 | encode); 2614 } 2615 2616 public final void pslld(Register dst, int imm8) { 2617 assert isUByte(imm8) : "invalid value"; 2618 assert dst.getRegisterCategory().equals(AMD64.XMM); 2619 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2620 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 2621 int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2622 emitByte(0x72); 2623 emitByte(0xC0 | encode); 2624 emitByte(imm8 & 0xFF); 2625 } 2626 2627 public final void psllq(Register dst, Register shift) { 2628 assert dst.getRegisterCategory().equals(AMD64.XMM) && shift.getRegisterCategory().equals(AMD64.XMM); 2629 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2630 int encode = simdPrefixAndEncode(dst, dst, shift, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2631 emitByte(0xF3); 2632 emitByte(0xC0 | encode); 2633 } 2634 2635 public final void psllq(Register dst, int imm8) { 2636 assert isUByte(imm8) : "invalid value"; 2637 assert dst.getRegisterCategory().equals(AMD64.XMM); 2638 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2639 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 2640 int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2641 emitByte(0x73); 2642 emitByte(0xC0 | encode); 2643 emitByte(imm8); 2644 } 2645 2646 public final void psrad(Register dst, int imm8) { 2647 assert isUByte(imm8) : "invalid value"; 2648 assert dst.getRegisterCategory().equals(AMD64.XMM); 2649 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2650 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 2651 int encode =
simdPrefixAndEncode(AMD64.xmm4, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2652 emitByte(0x72); 2653 emitByte(0xC0 | encode); 2654 emitByte(imm8); 2655 } 2656 2657 public final void psrld(Register dst, int imm8) { 2658 assert isUByte(imm8) : "invalid value"; 2659 assert dst.getRegisterCategory().equals(AMD64.XMM); 2660 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2661 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 2662 int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2663 emitByte(0x72); 2664 emitByte(0xC0 | encode); 2665 emitByte(imm8); 2666 } 2667 2668 public final void psrlq(Register dst, int imm8) { 2669 assert isUByte(imm8) : "invalid value"; 2670 assert dst.getRegisterCategory().equals(AMD64.XMM); 2671 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2672 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 2673 int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2674 emitByte(0x73); 2675 emitByte(0xC0 | encode); 2676 emitByte(imm8); 2677 } 2678 2679 public final void psrldq(Register dst, int imm8) { 2680 assert isUByte(imm8) : "invalid value"; 2681 assert dst.getRegisterCategory().equals(AMD64.XMM); 2682 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); // XMM3 is for /3 encoding: 66 0F 73 /3 ib 2683 int encode = simdPrefixAndEncode(AMD64.xmm3, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2684 emitByte(0x73); 2685 emitByte(0xC0 | encode); 2686 emitByte(imm8); 2687 } 2688 2689 public final void pshufd(Register dst, Register src, int imm8) { 2690 assert isUByte(imm8) : "invalid value"; 2691 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2692 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2693 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2694 emitByte(0x70); 2695 emitByte(0xC0 | encode); 2696 emitByte(imm8); 2697 } 2698 2699 public final void psubd(Register dst, Register src) { 2700 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2701 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2702 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2703 emitByte(0xFA); 2704 emitByte(0xC0 | encode); 2705 } 2706 2707 public final void rcpps(Register dst, Register src) { 2708 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2709 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target); 2710 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE,
VexOpcode.VEX_OPCODE_0F, attributes); 2711 emitByte(0x53); 2712 emitByte(0xC0 | encode); 2713 } 2714 2715 public final void ret(int imm16) { 2716 if (imm16 == 0) { 2717 emitByte(0xC3); 2718 } else { 2719 emitByte(0xC2); 2720 emitShort(imm16); 2721 } 2722 } 2723 2724 public final void sarl(Register dst, int imm8) { 2725 int encode = prefixAndEncode(dst.encoding); 2726 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2727 if (imm8 == 1) { 2728 emitByte(0xD1); 2729 emitByte(0xF8 | encode); 2730 } else { 2731 emitByte(0xC1); 2732 emitByte(0xF8 | encode); 2733 emitByte(imm8); 2734 } 2735 } 2736 2737 public final void shll(Register dst, int imm8) { 2738 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2739 int encode = prefixAndEncode(dst.encoding); 2740 if (imm8 == 1) { 2741 emitByte(0xD1); 2742 emitByte(0xE0 | encode); 2743 } else { 2744 emitByte(0xC1); 2745 emitByte(0xE0 | encode); 2746 emitByte(imm8); 2747 } 2748 } 2749 2750 public final void shll(Register dst) { 2751 int encode = prefixAndEncode(dst.encoding); 2752 emitByte(0xD3); 2753 emitByte(0xE0 | encode); 2754 } 2755 2756 public final void shrl(Register dst, int imm8) { 2757 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2758 int encode = prefixAndEncode(dst.encoding); 2759 emitByte(0xC1); 2760 emitByte(0xE8 | encode); 2761 emitByte(imm8); 2762 } 2763 2764 public final void shrl(Register dst) { 2765 int encode = prefixAndEncode(dst.encoding); 2766 emitByte(0xD3); 2767 emitByte(0xE8 | encode); 2768 } 2769 2770 public final void subl(AMD64Address dst, int imm32) { 2771 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2772 } 2773 2774 public final void subl(Register dst, int imm32) { 2775 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2776 } 2777 2778 public final void subl(Register dst, Register src) { 2779 SUB.rmOp.emit(this, DWORD, dst, src); 2780 } 2781 2782 public final void subpd(Register dst, Register src) { 2783 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2784 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2785 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2786 emitByte(0x5C); 2787 emitByte(0xC0 | encode); 2788 } 2789 2790 public final void subsd(Register dst, Register src) { 2791 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2792 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2793 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2794 emitByte(0x5C); 2795 emitByte(0xC0 | encode); 2796 } 2797 2798 public final void subsd(Register dst, AMD64Address src) { 2799 assert dst.getRegisterCategory().equals(AMD64.XMM); 2800 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2801 simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2802 emitByte(0x5C); 2803 emitOperandHelper(dst, src, 0); 2804 } 2805 2806 public final void testl(Register dst, int imm32) { 2807 // not using emitArith because test 2808 // doesn't 
support sign-extension of 2809 // 8-bit operands 2810 int encode = dst.encoding; 2811 if (encode == 0) { 2812 emitByte(0xA9); 2813 } else { 2814 encode = prefixAndEncode(encode); 2815 emitByte(0xF7); 2816 emitByte(0xC0 | encode); 2817 } 2818 emitInt(imm32); 2819 } 2820 2821 public final void testl(Register dst, Register src) { 2822 int encode = prefixAndEncode(dst.encoding, src.encoding); 2823 emitByte(0x85); 2824 emitByte(0xC0 | encode); 2825 } 2826 2827 public final void testl(Register dst, AMD64Address src) { 2828 prefix(src, dst); 2829 emitByte(0x85); 2830 emitOperandHelper(dst, src, 0); 2831 } 2832 2833 public final void unpckhpd(Register dst, Register src) { 2834 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2835 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2836 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2837 emitByte(0x15); 2838 emitByte(0xC0 | encode); 2839 } 2840 2841 public final void unpcklpd(Register dst, Register src) { 2842 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2843 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2844 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2845 emitByte(0x14); 2846 emitByte(0xC0 | encode); 2847 } 2848 2849 public final void xorl(Register dst, Register src) { 2850 XOR.rmOp.emit(this, DWORD, dst, src); 2851 } 2852 2853 public final void xorpd(Register dst, Register src) { 2854 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2855 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2856 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2857 emitByte(0x57); 2858 emitByte(0xC0 | encode); 2859 } 2860 2861 public final void xorps(Register dst, Register src) { 2862 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2863 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2864 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes); 2865 emitByte(0x57); 2866 emitByte(0xC0 | encode); 2867 } 2868 2869 protected final void decl(Register dst) { 2870 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2871 int encode = prefixAndEncode(dst.encoding); 2872 emitByte(0xFF); 2873 emitByte(0xC8 | encode); 2874 } 2875 2876 protected final void incl(Register dst) { 2877 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2878 int encode = prefixAndEncode(dst.encoding); 2879 emitByte(0xFF); 2880 emitByte(0xC0 | encode); 2881 } 2882 2883 private int prefixAndEncode(int regEnc) { 2884 return prefixAndEncode(regEnc, false); 2885 } 2886 2887 private int prefixAndEncode(int regEnc, boolean byteinst) { 2888 if (regEnc >= 8) { 2889 emitByte(Prefix.REXB); 2890 return regEnc
- 8; 2891 } else if (byteinst && regEnc >= 4) { 2892 emitByte(Prefix.REX); 2893 } 2894 return regEnc; 2895 } 2896 2897 private int prefixqAndEncode(int regEnc) { 2898 if (regEnc < 8) { 2899 emitByte(Prefix.REXW); 2900 return regEnc; 2901 } else { 2902 emitByte(Prefix.REXWB); 2903 return regEnc - 8; 2904 } 2905 } 2906 2907 private int prefixAndEncode(int dstEnc, int srcEnc) { 2908 return prefixAndEncode(dstEnc, false, srcEnc, false); 2909 } 2910 2911 private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) { 2912 int srcEnc = srcEncoding; 2913 int dstEnc = dstEncoding; 2914 if (dstEnc < 8) { 2915 if (srcEnc >= 8) { 2916 emitByte(Prefix.REXB); 2917 srcEnc -= 8; 2918 } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) { 2919 emitByte(Prefix.REX); 2920 } 2921 } else { 2922 if (srcEnc < 8) { 2923 emitByte(Prefix.REXR); 2924 } else { 2925 emitByte(Prefix.REXRB); 2926 srcEnc -= 8; 2927 } 2928 dstEnc -= 8; 2929 } 2930 return dstEnc << 3 | srcEnc; 2931 } 2932 2933 /** 2934 * Creates a prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits a REX 2935 * prefix. If a given register encoding exceeds 3 bits, its 4th bit is encoded in the prefix. 2936 * 2937 * @param regEncoding the encoding of the register part of the ModRM-Byte 2938 * @param rmEncoding the encoding of the r/m part of the ModRM-Byte 2939 * @return the lower 6 bits of the ModRM-Byte that should be emitted 2940 */ 2941 private int prefixqAndEncode(int regEncoding, int rmEncoding) { 2942 int rmEnc = rmEncoding; 2943 int regEnc = regEncoding; 2944 if (regEnc < 8) { 2945 if (rmEnc < 8) { 2946 emitByte(Prefix.REXW); 2947 } else { 2948 emitByte(Prefix.REXWB); 2949 rmEnc -= 8; 2950 } 2951 } else { 2952 if (rmEnc < 8) { 2953 emitByte(Prefix.REXWR); 2954 } else { 2955 emitByte(Prefix.REXWRB); 2956 rmEnc -= 8; 2957 } 2958 regEnc -= 8; 2959 } 2960 return regEnc << 3 | rmEnc; 2961 } 2962 2963 private void vexPrefix(int rxb, int ndsEncoding, int pre, int opc, AMD64InstructionAttr attributes) { 2964 int vectorLen = attributes.getVectorLen(); 2965 boolean vexW = attributes.isRexVexW(); 2966 boolean isXorB = ((rxb & 0x3) > 0); 2967 if (isXorB || vexW || (opc == VexOpcode.VEX_OPCODE_0F_38) || (opc == VexOpcode.VEX_OPCODE_0F_3A)) { 2968 emitByte(Prefix.VEX_3BYTES); 2969 2970 int byte1 = (rxb << 5); 2971 byte1 = ((~byte1) & 0xE0) | opc; 2972 emitByte(byte1); 2973 2974 int byte2 = ((~ndsEncoding) & 0xf) << 3; 2975 byte2 |= (vexW ? VexPrefix.VEX_W : 0) | ((vectorLen > 0) ? 4 : 0) | pre; 2976 emitByte(byte2); 2977 } else { 2978 emitByte(Prefix.VEX_2BYTES); 2979 2980 int byte1 = ((rxb & 0x4) > 0) ? VexPrefix.VEX_R : 0; 2981 byte1 = (~byte1) & 0x80; 2982 byte1 |= ((~ndsEncoding) & 0xf) << 3; 2983 byte1 |= ((vectorLen > 0) ? 4 : 0) | pre; 2984 emitByte(byte1); 2985 } 2986 } 2987 2988 private void vexPrefix(AMD64Address adr, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) { 2989 int rxb = getRXB(src, adr); 2990 int ndsEncoding = nds.isValid() ? nds.encoding : 0; 2991 vexPrefix(rxb, ndsEncoding, pre, opc, attributes); 2992 setCurAttributes(attributes); 2993 } 2994 2995 private int vexPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) { 2996 int rxb = getRXB(dst, src); 2997 int ndsEncoding = nds.isValid() ?
nds.encoding : 0; 2998 vexPrefix(rxb, ndsEncoding, pre, opc, attributes); 2999 // return modrm byte components for operands 3000 return (((dst.encoding & 7) << 3) | (src.encoding & 7)); 3001 } 3002 3003 private void simdPrefix(Register xreg, Register nds, AMD64Address adr, int pre, int opc, AMD64InstructionAttr attributes) { 3004 if (supports(CPUFeature.AVX)) { 3005 vexPrefix(adr, nds, xreg, pre, opc, attributes); 3006 } else { 3007 switch (pre) { 3008 case VexSimdPrefix.VEX_SIMD_66: 3009 emitByte(0x66); 3010 break; 3011 case VexSimdPrefix.VEX_SIMD_F2: 3012 emitByte(0xF2); 3013 break; 3014 case VexSimdPrefix.VEX_SIMD_F3: 3015 emitByte(0xF3); 3016 break; 3017 } 3018 if (attributes.isRexVexW()) { 3019 prefixq(adr, xreg); 3020 } else { 3021 prefix(adr, xreg); 3022 } 3023 switch (opc) { 3024 case VexOpcode.VEX_OPCODE_0F: 3025 emitByte(0x0F); 3026 break; 3027 case VexOpcode.VEX_OPCODE_0F_38: 3028 emitByte(0x0F); 3029 emitByte(0x38); 3030 break; 3031 case VexOpcode.VEX_OPCODE_0F_3A: 3032 emitByte(0x0F); 3033 emitByte(0x3A); 3034 break; 3035 } 3036 } 3037 } 3038 3039 private int simdPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) { 3040 if (supports(CPUFeature.AVX)) { 3041 return vexPrefixAndEncode(dst, nds, src, pre, opc, attributes); 3042 } else { 3043 switch (pre) { 3044 case VexSimdPrefix.VEX_SIMD_66: 3045 emitByte(0x66); 3046 break; 3047 case VexSimdPrefix.VEX_SIMD_F2: 3048 emitByte(0xF2); 3049 break; 3050 case VexSimdPrefix.VEX_SIMD_F3: 3051 emitByte(0xF3); 3052 break; 3053 } 3054 int encode; 3055 int dstEncoding = dst.encoding; 3056 int srcEncoding = src.encoding; 3057 if (attributes.isRexVexW()) { 3058 encode = prefixqAndEncode(dstEncoding, srcEncoding); 3059 } else { 3060 encode = prefixAndEncode(dstEncoding, srcEncoding); 3061 } 3062 switch (opc) { 3063 case VexOpcode.VEX_OPCODE_0F: 3064 emitByte(0x0F); 3065 break; 3066 case VexOpcode.VEX_OPCODE_0F_38: 3067 emitByte(0x0F); 3068 emitByte(0x38); 3069 break; 3070 case VexOpcode.VEX_OPCODE_0F_3A: 3071 emitByte(0x0F); 3072 emitByte(0x3A); 3073 break; 3074 } 3075 return encode; 3076 } 3077 } 3078 3079 private static boolean needsRex(Register reg) { 3080 return reg.encoding >= MinEncodingNeedsRex; 3081 } 3082 3083 private void prefix(AMD64Address adr) { 3084 if (needsRex(adr.getBase())) { 3085 if (needsRex(adr.getIndex())) { 3086 emitByte(Prefix.REXXB); 3087 } else { 3088 emitByte(Prefix.REXB); 3089 } 3090 } else { 3091 if (needsRex(adr.getIndex())) { 3092 emitByte(Prefix.REXX); 3093 } 3094 } 3095 } 3096 3097 private void prefixq(AMD64Address adr) { 3098 if (needsRex(adr.getBase())) { 3099 if (needsRex(adr.getIndex())) { 3100 emitByte(Prefix.REXWXB); 3101 } else { 3102 emitByte(Prefix.REXWB); 3103 } 3104 } else { 3105 if (needsRex(adr.getIndex())) { 3106 emitByte(Prefix.REXWX); 3107 } else { 3108 emitByte(Prefix.REXW); 3109 } 3110 } 3111 } 3112 3113 private void prefix(AMD64Address adr, Register reg) { 3114 prefix(adr, reg, false); 3115 } 3116 3117 private void prefix(AMD64Address adr, Register reg, boolean byteinst) { 3118 if (reg.encoding < 8) { 3119 if (needsRex(adr.getBase())) { 3120 if (needsRex(adr.getIndex())) { 3121 emitByte(Prefix.REXXB); 3122 } else { 3123 emitByte(Prefix.REXB); 3124 } 3125 } else { 3126 if (needsRex(adr.getIndex())) { 3127 emitByte(Prefix.REXX); 3128 } else if (byteinst && reg.encoding >= 4) { 3129 emitByte(Prefix.REX); 3130 } 3131 } 3132 } else { 3133 if (needsRex(adr.getBase())) { 3134 if (needsRex(adr.getIndex())) { 3135 emitByte(Prefix.REXRXB); 3136 } 
else { 3137 emitByte(Prefix.REXRB); 3138 } 3139 } else { 3140 if (needsRex(adr.getIndex())) { 3141 emitByte(Prefix.REXRX); 3142 } else { 3143 emitByte(Prefix.REXR); 3144 } 3145 } 3146 } 3147 } 3148 3149 private void prefixq(AMD64Address adr, Register src) { 3150 if (src.encoding < 8) { 3151 if (needsRex(adr.getBase())) { 3152 if (needsRex(adr.getIndex())) { 3153 emitByte(Prefix.REXWXB); 3154 } else { 3155 emitByte(Prefix.REXWB); 3156 } 3157 } else { 3158 if (needsRex(adr.getIndex())) { 3159 emitByte(Prefix.REXWX); 3160 } else { 3161 emitByte(Prefix.REXW); 3162 } 3163 } 3164 } else { 3165 if (needsRex(adr.getBase())) { 3166 if (needsRex(adr.getIndex())) { 3167 emitByte(Prefix.REXWRXB); 3168 } else { 3169 emitByte(Prefix.REXWRB); 3170 } 3171 } else { 3172 if (needsRex(adr.getIndex())) { 3173 emitByte(Prefix.REXWRX); 3174 } else { 3175 emitByte(Prefix.REXWR); 3176 } 3177 } 3178 } 3179 } 3180 3181 public final void addq(Register dst, int imm32) { 3182 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3183 } 3184 3185 public final void addq(AMD64Address dst, int imm32) { 3186 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3187 } 3188 3189 public final void addq(Register dst, Register src) { 3190 ADD.rmOp.emit(this, QWORD, dst, src); 3191 } 3192 3193 public final void addq(AMD64Address dst, Register src) { 3194 ADD.mrOp.emit(this, QWORD, dst, src); 3195 } 3196 3197 public final void andq(Register dst, int imm32) { 3198 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3199 } 3200 3201 public final void bsrq(Register dst, Register src) { 3202 int encode = prefixqAndEncode(dst.encoding(), src.encoding()); 3203 emitByte(0x0F); 3204 emitByte(0xBD); 3205 emitByte(0xC0 | encode); 3206 } 3207 3208 public final void bswapq(Register reg) { 3209 int encode = prefixqAndEncode(reg.encoding); 3210 emitByte(0x0F); 3211 emitByte(0xC8 | encode); 3212 } 3213 3214 public final void cdqq() { 3215 emitByte(Prefix.REXW); 3216 emitByte(0x99); 3217 } 3218 3219 public final void cmovq(ConditionFlag cc, Register dst, Register src) { 3220 int encode = prefixqAndEncode(dst.encoding, src.encoding); 3221 emitByte(0x0F); 3222 emitByte(0x40 | cc.getValue()); 3223 emitByte(0xC0 | encode); 3224 } 3225 3226 public final void setb(ConditionFlag cc, Register dst) { 3227 int encode = prefixAndEncode(dst.encoding, true); 3228 emitByte(0x0F); 3229 emitByte(0x90 | cc.getValue()); 3230 emitByte(0xC0 | encode); 3231 } 3232 3233 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) { 3234 prefixq(src, dst); 3235 emitByte(0x0F); 3236 emitByte(0x40 | cc.getValue()); 3237 emitOperandHelper(dst, src, 0); 3238 } 3239 3240 public final void cmpq(Register dst, int imm32) { 3241 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3242 } 3243 3244 public final void cmpq(Register dst, Register src) { 3245 CMP.rmOp.emit(this, QWORD, dst, src); 3246 } 3247 3248 public final void cmpq(Register dst, AMD64Address src) { 3249 CMP.rmOp.emit(this, QWORD, dst, src); 3250 } 3251 3252 public final void cmpxchgq(Register reg, AMD64Address adr) { 3253 prefixq(adr, reg); 3254 emitByte(0x0F); 3255 emitByte(0xB1); 3256 emitOperandHelper(reg, adr, 0); 3257 } 3258 3259 public final void cvtdq2pd(Register dst, Register src) { 3260 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 3261 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* 
    public final void addq(Register dst, int imm32) {
        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void addq(AMD64Address dst, int imm32) {
        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void addq(Register dst, Register src) {
        ADD.rmOp.emit(this, QWORD, dst, src);
    }

    public final void addq(AMD64Address dst, Register src) {
        ADD.mrOp.emit(this, QWORD, dst, src);
    }

    public final void andq(Register dst, int imm32) {
        AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void bsrq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding(), src.encoding());
        emitByte(0x0F);
        emitByte(0xBD);
        emitByte(0xC0 | encode);
    }

    public final void bswapq(Register reg) {
        int encode = prefixqAndEncode(reg.encoding);
        emitByte(0x0F);
        emitByte(0xC8 | encode);
    }

    public final void cdqq() {
        emitByte(Prefix.REXW);
        emitByte(0x99);
    }

    public final void cmovq(ConditionFlag cc, Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitByte(0xC0 | encode);
    }

    public final void setb(ConditionFlag cc, Register dst) {
        int encode = prefixAndEncode(dst.encoding, true);
        emitByte(0x0F);
        emitByte(0x90 | cc.getValue());
        emitByte(0xC0 | encode);
    }

    public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitOperandHelper(dst, src, 0);
    }

    public final void cmpq(Register dst, int imm32) {
        CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void cmpq(Register dst, Register src) {
        CMP.rmOp.emit(this, QWORD, dst, src);
    }

    public final void cmpq(Register dst, AMD64Address src) {
        CMP.rmOp.emit(this, QWORD, dst, src);
    }

    public final void cmpxchgq(Register reg, AMD64Address adr) {
        prefixq(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr, 0);
    }

    public final void cvtdq2pd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xE6);
        emitByte(0xC0 | encode);
    }

    public final void cvtsi2sdq(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x2A);
        emitByte(0xC0 | encode);
    }

    public final void cvttsd2siq(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x2C);
        emitByte(0xC0 | encode);
    }

    public final void cvttpd2dq(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xE6);
        emitByte(0xC0 | encode);
    }
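    // In the register-register emitters above, the final 0xC0 | encode byte is the ModRM byte:
    // mod = 11 selects register-direct operands, and the prefix-and-encode helpers return
    // (dst & 7) << 3 | (src & 7), i.e. the reg and r/m fields. Bit 3 of an encoding in the
    // r8..r15 / xmm8..xmm15 range travels in the REX/VEX prefix (R and B bits), not in ModRM.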
    protected final void decq(Register dst) {
        // Use the two-byte form (the one-byte form is a REX prefix in 64-bit mode).
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xFF);
        emitByte(0xC8 | encode);
    }

    public final void decq(AMD64Address dst) {
        DEC.emit(this, QWORD, dst);
    }

    public final void imulq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0xAF);
        emitByte(0xC0 | encode);
    }

    public final void incq(Register dst) {
        // Don't use this directly; use the macro incrementq() instead.
        // Use the two-byte form (the one-byte form is a REX prefix in 64-bit mode).
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xFF);
        emitByte(0xC0 | encode);
    }

    public final void incq(AMD64Address dst) {
        INC.emit(this, QWORD, dst);
    }

    public final void movq(Register dst, long imm64) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xB8 | encode);
        emitLong(imm64);
    }

    public final void movslq(Register dst, int imm32) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xC7);
        emitByte(0xC0 | encode);
        emitInt(imm32);
    }

    public final void movdq(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6E);
        emitOperandHelper(dst, src, 0);
    }

    public final void movdq(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        // swap src/dst to get the correct prefix
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x7E);
        emitOperandHelper(src, dst, 0);
    }

    public final void movdq(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x6E);
            emitByte(0xC0 | encode);
        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
            // swap src/dst to get the correct prefix
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x7E);
            emitByte(0xC0 | encode);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x6E);
            emitByte(0xC0 | encode);
        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
            // swap src/dst to get the correct prefix
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x7E);
            emitByte(0xC0 | encode);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, AMD64Address src) {
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6E);
        emitOperandHelper(dst, src, 0);
    }
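    // movdq and movdl above are the same MOVD/MOVQ instruction family: 66 0F 6E moves a
    // general-purpose register into an XMM register and 66 0F 7E moves one back out, with the
    // rexVexW attribute selecting the 64-bit (movdq) or 32-bit (movdl) form. The src/dst swap in
    // the store variants is needed because 0F 7E encodes the XMM register in the ModRM reg field,
    // the position the prefix helpers assign to their first operand.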
    public final void movddup(Register dst, Register src) {
        assert supports(CPUFeature.SSE3);
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x12);
        emitByte(0xC0 | encode);
    }

    public final void movdqu(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void movdqu(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitByte(0xC0 | encode);
    }

    public final void vmovdqu(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX);
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void vzeroupper() {
        assert supports(CPUFeature.AVX);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        vexPrefixAndEncode(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x77);
    }

    public final void movslq(AMD64Address dst, int imm32) {
        prefixq(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 4);
        emitInt(imm32);
    }

    public final void movslq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x63);
        emitOperandHelper(dst, src, 0);
    }

    public final void movslq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x63);
        emitByte(0xC0 | encode);
    }

    public final void negq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xF7);
        emitByte(0xD8 | encode);
    }

    public final void orq(Register dst, Register src) {
        OR.rmOp.emit(this, QWORD, dst, src);
    }

    public final void shlq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixqAndEncode(dst.encoding);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitByte(0xE0 | encode);
        } else {
            emitByte(0xC1);
            emitByte(0xE0 | encode);
            emitByte(imm8);
        }
    }

    public final void shlq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xD3);
        emitByte(0xE0 | encode);
    }

    public final void shrq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixqAndEncode(dst.encoding);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitByte(0xE8 | encode);
        } else {
            emitByte(0xC1);
            emitByte(0xE8 | encode);
            emitByte(imm8);
        }
    }

    public final void shrq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xD3);
        emitByte(0xE8 | encode);
    }
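    // The shift emitters above use the standard group-2 encodings: D1 /4 for a shift by one,
    // C1 /4 ib for an immediate count, and D3 /4 for a count in CL (the single-operand overloads).
    // SHR is the same group with /5, which is why its second byte is 0xE8 | encode rather than
    // 0xE0 | encode.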
    public final void sbbq(Register dst, Register src) {
        SBB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void subq(Register dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subq(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subqWide(Register dst, int imm32) {
        // Don't use the sign-extending byte form; force a full 32-bit immediate.
        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
    }

    public final void subq(Register dst, Register src) {
        SUB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void testq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x85);
        emitByte(0xC0 | encode);
    }

    public final void btrq(Register src, int imm8) {
        int encode = prefixqAndEncode(src.encoding);
        emitByte(0x0F);
        emitByte(0xBA);
        emitByte(0xF0 | encode);
        emitByte(imm8);
    }

    public final void xaddl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddq(AMD64Address dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xchgl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }
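    // Note that xchgl/xchgq with a memory operand need no lock() prefix: XCHG asserts the
    // processor's LOCK signal implicitly whenever it references memory, so both forms already act
    // as atomic read-modify-write operations.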
    public final void membar(int barriers) {
        if (target.isMP) {
            // We only have to handle StoreLoad.
            if ((barriers & STORE_LOAD) != 0) {
                // All usable chips support "locked" instructions which suffice as barriers and are
                // much faster than the alternative of using the cpuid instruction. Here we use a
                // locked add [rsp], 0, which is conveniently a no-op apart from clobbering the
                // flags. Any change to this code may need to revisit other places where this idiom
                // is used, in particular the orderAccess code.
                lock();
                addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
            }
        }
    }

    @Override
    protected final void patchJumpTarget(int branch, int branchTarget) {
        int op = getByte(branch);
        assert op == 0xE8 // call
                        || op == 0x00 // jump table entry
                        || op == 0xE9 // jmp
                        || op == 0xEB // short jmp
                        || (op & 0xF0) == 0x70 // short jcc
                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
                        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;

        if (op == 0x00) {
            int offsetToJumpTableBase = getShort(branch + 1);
            int jumpTableBase = branch - offsetToJumpTableBase;
            int imm32 = branchTarget - jumpTableBase;
            emitInt(imm32, branch);
        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
            // short offset operators (jmp and jcc)
            final int imm8 = branchTarget - (branch + 2);
            /*
             * Since a wrongly patched short branch can produce code that works but behaves very
             * badly, we always fail with an exception here instead of relying on an assert.
             */
            if (!NumUtil.isByte(imm8)) {
                throw new InternalError("branch displacement out of range: " + imm8);
            }
            emitByte(imm8, branch + 1);
        } else {
            int off = 1;
            if (op == 0x0F) {
                off = 2;
            }
            int imm32 = branchTarget - (branch + 4 + off);
            emitInt(imm32, branch + off);
        }
    }
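    // A worked example for the near-jcc case above: a six-byte 0F 8x rel32 branch at position 100
    // patched to target position 200 has off == 2, so imm32 = 200 - (100 + 4 + 2) = 94 is written
    // at position 102. The displacement is relative to the end of the instruction (100 + 6), which
    // is exactly the rel32 semantics the hardware expects.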
3636 */ 3637 public final void call() { 3638 if (codePatchingAnnotationConsumer != null) { 3639 int pos = position(); 3640 codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + 1, 4, pos + 5)); 3641 } 3642 emitByte(0xE8); 3643 emitInt(0); 3644 } 3645 3646 public final void call(Register src) { 3647 int encode = prefixAndEncode(src.encoding); 3648 emitByte(0xFF); 3649 emitByte(0xD0 | encode); 3650 } 3651 3652 public final void int3() { 3653 emitByte(0xCC); 3654 } 3655 3656 public final void pause() { 3657 emitByte(0xF3); 3658 emitByte(0x90); 3659 } 3660 3661 private void emitx87(int b1, int b2, int i) { 3662 assert 0 <= i && i < 8 : "illegal stack offset"; 3663 emitByte(b1); 3664 emitByte(b2 + i); 3665 } 3666 3667 public final void fldd(AMD64Address src) { 3668 emitByte(0xDD); 3669 emitOperandHelper(0, src, 0); 3670 } 3671 3672 public final void flds(AMD64Address src) { 3673 emitByte(0xD9); 3674 emitOperandHelper(0, src, 0); 3675 } 3676 3677 public final void fldln2() { 3678 emitByte(0xD9); 3679 emitByte(0xED); 3680 } 3681 3682 public final void fldlg2() { 3683 emitByte(0xD9); 3684 emitByte(0xEC); 3685 } 3686 3687 public final void fyl2x() { 3688 emitByte(0xD9); 3689 emitByte(0xF1); 3690 } 3691 3692 public final void fstps(AMD64Address src) { 3693 emitByte(0xD9); 3694 emitOperandHelper(3, src, 0); 3695 } 3696 3697 public final void fstpd(AMD64Address src) { 3698 emitByte(0xDD); 3699 emitOperandHelper(3, src, 0); 3700 } 3701 3702 private void emitFPUArith(int b1, int b2, int i) { 3703 assert 0 <= i && i < 8 : "illegal FPU register: " + i; 3704 emitByte(b1); 3705 emitByte(b2 + i); 3706 } 3707 3708 public void ffree(int i) { 3709 emitFPUArith(0xDD, 0xC0, i); 3710 } 3711 3712 public void fincstp() { 3713 emitByte(0xD9); 3714 emitByte(0xF7); 3715 } 3716 3717 public void fxch(int i) { 3718 emitFPUArith(0xD9, 0xC8, i); 3719 } 3720 3721 public void fnstswAX() { 3722 emitByte(0xDF); 3723 emitByte(0xE0); 3724 } 3725 3726 public void fwait() { 3727 emitByte(0x9B); 3728 } 3729 3730 public void fprem() { 3731 emitByte(0xD9); 3732 emitByte(0xF8); 3733 } 3734 3735 public final void fsin() { 3736 emitByte(0xD9); 3737 emitByte(0xFE); 3738 } 3739 3740 public final void fcos() { 3741 emitByte(0xD9); 3742 emitByte(0xFF); 3743 } 3744 3745 public final void fptan() { 3746 emitByte(0xD9); 3747 emitByte(0xF2); 3748 } 3749 3750 public final void fstp(int i) { 3751 emitx87(0xDD, 0xD8, i); 3752 } 3753 3754 @Override 3755 public AMD64Address makeAddress(Register base, int displacement) { 3756 return new AMD64Address(base, displacement); 3757 } 3758 3759 @Override 3760 public AMD64Address getPlaceholder(int instructionStartPosition) { 3761 return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition); 3762 } 3763 3764 private void prefetchPrefix(AMD64Address src) { 3765 prefix(src); 3766 emitByte(0x0F); 3767 } 3768 3769 public void prefetchnta(AMD64Address src) { 3770 prefetchPrefix(src); 3771 emitByte(0x18); 3772 emitOperandHelper(0, src, 0); 3773 } 3774 3775 void prefetchr(AMD64Address src) { 3776 assert supports(CPUFeature.AMD_3DNOW_PREFETCH); 3777 prefetchPrefix(src); 3778 emitByte(0x0D); 3779 emitOperandHelper(0, src, 0); 3780 } 3781 3782 public void prefetcht0(AMD64Address src) { 3783 assert supports(CPUFeature.SSE); 3784 prefetchPrefix(src); 3785 emitByte(0x18); 3786 emitOperandHelper(1, src, 0); 3787 } 3788 3789 public void prefetcht1(AMD64Address src) { 3790 assert supports(CPUFeature.SSE); 3791 prefetchPrefix(src); 3792 emitByte(0x18); 3793 
    public final void call(Register src) {
        int encode = prefixAndEncode(src.encoding);
        emitByte(0xFF);
        emitByte(0xD0 | encode);
    }

    public final void int3() {
        emitByte(0xCC);
    }

    public final void pause() {
        emitByte(0xF3);
        emitByte(0x90);
    }

    private void emitx87(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal stack offset";
        emitByte(b1);
        emitByte(b2 + i);
    }

    public final void fldd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(0, src, 0);
    }

    public final void flds(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(0, src, 0);
    }

    public final void fldln2() {
        emitByte(0xD9);
        emitByte(0xED);
    }

    public final void fldlg2() {
        emitByte(0xD9);
        emitByte(0xEC);
    }

    public final void fyl2x() {
        emitByte(0xD9);
        emitByte(0xF1);
    }

    public final void fstps(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(3, src, 0);
    }

    public final void fstpd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(3, src, 0);
    }

    private void emitFPUArith(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
        emitByte(b1);
        emitByte(b2 + i);
    }

    public void ffree(int i) {
        emitFPUArith(0xDD, 0xC0, i);
    }

    public void fincstp() {
        emitByte(0xD9);
        emitByte(0xF7);
    }

    public void fxch(int i) {
        emitFPUArith(0xD9, 0xC8, i);
    }

    public void fnstswAX() {
        emitByte(0xDF);
        emitByte(0xE0);
    }

    public void fwait() {
        emitByte(0x9B);
    }

    public void fprem() {
        emitByte(0xD9);
        emitByte(0xF8);
    }

    public final void fsin() {
        emitByte(0xD9);
        emitByte(0xFE);
    }

    public final void fcos() {
        emitByte(0xD9);
        emitByte(0xFF);
    }

    public final void fptan() {
        emitByte(0xD9);
        emitByte(0xF2);
    }

    public final void fstp(int i) {
        emitx87(0xDD, 0xD8, i);
    }

    @Override
    public AMD64Address makeAddress(Register base, int displacement) {
        return new AMD64Address(base, displacement);
    }

    @Override
    public AMD64Address getPlaceholder(int instructionStartPosition) {
        return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition);
    }

    private void prefetchPrefix(AMD64Address src) {
        prefix(src);
        emitByte(0x0F);
    }

    public void prefetchnta(AMD64Address src) {
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(0, src, 0);
    }

    void prefetchr(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(0, src, 0);
    }

    public void prefetcht0(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(1, src, 0);
    }

    public void prefetcht1(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(2, src, 0);
    }

    public void prefetcht2(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(3, src, 0);
    }

    public void prefetchw(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(1, src, 0);
    }

    public void rdtsc() {
        emitByte(0x0F);
        emitByte(0x31);
    }

    /**
     * Emits an instruction which is considered to be illegal. This is used if we deliberately want
     * to crash the program (e.g. for debugging).
     */
    public void illegal() {
        emitByte(0x0F);
        emitByte(0x0B);
    }
}