/*
 * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package org.graalvm.compiler.asm.amd64;

import static org.graalvm.compiler.asm.NumUtil.isByte;
import static org.graalvm.compiler.asm.NumUtil.isInt;
import static org.graalvm.compiler.asm.NumUtil.isShiftCount;
import static org.graalvm.compiler.asm.NumUtil.isUByte;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.WORD;
import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.amd64.AMD64.r12;
import static jdk.vm.ci.amd64.AMD64.r13;
import static jdk.vm.ci.amd64.AMD64.rbp;
import static jdk.vm.ci.amd64.AMD64.rip;
import static jdk.vm.ci.amd64.AMD64.rsp;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;

import org.graalvm.compiler.asm.Assembler;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.NumUtil;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends Assembler {

    private static final int MinEncodingNeedsRex = 8;

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }
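
    // Worked example (illustrative): these 4-bit condition codes are OR'ed into the Jcc and
    // CMOVcc opcode bases, e.g. cmovl below emits 0x0F, 0x40 | cc.getValue(), so Equal (0x4)
    // yields the CMOVE encoding 0x0F 0x44, and the short-jump base 0x70 plus Equal gives
    // JE rel8 (0x74). Note that each flag and its negation differ only in the lowest encoding
    // bit (Zero 0x4 vs. NotZero 0x5), which makes negate() a simple table flip.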
    /**
     * Constants for X86 prefix bytes.
     */
    private static class Prefix {
        private static final int REX = 0x40;
        private static final int REXB = 0x41;
        private static final int REXX = 0x42;
        private static final int REXXB = 0x43;
        private static final int REXR = 0x44;
        private static final int REXRB = 0x45;
        private static final int REXRX = 0x46;
        private static final int REXRXB = 0x47;
        private static final int REXW = 0x48;
        private static final int REXWB = 0x49;
        private static final int REXWX = 0x4A;
        private static final int REXWXB = 0x4B;
        private static final int REXWR = 0x4C;
        private static final int REXWRB = 0x4D;
        private static final int REXWRX = 0x4E;
        private static final int REXWRXB = 0x4F;
        private static final int VEX_3BYTES = 0xC4;
        private static final int VEX_2BYTES = 0xC5;
    }
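
    // Layout note (a sketch of the standard REX scheme, for orientation): a REX prefix byte is
    // 0100WRXB, so the constants above are simply 0x40 plus the desired bit combination. For
    // example, REXWRB == 0x4D == 0b0100_1101 sets W (64-bit operand size), R (extends ModRM.reg)
    // and B (extends ModRM.rm or SIB.base), leaving X (the SIB.index extension) clear.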
    private static class VexPrefix {
        private static final int VEX_R = 0x80;
        private static final int VEX_W = 0x80;
    }

    private static class AvxVectorLen {
        private static final int AVX_128bit = 0x0;
        private static final int AVX_256bit = 0x1;
    }

    private static class VexSimdPrefix {
        private static final int VEX_SIMD_NONE = 0x0;
        private static final int VEX_SIMD_66 = 0x1;
        private static final int VEX_SIMD_F3 = 0x2;
        private static final int VEX_SIMD_F2 = 0x3;
    }

    private static class VexOpcode {
        private static final int VEX_OPCODE_NONE = 0x0;
        private static final int VEX_OPCODE_0F = 0x1;
        private static final int VEX_OPCODE_0F_38 = 0x2;
        private static final int VEX_OPCODE_0F_3A = 0x3;
    }

    private AMD64InstructionAttr curAttributes;

    AMD64InstructionAttr getCurAttributes() {
        return curAttributes;
    }

    void setCurAttributes(AMD64InstructionAttr attributes) {
        curAttributes = attributes;
    }

    /**
     * The x86 operand sizes.
     */
    public enum OperandSize {
        BYTE(1) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            }

            @Override
            protected int immediateSize() {
                return 1;
            }
        },

        WORD(2, 0x66) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                assert imm == (short) imm;
                asm.emitShort(imm);
            }

            @Override
            protected int immediateSize() {
                return 2;
            }
        },

        DWORD(4) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        QWORD(8) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        SS(4, 0xF3, true),

        SD(8, 0xF2, true),

        PS(16, true),

        PD(16, 0x66, true);

        private final int sizePrefix;

        private final int bytes;
        private final boolean xmm;

        OperandSize(int bytes) {
            this(bytes, 0);
        }

        OperandSize(int bytes, int sizePrefix) {
            this(bytes, sizePrefix, false);
        }

        OperandSize(int bytes, boolean xmm) {
            this(bytes, 0, xmm);
        }

        OperandSize(int bytes, int sizePrefix, boolean xmm) {
            this.sizePrefix = sizePrefix;
            this.bytes = bytes;
            this.xmm = xmm;
        }

        public int getBytes() {
            return bytes;
        }

        public boolean isXmmType() {
            return xmm;
        }

        /**
         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
         * as sign-extended 32-bit values.
         *
         * @param asm
         * @param imm
         */
        protected void emitImmediate(AMD64Assembler asm, int imm) {
            throw new UnsupportedOperationException();
        }

        protected int immediateSize() {
            throw new UnsupportedOperationException();
        }
    }
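
    // Worked example (illustrative): WORD carries the operand-size override 0x66 as its
    // sizePrefix, so 16-bit forms of an instruction are the 32-bit encoding preceded by 0x66.
    // QWORD deliberately reuses emitInt: outside of MOV, x86-64 has no general 64-bit immediate
    // form, so a "QWORD immediate" is emitted as 4 bytes and sign-extended by the CPU, e.g. the
    // immediate -1 is encoded as FF FF FF FF regardless of DWORD vs. QWORD operand size.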
    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        No16BitAssertion(CPU, CPU, DWORD, QWORD),
        No32BitAssertion(CPU, CPU, WORD, QWORD),
        QwordOnlyAssertion(CPU, CPU, QWORD),
        FloatingAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatingAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatingAssertion(XMM, CPU, DWORD, QWORD),
        FloatingToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }
    }

    public abstract static class OperandDataAnnotation extends CodeAnnotation {
        /**
         * The position (bytes from the beginning of the method) of the operand.
         */
        public final int operandPosition;
        /**
         * The size of the operand, in bytes.
         */
        public final int operandSize;
        /**
         * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
         * RIP-relative operands are relative to this position.
         */
        public final int nextInstructionPosition;

        OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition);

            this.operandPosition = operandPosition;
            this.operandSize = operandSize;
            this.nextInstructionPosition = nextInstructionPosition;
        }

        @Override
        public String toString() {
            return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
        }
    }

    /**
     * Annotation that stores additional information about the displacement of a
     * {@link Assembler#getPlaceholder placeholder address} that needs patching.
     */
    public static class AddressDisplacementAnnotation extends OperandDataAnnotation {
        AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Annotation that stores additional information about the immediate operand, e.g., of a call
     * instruction, that needs patching.
     */
    public static class ImmediateOperandAnnotation extends OperandDataAnnotation {
        ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }

    public boolean supports(CPUFeature feature) {
        return ((AMD64) target.arch).getFeatures().contains(feature);
    }

    private static int encode(Register r) {
        assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
        return r.encoding & 0x7;
    }

    /**
     * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
     * field. The X bit must be 0.
     */
    protected static int getRXB(Register reg, Register rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
        return rxb;
    }

    /**
     * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
     * are two cases for the memory operand:<br>
     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
     * <br>
     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
     */
    protected static int getRXB(Register reg, AMD64Address rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        if (!rm.getIndex().equals(Register.None)) {
            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
        }
        if (!rm.getBase().equals(Register.None)) {
            rxb |= (rm.getBase().encoding & 0x08) >> 3;
        }
        return rxb;
    }
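
    // Worked example (illustrative): the high (fourth) encoding bit of each register lands in
    // RXB as R = bit 2, X = bit 1, B = bit 0. For reg = r9 (encoding 9) and the address
    // [r13 + r14*2], getRXB yields (8 >> 1) | (8 >> 2) | (8 >> 3) = 0b111, which emitOpcode
    // later ORs into 0x40 to form the REX prefix 0x47.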
    /**
     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected void emitModRM(int reg, Register rm) {
        assert (reg & 0x07) == reg;
        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
    }

    /**
     * Emit the ModR/M byte for two register operands.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected void emitModRM(Register reg, Register rm) {
        emitModRM(reg.encoding & 0x07, rm);
    }

    protected void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     */
    protected void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize);
    }

    protected void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
        emitOperandHelper(reg, addr, false, additionalInstructionSize);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
     * extension in the R field.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
     *            so that the start position of the next instruction can be computed even though
     *            this instruction has not been completely emitted yet.
     */
    protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert (reg & 0x07) == reg;
        int regenc = reg << 3;

        Register base = addr.getBase();
        Register index = addr.getIndex();

        AMD64Address.Scale scale = addr.getScale();
        int disp = addr.getDisplacement();

        if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
            // [00 000 101] disp32
            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
            emitByte(0x05 | regenc);
            if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
                codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
            }
            emitInt(disp);
        } else if (base.isValid()) {
            int baseenc = base.isValid() ? encode(base) : 0;
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [base + indexscale + disp]
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base + indexscale]
                    // [00 reg 100][ss index base]
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x04 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                } else if (isByte(disp) && !force4Byte) {
                    // [base + indexscale + imm8]
                    // [01 reg 100][ss index base] imm8
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x44 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + indexscale + disp32]
                    // [10 reg 100][ss index base] disp32
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x84 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitInt(disp);
                }
            } else if (base.equals(rsp) || base.equals(r12)) {
                // [rsp + disp]
                if (disp == 0) {
                    // [rsp]
                    // [00 reg 100][00 100 100]
                    emitByte(0x04 | regenc);
                    emitByte(0x24);
                } else if (isByte(disp) && !force4Byte) {
                    // [rsp + imm8]
                    // [01 reg 100][00 100 100] disp8
                    emitByte(0x44 | regenc);
                    emitByte(0x24);
                    emitByte(disp & 0xFF);
                } else {
                    // [rsp + imm32]
                    // [10 reg 100][00 100 100] disp32
                    emitByte(0x84 | regenc);
                    emitByte(0x24);
                    emitInt(disp);
                }
            } else {
                // [base + disp]
                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base]
                    // [00 reg base]
                    emitByte(0x00 | regenc | baseenc);
                } else if (isByte(disp) && !force4Byte) {
                    // [base + disp8]
                    // [01 reg base] disp8
                    emitByte(0x40 | regenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + disp32]
                    // [10 reg base] disp32
                    emitByte(0x80 | regenc | baseenc);
                    emitInt(disp);
                }
            }
        } else {
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [indexscale + disp]
                // [00 reg 100][ss index 101] disp32
                assert !index.equals(rsp) : "illegal addressing mode";
                emitByte(0x04 | regenc);
                emitByte(scale.log2 << 6 | indexenc | 0x05);
                emitInt(disp);
            } else {
                // [disp] ABSOLUTE
                // [00 reg 100][00 100 101] disp32
                emitByte(0x04 | regenc);
                emitByte(0x25);
                emitInt(disp);
            }
        }
        setCurAttributes(null);
    }
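
    // Worked example (illustrative): encoding the operand [rbx + rcx*4 + 16] for reg = rdx
    // (encoding 2) takes the [base + indexscale + imm8] branch above and emits three bytes:
    // ModRM 0x54 (= 0x44 | 2 << 3), SIB 0x8B (= 2 << 6 | 1 << 3 | 3, i.e. scale 4, index rcx,
    // base rbx) and the displacement byte 0x10.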
    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        protected static final int P_0F = 0x0F;
        protected static final int P_0F38 = 0x380F;
        protected static final int P_0F3A = 0x3A0F;

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.sizePrefix != 0) {
                asm.emitByte(size.sizePrefix);
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }
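
    // Worked example (illustrative): for a QWORD register-register op on rax and rbx, rxb is 0,
    // so emitOpcode builds rexPrefix = 0x40 | 0x08 = 0x48 (REX.W) and emits it because it differs
    // from the neutral 0x40. The byte-register special case exists because without a REX prefix
    // the encodings 4-7 address AH/CH/DH/BH rather than SPL/BPL/SIL/DIL, so byte ops on those
    // registers must emit the otherwise-redundant 0x40. Note also that prefix2 values above 0xFF
    // (P_0F38/P_0F3A) go through emitShort, which writes little-endian and thus produces the two
    // escape bytes 0F 38 / 0F 3A in the right order.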
    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.bytes;
            }
        }
    }
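
    // Worked example (illustrative): IMUL_SX below is constructed with immIsByte = true, so
    // emitImmediate writes a single sign-extended byte and immediateSize reports 1 regardless
    // of the operand size, whereas the plain IMUL form defers to size.emitImmediate (4 bytes
    // for DWORD).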
    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of either RM or MR for 3 address forms.
     */
    public abstract static class AMD64RRROp extends AMD64Op {

        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF);
        public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC);
        public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD);
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion);
        public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.No16BitAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion);
        public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.No16BitAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordOnlyAssertion);
        public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);

        // TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient.
        public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x2A:
                case 0x2C:
                case 0x2E:
                case 0x5A:
                case 0x6E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x10:
                case 0x51:
                case 0x54:
                case 0x55:
                case 0x56:
                case 0x57:
                case 0x58:
                case 0x59:
                case 0x5C:
                case 0x5D:
                case 0x5E:
                case 0x5F:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x10:
                case 0x2A:
                case 0x2C:
                case 0x2E:
                case 0x6E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x51:
                case 0x54:
                case 0x55:
                case 0x56:
                case 0x57:
                case 0x58:
                case 0x59:
                case 0x5C:
                case 0x5D:
                case 0x5E:
                case 0x5F:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                if (noNds) {
                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
                } else {
                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }
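
    // Usage sketch (illustrative, assuming asm is an AMD64Assembler): AMD64RMOp.MOV.emit(asm,
    // DWORD, rax, rbx) takes the non-SIMD path and produces 8B C3, i.e. "mov eax, ebx": opcode
    // 0x8B followed by ModRM 0xC3 (mod = 11, reg = rax, rm = rbx). The same op with an
    // AMD64Address source instead routes through emitOperandHelper to build the
    // ModRM/SIB/displacement bytes.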
    /**
     * Opcode with operand order of RRM.
     */
    public static class AMD64RRMOp extends AMD64RRROp {
        protected AMD64RRMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RRMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
        }

        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src) {
            assert verify(asm, size, dst, src);
            int pre;
            int opc;
            boolean rexVexW = size == QWORD;
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
            int curPrefix = size.sizePrefix | prefix1;
            switch (curPrefix) {
                case 0x66:
                    pre = VexSimdPrefix.VEX_SIMD_66;
                    break;
                case 0xF2:
                    pre = VexSimdPrefix.VEX_SIMD_F2;
                    break;
                case 0xF3:
                    pre = VexSimdPrefix.VEX_SIMD_F3;
                    break;
                default:
                    pre = VexSimdPrefix.VEX_SIMD_NONE;
                    break;
            }
            switch (prefix2) {
                case P_0F:
                    opc = VexOpcode.VEX_OPCODE_0F;
                    break;
                case P_0F38:
                    opc = VexOpcode.VEX_OPCODE_0F_38;
                    break;
                case P_0F3A:
                    opc = VexOpcode.VEX_OPCODE_0F_3A;
                    break;
                default:
                    opc = VexOpcode.VEX_OPCODE_NONE;
                    break;
            }
            int encode = asm.simdPrefixAndEncode(dst, nds, src, pre, opc, attributes);
            asm.emitByte(op);
            asm.emitByte(0xC0 | encode);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, AMD64Address src) {
            assert verify(asm, size, dst, null);
            int pre;
            int opc;
            boolean rexVexW = size == QWORD;
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
            int curPrefix = size.sizePrefix | prefix1;
            switch (curPrefix) {
                case 0x66:
                    pre = VexSimdPrefix.VEX_SIMD_66;
                    break;
                case 0xF2:
                    pre = VexSimdPrefix.VEX_SIMD_F2;
                    break;
                case 0xF3:
                    pre = VexSimdPrefix.VEX_SIMD_F3;
                    break;
                default:
                    pre = VexSimdPrefix.VEX_SIMD_NONE;
                    break;
            }
            switch (prefix2) {
                case P_0F:
                    opc = VexOpcode.VEX_OPCODE_0F;
                    break;
                case P_0F38:
                    opc = VexOpcode.VEX_OPCODE_0F_38;
                    break;
                case P_0F3A:
                    opc = VexOpcode.VEX_OPCODE_0F_3A;
                    break;
                default:
                    opc = VexOpcode.VEX_OPCODE_NONE;
                    break;
            }
            asm.simdPrefix(dst, nds, src, pre, opc, attributes);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }
    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89);

        // MOVD and MOVQ are the same opcode, just with different operand size prefix
        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used.
        public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.IntegerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x7E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x11:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(src, Register.None, dst, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(src, src, dst, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, null, src);
            boolean isSimd = false;

            switch (op) {
                case 0x7E:
                case 0x11:
                    isSimd = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                asm.simdPrefix(src, Register.None, dst, pre, opc, attributes);
                asm.emitByte(op);
                asm.emitOperandHelper(src, dst, 0);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
                asm.emitOperandHelper(src, dst, 0);
            }
        }
    }
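
    // Usage sketch (illustrative): AMD64MROp.MOV.emit(asm, DWORD, rax, rbx) encodes the MR form
    // 89 D8 ("mov eax, ebx", with reg = src and rm = dst in the ModRM byte), whereas the RM form
    // above encodes the same instruction as 8B C3. Both are valid encodings of a
    // register-to-register move; the two op classes exist so that either operand may be memory.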
    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2);
        public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3);
        public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0);
        public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP = new AMD64MOp("POP", 0x8F, 0, OpAssertion.No32BitAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }
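
    // Usage sketch (illustrative): AMD64MOp.NEG.emit(asm, DWORD, rax) emits F7 D8 ("neg eax"):
    // the group opcode 0xF7 with the extension 3 placed in the reg field of the ModRM byte
    // (0xC0 | 3 << 3 | 0). The ext field is what distinguishes NOT/NEG/MUL/IMUL/DIV/IDIV, which
    // all share opcode 0xF7.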
    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }
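
    // Usage sketch (illustrative): AMD64MIOp.MOV.emit(asm, DWORD, rcx, 42) emits
    // C7 C1 2A 00 00 00 ("mov ecx, 42"). For the memory form, note that immediateSize(size) is
    // passed to emitOperandHelper as additionalInstructionSize so that RIP-relative annotations
    // still point at the start of the next instruction, past the trailing immediate.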
    /**
     * Opcodes with operand order of RMI.
     *
     * We only have one form of round as the operation is always treated with single variant input,
     * making its extension to 3 address forms redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x0A:
                case 0x0B:
                    isSimd = true;
                    noNds = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
                emitImmediate(asm, size, imm);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
                emitImmediate(asm, size, imm);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);

            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x0A:
                case 0x0B:
                    isSimd = true;
                    noNds = true;
                    break;
            }

            if (isSimd) {
                int pre;
                int opc;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        break;
                }

                if (noNds) {
                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
                } else {
                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, immediateSize(size));
                emitImmediate(asm, size, imm);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, immediateSize(size));
                emitImmediate(asm, size, imm);
            }
        }
    }
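
    // Usage sketch (illustrative): AMD64RMIOp.IMUL_SX.emit(asm, DWORD, rdx, rax, 10) emits
    // 6B D0 0A ("imul edx, eax, 10"), using the sign-extended byte-immediate form 0x6B; the
    // ROUNDSS/ROUNDSD entries instead take the SIMD path, since their opcodes 0x0A/0x0B appear
    // in the switch above.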
    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
        public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
        public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatingAssertion);
        public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51);
        public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
        public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
        public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
        public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
        public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58);
        public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59);
        public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C);
        public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D);
        public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E);
        public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F);
        // @formatter:on

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatingAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }
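
    // Usage sketch (illustrative): SSEOp.ADD.emit(asm, SD, xmm0, xmm1) corresponds to
    // "addsd xmm0, xmm1"; SD contributes the mandatory 0xF2 prefix, so on a pre-AVX target this
    // encodes as F2 0F 58 C1, while on AVX-capable targets simdPrefixAndEncode may emit the
    // equivalent VEX-encoded form instead.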
    public static class AVXOp extends AMD64RRMOp {
        // @formatter:off
        public static final AVXOp AND = new AVXOp("AND", P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
        public static final AVXOp ANDN = new AVXOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
        public static final AVXOp OR = new AVXOp("OR", P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
        public static final AVXOp XOR = new AVXOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
        public static final AVXOp ADD = new AVXOp("ADD", P_0F, 0x58);
        public static final AVXOp MUL = new AVXOp("MUL", P_0F, 0x59);
        public static final AVXOp SUB = new AVXOp("SUB", P_0F, 0x5C);
        public static final AVXOp MIN = new AVXOp("MIN", P_0F, 0x5D);
        public static final AVXOp DIV = new AVXOp("DIV", P_0F, 0x5E);
        public static final AVXOp MAX = new AVXOp("MAX", P_0F, 0x5F);
        // @formatter:on

        protected AVXOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatingAssertion);
        }

        protected AVXOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.AVX);
        }
    }
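
    // Usage sketch (illustrative): AVX ops are non-destructive three-operand forms, e.g.
    // AVXOp.ADD.emit(asm, PS, xmm0, xmm1, xmm2) corresponds to "vaddps xmm0, xmm1, xmm2", with
    // the extra source register nds carried in the VEX prefix rather than in the ModRM byte.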
    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }
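
    // Worked example (illustrative): getMIOpcode(DWORD, isByte(imm)) is how addl(Register, int)
    // below picks its encoding. For addl(rax, 1) the sign-extended form 0x83 applies, giving
    // 83 C0 01, three bytes instead of the five-byte 0x81 form 81 C0 01 00 00 00; both decode
    // to "add eax, 1".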
    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion);
        }
    }
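
    // Usage sketch (illustrative): AMD64Shift.SHL.miOp.emit(asm, DWORD, rax, 3) emits C1 E0 03
    // ("shl eax, 3"); m1Op (0xD1) is the shift-by-one form and mcOp (0xD3) shifts by CL. The
    // extension code in the ModRM reg field (here 4 for SHL) selects the operation within the
    // shared shift-group opcodes.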
    public final void addl(AMD64Address dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, Register src) {
        ADD.rmOp.emit(this, DWORD, dst, src);
    }

    public final void addpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitByte(0xC0 | encode);
    }

    public final void addpd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitOperandHelper(dst, src, 0);
    }

    public final void addsd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitByte(0xC0 | encode);
    }

    public final void addsd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitOperandHelper(dst, src, 0);
    }

    private void addrNop4() {
        // 4 bytes: NOP DWORD PTR [EAX+0]
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
        emitByte(0); // 8-bits offset (1 byte)
    }

    private void addrNop5() {
        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitByte(0); // 8-bits offset (1 byte)
    }

    private void addrNop7() {
        // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
        emitInt(0); // 32-bits offset (4 bytes)
    }

    private void addrNop8() {
        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitInt(0); // 32-bits offset (4 bytes)
    }
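
    // Decoding note (illustrative): the addrNop bytes are the recommended multi-byte NOPs,
    // i.e. 0F 1F /0 with a dummy memory operand. For addrNop5 the bytes 0F 1F 44 00 00 decode
    // as "nop dword ptr [eax + eax*1 + 0]": ModRM 0x44 selects a SIB byte plus disp8, and
    // SIB 0x00 is base = eax, index = eax, scale = 1.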
EAXEnc, EAXEnc); 1688 emitByte(0); // 8-bits offset (1 byte) 1689 } 1690 1691 private void addrNop5() { 1692 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 1693 emitByte(0x0F); 1694 emitByte(0x1F); 1695 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); 1696 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1697 emitByte(0); // 8-bits offset (1 byte) 1698 } 1699 1700 private void addrNop7() { 1701 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 1702 emitByte(0x0F); 1703 emitByte(0x1F); 1704 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); 1705 emitInt(0); // 32-bits offset (4 bytes) 1706 } 1707 1708 private void addrNop8() { 1709 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 1710 emitByte(0x0F); 1711 emitByte(0x1F); 1712 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); 1713 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1714 emitInt(0); // 32-bits offset (4 bytes) 1715 } 1716 1717 public final void andl(Register dst, int imm32) { 1718 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1719 } 1720 1721 public final void andl(Register dst, Register src) { 1722 AND.rmOp.emit(this, DWORD, dst, src); 1723 } 1724 1725 public final void andpd(Register dst, Register src) { 1726 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 1727 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1728 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 1729 emitByte(0x54); 1730 emitByte(0xC0 | encode); 1731 } 1732 1733 public final void andpd(Register dst, AMD64Address src) { 1734 assert dst.getRegisterCategory().equals(AMD64.XMM); 1735 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1736 simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 1737 emitByte(0x54); 1738 emitOperandHelper(dst, src, 0); 1739 } 1740 1741 public final void bsrl(Register dst, Register src) { 1742 int encode = prefixAndEncode(dst.encoding(), src.encoding()); 1743 emitByte(0x0F); 1744 emitByte(0xBD); 1745 emitByte(0xC0 | encode); 1746 } 1747 1748 public final void bswapl(Register reg) { 1749 int encode = prefixAndEncode(reg.encoding); 1750 emitByte(0x0F); 1751 emitByte(0xC8 | encode); 1752 } 1753 1754 public final void cdql() { 1755 emitByte(0x99); 1756 } 1757 1758 public final void cmovl(ConditionFlag cc, Register dst, Register src) { 1759 int encode = prefixAndEncode(dst.encoding, src.encoding); 1760 emitByte(0x0F); 1761 emitByte(0x40 | cc.getValue()); 1762 emitByte(0xC0 | encode); 1763 } 1764 1765 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) { 1766 prefix(src, dst); 1767 emitByte(0x0F); 1768 emitByte(0x40 | cc.getValue()); 1769 emitOperandHelper(dst, src, 0); 1770 } 1771 1772 public final void cmpl(Register dst, int imm32) { 1773 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1774 } 1775 1776 public final void cmpl(Register dst, Register src) { 1777 CMP.rmOp.emit(this, DWORD, dst, src); 1778 } 1779 1780 public final void cmpl(Register dst, AMD64Address src) { 1781 CMP.rmOp.emit(this, DWORD, dst, src); 1782 } 1783 1784 public final void cmpl(AMD64Address dst, int imm32) { 1785 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, 
dst, imm32); 1786 } 1787 1788 // The 32-bit cmpxchg compares the value at adr with the contents of rax; if the values are 1789 // equal, reg is stored into adr, otherwise the value at adr is loaded into rax. 1790 // The ZF is set if the compared values were equal, and cleared otherwise. 1791 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg 1792 prefix(adr, reg); 1793 emitByte(0x0F); 1794 emitByte(0xB1); 1795 emitOperandHelper(reg, adr, 0); 1796 } 1797 1798 public final void cvtsi2sdl(Register dst, Register src) { 1799 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU); 1800 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1801 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 1802 emitByte(0x2A); 1803 emitByte(0xC0 | encode); 1804 } 1805 1806 public final void cvttsd2sil(Register dst, Register src) { 1807 assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM); 1808 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1809 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 1810 emitByte(0x2C); 1811 emitByte(0xC0 | encode); 1812 } 1813 1814 protected final void decl(AMD64Address dst) { 1815 prefix(dst); 1816 emitByte(0xFF); 1817 emitOperandHelper(1, dst, 0); 1818 } 1819 1820 public final void divsd(Register dst, Register src) { 1821 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 1822 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1823 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 1824 emitByte(0x5E); 1825 emitByte(0xC0 | encode); 1826 } 1827 1828 public final void hlt() { 1829 emitByte(0xF4); 1830 } 1831 1832 public final void imull(Register dst, Register src, int value) { 1833 if (isByte(value)) { 1834 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value); 1835 } else { 1836 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value); 1837 } 1838 } 1839 1840 protected final void incl(AMD64Address dst) { 1841 prefix(dst); 1842 emitByte(0xFF); 1843 emitOperandHelper(0, dst, 0); 1844 } 1845 1846 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { 1847 int shortSize = 2; 1848 int longSize = 6; 1849 long disp = jumpTarget - position(); 1850 if (!forceDisp32 && isByte(disp - shortSize)) { 1851 // 0111 tttn #8-bit disp 1852 emitByte(0x70 | cc.getValue()); 1853 emitByte((int) ((disp - shortSize) & 0xFF)); 1854 } else { 1855 // 0000 1111 1000 tttn #32-bit disp 1856 assert isInt(disp - longSize) : "must be 32bit offset (call4)"; 1857 emitByte(0x0F); 1858 emitByte(0x80 | cc.getValue()); 1859 emitInt((int) (disp - longSize)); 1860 } 1861 } 1862 1863 public final void jcc(ConditionFlag cc, Label l) { 1864 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; 1865 if (l.isBound()) { 1866 jcc(cc, l.position(), false); 1867 } else { 1868 // Note: could eliminate cond.
jumps to this jump if the condition 1869 // is the same; however, that seems to be a rather unlikely case. 1870 // Note: use jccb() if the label to be bound is very close, to get 1871 // an 8-bit displacement 1872 l.addPatchAt(position()); 1873 emitByte(0x0F); 1874 emitByte(0x80 | cc.getValue()); 1875 emitInt(0); 1876 } 1877 1878 } 1879 1880 public final void jccb(ConditionFlag cc, Label l) { 1881 if (l.isBound()) { 1882 int shortSize = 2; 1883 int entry = l.position(); 1884 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp"; 1885 long disp = entry - position(); 1886 // 0111 tttn #8-bit disp 1887 emitByte(0x70 | cc.getValue()); 1888 emitByte((int) ((disp - shortSize) & 0xFF)); 1889 } else { 1890 l.addPatchAt(position()); 1891 emitByte(0x70 | cc.getValue()); 1892 emitByte(0); 1893 } 1894 } 1895 1896 public final void jmp(int jumpTarget, boolean forceDisp32) { 1897 int shortSize = 2; 1898 int longSize = 5; 1899 long disp = jumpTarget - position(); 1900 if (!forceDisp32 && isByte(disp - shortSize)) { 1901 emitByte(0xEB); 1902 emitByte((int) ((disp - shortSize) & 0xFF)); 1903 } else { 1904 emitByte(0xE9); 1905 emitInt((int) (disp - longSize)); 1906 } 1907 } 1908 1909 @Override 1910 public final void jmp(Label l) { 1911 if (l.isBound()) { 1912 jmp(l.position(), false); 1913 } else { 1914 // By default, forward jumps are always 32-bit displacements, since 1915 // we can't yet know where the label will be bound. If you're sure that 1916 // the forward jump will not run beyond 256 bytes, use jmpb to 1917 // force an 8-bit displacement. 1918 1919 l.addPatchAt(position()); 1920 emitByte(0xE9); 1921 emitInt(0); 1922 } 1923 } 1924 1925 public final void jmp(Register entry) { 1926 int encode = prefixAndEncode(entry.encoding); 1927 emitByte(0xFF); 1928 emitByte(0xE0 | encode); 1929 } 1930 1931 public final void jmp(AMD64Address adr) { 1932 prefix(adr); 1933 emitByte(0xFF); 1934 emitOperandHelper(rsp, adr, 0); 1935 } 1936 1937 public final void jmpb(Label l) { 1938 if (l.isBound()) { 1939 int shortSize = 2; 1940 int entry = l.position(); 1941 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp"; 1942 long offs = entry - position(); 1943 emitByte(0xEB); 1944 emitByte((int) ((offs - shortSize) & 0xFF)); 1945 } else { 1946 1947 l.addPatchAt(position()); 1948 emitByte(0xEB); 1949 emitByte(0); 1950 } 1951 } 1952 1953 public final void leaq(Register dst, AMD64Address src) { 1954 prefixq(src, dst); 1955 emitByte(0x8D); 1956 emitOperandHelper(dst, src, 0); 1957 } 1958 1959 public final void leave() { 1960 emitByte(0xC9); 1961 } 1962 1963 public final void lock() { 1964 emitByte(0xF0); 1965 } 1966 1967 public final void movapd(Register dst, Register src) { 1968 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 1969 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 1970 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 1971 emitByte(0x28); 1972 emitByte(0xC0 | encode); 1973 } 1974 1975 public final void movaps(Register dst, Register src) { 1976 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 1977 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */
false, target); 1978 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes); 1979 emitByte(0x28); 1980 emitByte(0xC0 | encode); 1981 } 1982 1983 public final void movb(AMD64Address dst, int imm8) { 1984 prefix(dst); 1985 emitByte(0xC6); 1986 emitOperandHelper(0, dst, 1); 1987 emitByte(imm8); 1988 } 1989 1990 public final void movb(AMD64Address dst, Register src) { 1991 assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register"; 1992 prefix(dst, src, true); 1993 emitByte(0x88); 1994 emitOperandHelper(src, dst, 0); 1995 } 1996 1997 public final void movl(Register dst, int imm32) { 1998 int encode = prefixAndEncode(dst.encoding); 1999 emitByte(0xB8 | encode); 2000 emitInt(imm32); 2001 } 2002 2003 public final void movl(Register dst, Register src) { 2004 int encode = prefixAndEncode(dst.encoding, src.encoding); 2005 emitByte(0x8B); 2006 emitByte(0xC0 | encode); 2007 } 2008 2009 public final void movl(Register dst, AMD64Address src) { 2010 prefix(src, dst); 2011 emitByte(0x8B); 2012 emitOperandHelper(dst, src, 0); 2013 } 2014 2015 public final void movl(AMD64Address dst, int imm32) { 2016 prefix(dst); 2017 emitByte(0xC7); 2018 emitOperandHelper(0, dst, 4); 2019 emitInt(imm32); 2020 } 2021 2022 public final void movl(AMD64Address dst, Register src) { 2023 prefix(dst, src); 2024 emitByte(0x89); 2025 emitOperandHelper(src, dst, 0); 2026 } 2027 2028 /** 2029 * New CPUs require use of movsd and movss to avoid partial register stall when loading from 2030 * memory. But for old Opteron use movlpd instead of movsd. The selection is done in 2031 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and 2032 * {@link AMD64MacroAssembler#movflt(Register, Register)}. 2033 */ 2034 public final void movlpd(Register dst, AMD64Address src) { 2035 assert dst.getRegisterCategory().equals(AMD64.XMM); 2036 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2037 simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2038 emitByte(0x12); 2039 emitOperandHelper(dst, src, 0); 2040 } 2041 2042 public final void movlhps(Register dst, Register src) { 2043 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2044 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2045 int encode = simdPrefixAndEncode(dst, src, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes); 2046 emitByte(0x16); 2047 emitByte(0xC0 | encode); 2048 } 2049 2050 public final void movq(Register dst, AMD64Address src) { 2051 movq(dst, src, false); 2052 } 2053 2054 public final void movq(Register dst, AMD64Address src, boolean wide) { 2055 if (dst.getRegisterCategory().equals(AMD64.XMM)) { 2056 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ wide, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2057 simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 2058 emitByte(0x7E); 2059 emitOperandHelper(dst, src, wide, 0); 2060 } else { 2061 // gpr version of movq 2062 prefixq(src, dst); 2063 emitByte(0x8B); 2064 emitOperandHelper(dst, src, wide, 0); 2065 } 2066 } 2067 2068 public final void 
movq(Register dst, Register src) { 2069 int encode = prefixqAndEncode(dst.encoding, src.encoding); 2070 emitByte(0x8B); 2071 emitByte(0xC0 | encode); 2072 } 2073 2074 public final void movq(AMD64Address dst, Register src) { 2075 if (src.getRegisterCategory().equals(AMD64.XMM)) { 2076 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2077 simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2078 emitByte(0xD6); 2079 emitOperandHelper(src, dst, 0); 2080 } else { 2081 // gpr version of movq 2082 prefixq(dst, src); 2083 emitByte(0x89); 2084 emitOperandHelper(src, dst, 0); 2085 } 2086 } 2087 2088 public final void movsbl(Register dst, AMD64Address src) { 2089 prefix(src, dst); 2090 emitByte(0x0F); 2091 emitByte(0xBE); 2092 emitOperandHelper(dst, src, 0); 2093 } 2094 2095 public final void movsbl(Register dst, Register src) { 2096 int encode = prefixAndEncode(dst.encoding, false, src.encoding, true); 2097 emitByte(0x0F); 2098 emitByte(0xBE); 2099 emitByte(0xC0 | encode); 2100 } 2101 2102 public final void movsbq(Register dst, AMD64Address src) { 2103 prefixq(src, dst); 2104 emitByte(0x0F); 2105 emitByte(0xBE); 2106 emitOperandHelper(dst, src, 0); 2107 } 2108 2109 public final void movsbq(Register dst, Register src) { 2110 int encode = prefixqAndEncode(dst.encoding, src.encoding); 2111 emitByte(0x0F); 2112 emitByte(0xBE); 2113 emitByte(0xC0 | encode); 2114 } 2115 2116 public final void movsd(Register dst, Register src) { 2117 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2118 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2119 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2120 emitByte(0x10); 2121 emitByte(0xC0 | encode); 2122 } 2123 2124 public final void movsd(Register dst, AMD64Address src) { 2125 assert dst.getRegisterCategory().equals(AMD64.XMM); 2126 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2127 simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2128 emitByte(0x10); 2129 emitOperandHelper(dst, src, 0); 2130 } 2131 2132 public final void movsd(AMD64Address dst, Register src) { 2133 assert src.getRegisterCategory().equals(AMD64.XMM); 2134 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2135 simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2136 emitByte(0x11); 2137 emitOperandHelper(src, dst, 0); 2138 } 2139 2140 public final void movss(Register dst, Register src) { 2141 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2142 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2143 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 2144 emitByte(0x10); 2145 
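// The 0xC0 | encode idiom below forms the register-direct ModRM byte used by all the
// reg-reg SSE emitters in this file: mod = 11 (0xC0), with 'encode' already holding
// (reg << 3) | rm from simdPrefixAndEncode. As a worked example (assuming no AVX, so the
// legacy-prefix path is taken, and register encodings below 8, so no REX is needed):
// movss(xmm1, xmm2) emits F3 0F 10 CA, where 0xCA = 0xC0 | (1 << 3) | 2.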
emitByte(0xC0 | encode); 2146 } 2147 2148 public final void movss(Register dst, AMD64Address src) { 2149 assert dst.getRegisterCategory().equals(AMD64.XMM); 2150 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2151 simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 2152 emitByte(0x10); 2153 emitOperandHelper(dst, src, 0); 2154 } 2155 2156 public final void movss(AMD64Address dst, Register src) { 2157 assert src.getRegisterCategory().equals(AMD64.XMM); 2158 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2159 simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 2160 emitByte(0x11); 2161 emitOperandHelper(src, dst, 0); 2162 } 2163 2164 public final void mulpd(Register dst, Register src) { 2165 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2166 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2167 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2168 emitByte(0x59); 2169 emitByte(0xC0 | encode); 2170 } 2171 2172 public final void mulpd(Register dst, AMD64Address src) { 2173 assert dst.getRegisterCategory().equals(AMD64.XMM); 2174 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2175 simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2176 emitByte(0x59); 2177 emitOperandHelper(dst, src, 0); 2178 } 2179 2180 public final void mulsd(Register dst, Register src) { 2181 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2182 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2183 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2184 emitByte(0x59); 2185 emitByte(0xC0 | encode); 2186 } 2187 2188 public final void mulsd(Register dst, AMD64Address src) { 2189 assert dst.getRegisterCategory().equals(AMD64.XMM); 2190 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2191 simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2192 emitByte(0x59); 2193 emitOperandHelper(dst, src, 0); 2194 } 2195 2196 public final void mulss(Register dst, Register src) { 2197 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2198 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2199 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 2200 emitByte(0x59); 2201 emitByte(0xC0 | encode); 2202 } 2203 2204 public final 
void movswl(Register dst, AMD64Address src) { 2205 prefix(src, dst); 2206 emitByte(0x0F); 2207 emitByte(0xBF); 2208 emitOperandHelper(dst, src, 0); 2209 } 2210 2211 public final void movw(AMD64Address dst, int imm16) { 2212 emitByte(0x66); // switch to 16-bit mode 2213 prefix(dst); 2214 emitByte(0xC7); 2215 emitOperandHelper(0, dst, 2); 2216 emitShort(imm16); 2217 } 2218 2219 public final void movw(AMD64Address dst, Register src) { 2220 emitByte(0x66); 2221 prefix(dst, src); 2222 emitByte(0x89); 2223 emitOperandHelper(src, dst, 0); 2224 } 2225 2226 public final void movzbl(Register dst, AMD64Address src) { 2227 prefix(src, dst); 2228 emitByte(0x0F); 2229 emitByte(0xB6); 2230 emitOperandHelper(dst, src, 0); 2231 } 2232 2233 public final void movzwl(Register dst, AMD64Address src) { 2234 prefix(src, dst); 2235 emitByte(0x0F); 2236 emitByte(0xB7); 2237 emitOperandHelper(dst, src, 0); 2238 } 2239 2240 public final void negl(Register dst) { 2241 NEG.emit(this, DWORD, dst); 2242 } 2243 2244 public final void notl(Register dst) { 2245 NOT.emit(this, DWORD, dst); 2246 } 2247 2248 @Override 2249 public final void ensureUniquePC() { 2250 nop(); 2251 } 2252 2253 public final void nop() { 2254 nop(1); 2255 } 2256 2257 public void nop(int count) { 2258 int i = count; 2259 if (UseNormalNop) { 2260 assert i > 0 : " "; 2261 // The fancy nops aren't currently recognized by debuggers, making it a 2262 // pain to disassemble code while debugging. If asserts are on, clearly 2263 // speed is not an issue, so simply use the single-byte traditional nop 2264 // to do alignment. 2265 2266 for (; i > 0; i--) { 2267 emitByte(0x90); 2268 } 2269 return; 2270 } 2271 2272 if (UseAddressNop) { 2273 // 2274 // Using multi-byte nops "0x0F 0x1F [Address]" for AMD. 2275 // 1: 0x90 2276 // 2: 0x66 0x90 2277 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2278 // 4: 0x0F 0x1F 0x40 0x00 2279 // 5: 0x0F 0x1F 0x44 0x00 0x00 2280 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2281 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2282 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2283 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2284 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2285 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2286 2287 // The rest of the encoding is AMD-specific - use consecutive address nops 2288 2289 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2290 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2291 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2292 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2293 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2294 // Size prefixes (0x66) are added for larger sizes 2295 2296 while (i >= 22) { 2297 i -= 11; 2298 emitByte(0x66); // size prefix 2299 emitByte(0x66); // size prefix 2300 emitByte(0x66); // size prefix 2301 addrNop8(); 2302 } 2303 // Generate the first nop for sizes between 21 and 12 2304 switch (i) { 2305 case 21: 2306 i -= 11; 2307 emitByte(0x66); // size prefix 2308 emitByte(0x66); // size prefix 2309 emitByte(0x66); // size prefix 2310 addrNop8(); 2311 break; 2312 case 20: 2313 case 19: 2314 i -= 10; 2315 emitByte(0x66); // size prefix 2316 emitByte(0x66); // size prefix 2317 addrNop8(); 2318 break; 2319 case 18: 2320 case 17: 2321 i -= 9; 2322 emitByte(0x66); // size prefix 2323 addrNop8(); 2324 break; 2325 case 16: 2326 case 15: 2327 i -= 8; 2328 addrNop8(); 2329 break; 2330 case 14:
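// Cases 14 and 13 fall through to the shared addrNop7() arm below; the remainder (7 or
// 6 bytes) is then emitted by the second switch. As a worked example: nop(14) emits
// addrNop7() here, leaving i == 7, and then addrNop7() again via 'case 7' further down,
// for 7 + 7 = 14 bytes; nop(13) leaves i == 6, covered by 0x66 plus addrNop5().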
2331 case 13: 2332 i -= 7; 2333 addrNop7(); 2334 break; 2335 case 12: 2336 i -= 6; 2337 emitByte(0x66); // size prefix 2338 addrNop5(); 2339 break; 2340 default: 2341 assert i < 12; 2342 } 2343 2344 // Generate second nop for size between 11-1 2345 switch (i) { 2346 case 11: 2347 emitByte(0x66); // size prefix 2348 emitByte(0x66); // size prefix 2349 emitByte(0x66); // size prefix 2350 addrNop8(); 2351 break; 2352 case 10: 2353 emitByte(0x66); // size prefix 2354 emitByte(0x66); // size prefix 2355 addrNop8(); 2356 break; 2357 case 9: 2358 emitByte(0x66); // size prefix 2359 addrNop8(); 2360 break; 2361 case 8: 2362 addrNop8(); 2363 break; 2364 case 7: 2365 addrNop7(); 2366 break; 2367 case 6: 2368 emitByte(0x66); // size prefix 2369 addrNop5(); 2370 break; 2371 case 5: 2372 addrNop5(); 2373 break; 2374 case 4: 2375 addrNop4(); 2376 break; 2377 case 3: 2378 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2379 emitByte(0x66); // size prefix 2380 emitByte(0x66); // size prefix 2381 emitByte(0x90); // nop 2382 break; 2383 case 2: 2384 emitByte(0x66); // size prefix 2385 emitByte(0x90); // nop 2386 break; 2387 case 1: 2388 emitByte(0x90); // nop 2389 break; 2390 default: 2391 assert i == 0; 2392 } 2393 return; 2394 } 2395 2396 // Using nops with size prefixes "0x66 0x90". 2397 // From AMD Optimization Guide: 2398 // 1: 0x90 2399 // 2: 0x66 0x90 2400 // 3: 0x66 0x66 0x90 2401 // 4: 0x66 0x66 0x66 0x90 2402 // 5: 0x66 0x66 0x90 0x66 0x90 2403 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2404 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2405 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2406 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2407 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2408 // 2409 while (i > 12) { 2410 i -= 4; 2411 emitByte(0x66); // size prefix 2412 emitByte(0x66); 2413 emitByte(0x66); 2414 emitByte(0x90); // nop 2415 } 2416 // 1 - 12 nops 2417 if (i > 8) { 2418 if (i > 9) { 2419 i -= 1; 2420 emitByte(0x66); 2421 } 2422 i -= 3; 2423 emitByte(0x66); 2424 emitByte(0x66); 2425 emitByte(0x90); 2426 } 2427 // 1 - 8 nops 2428 if (i > 4) { 2429 if (i > 6) { 2430 i -= 1; 2431 emitByte(0x66); 2432 } 2433 i -= 3; 2434 emitByte(0x66); 2435 emitByte(0x66); 2436 emitByte(0x90); 2437 } 2438 switch (i) { 2439 case 4: 2440 emitByte(0x66); 2441 emitByte(0x66); 2442 emitByte(0x66); 2443 emitByte(0x90); 2444 break; 2445 case 3: 2446 emitByte(0x66); 2447 emitByte(0x66); 2448 emitByte(0x90); 2449 break; 2450 case 2: 2451 emitByte(0x66); 2452 emitByte(0x90); 2453 break; 2454 case 1: 2455 emitByte(0x90); 2456 break; 2457 default: 2458 assert i == 0; 2459 } 2460 } 2461 2462 public final void orl(Register dst, Register src) { 2463 OR.rmOp.emit(this, DWORD, dst, src); 2464 } 2465 2466 public final void orl(Register dst, int imm32) { 2467 OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2468 } 2469 2470 public final void pop(Register dst) { 2471 int encode = prefixAndEncode(dst.encoding); 2472 emitByte(0x58 | encode); 2473 } 2474 2475 public void popfq() { 2476 emitByte(0x9D); 2477 } 2478 2479 public final void ptest(Register dst, Register src) { 2480 assert supports(CPUFeature.SSE4_1); 2481 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2482 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2483 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, 
VexOpcode.VEX_OPCODE_0F_38, attributes); 2484 emitByte(0x17); 2485 emitByte(0xC0 | encode); 2486 } 2487 2488 public final void vptest(Register dst, Register src) { 2489 assert supports(CPUFeature.AVX); 2490 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2491 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2492 int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes); 2493 emitByte(0x17); 2494 emitByte(0xC0 | encode); 2495 } 2496 2497 public final void push(Register src) { 2498 int encode = prefixAndEncode(src.encoding); 2499 emitByte(0x50 | encode); 2500 } 2501 2502 public void pushfq() { 2503 emitByte(0x9c); 2504 } 2505 2506 public final void paddd(Register dst, Register src) { 2507 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2508 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2509 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2510 emitByte(0xFE); 2511 emitByte(0xC0 | encode); 2512 } 2513 2514 public final void paddq(Register dst, Register src) { 2515 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2516 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2517 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2518 emitByte(0xD4); 2519 emitByte(0xC0 | encode); 2520 } 2521 2522 public final void pextrw(Register dst, Register src, int imm8) { 2523 assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM); 2524 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2525 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2526 emitByte(0xC5); 2527 emitByte(0xC0 | encode); 2528 emitByte(imm8); 2529 } 2530 2531 public final void pinsrw(Register dst, Register src, int imm8) { 2532 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU); 2533 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2534 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2535 emitByte(0xC4); 2536 emitByte(0xC0 | encode); 2537 emitByte(imm8); 2538 } 2539 2540 public final void por(Register dst, Register src) { 2541 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2542 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2543 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2544 
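// 0xEB is the opcode byte of POR (66 0F EB /r). The packed-integer helpers around here
// all follow the same three-step shape: SIMD/VEX prefix (VEX_SIMD_66 in the 0F opcode
// map), one opcode byte, then a register-direct ModRM. As a worked example (assuming no
// AVX, so the legacy 66 prefix is emitted): por(xmm0, xmm1) emits 66 0F EB C1, with
// ModRM 0xC1 = 0xC0 | (0 << 3) | 1.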
emitByte(0xEB); 2545 emitByte(0xC0 | encode); 2546 } 2547 2548 public final void pand(Register dst, Register src) { 2549 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2550 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2551 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2552 emitByte(0xDB); 2553 emitByte(0xC0 | encode); 2554 } 2555 2556 public final void pxor(Register dst, Register src) { 2557 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2558 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2559 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2560 emitByte(0xEF); 2561 emitByte(0xC0 | encode); 2562 } 2563 2564 public final void vpxor(Register dst, Register nds, Register src) { 2565 assert supports(CPUFeature.AVX); 2566 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2567 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2568 int encode = vexPrefixAndEncode(dst, nds, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2569 emitByte(0xEF); 2570 emitByte(0xC0 | encode); 2571 } 2572 2573 public final void pslld(Register dst, int imm8) { 2574 assert isUByte(imm8) : "invalid value"; 2575 assert dst.getRegisterCategory().equals(AMD64.XMM); 2576 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2577 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 2578 int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2579 emitByte(0x72); 2580 emitByte(0xC0 | encode); 2581 emitByte(imm8 & 0xFF); 2582 } 2583 2584 public final void psllq(Register dst, Register shift) { 2585 assert dst.getRegisterCategory().equals(AMD64.XMM) && shift.getRegisterCategory().equals(AMD64.XMM); 2586 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2587 int encode = simdPrefixAndEncode(dst, dst, shift, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2588 emitByte(0xF3); 2589 emitByte(0xC0 | encode); 2590 } 2591 2592 public final void psllq(Register dst, int imm8) { 2593 assert isUByte(imm8) : "invalid value"; 2594 assert dst.getRegisterCategory().equals(AMD64.XMM); 2595 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2596 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 2597 int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2598 emitByte(0x73); 2599 emitByte(0xC0 | encode); 2600 emitByte(imm8); 2601 } 2602 2603 public final void psrad(Register dst, int imm8) { 2604 assert isUByte(imm8) : "invalid value"; 2605 assert 
dst.getRegisterCategory().equals(AMD64.XMM); 2606 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2607 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 2608 int encode = simdPrefixAndEncode(AMD64.xmm4, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2609 emitByte(0x72); 2610 emitByte(0xC0 | encode); 2611 emitByte(imm8); 2612 } 2613 2614 public final void psrld(Register dst, int imm8) { 2615 assert isUByte(imm8) : "invalid value"; 2616 assert dst.getRegisterCategory().equals(AMD64.XMM); 2617 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2618 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 2619 int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2620 emitByte(0x72); 2621 emitByte(0xC0 | encode); 2622 emitByte(imm8); 2623 } 2624 2625 public final void psrlq(Register dst, int imm8) { 2626 assert isUByte(imm8) : "invalid value"; 2627 assert dst.getRegisterCategory().equals(AMD64.XMM); 2628 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2629 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 2630 int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2631 emitByte(0x73); 2632 emitByte(0xC0 | encode); 2633 emitByte(imm8); 2634 } 2635 2636 public final void pshufd(Register dst, Register src, int imm8) { 2637 assert isUByte(imm8) : "invalid value"; 2638 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2639 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2640 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2641 emitByte(0x70); 2642 emitByte(0xC0 | encode); 2643 emitByte(imm8); 2644 } 2645 2646 public final void psubd(Register dst, Register src) { 2647 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2648 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2649 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2650 emitByte(0xFA); 2651 emitByte(0xC0 | encode); 2652 } 2653 2654 public final void rcpps(Register dst, Register src) { 2655 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2656 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target); 2657 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes); 2658 emitByte(0x53); 2659 emitByte(0xC0 | encode); 2660 } 2661 2662 public final void ret(int imm16) { 2663 if (imm16 == 0) { 2664 emitByte(0xC3); 2665 } else { 2666 emitByte(0xC2); 2667 emitShort(imm16); 2668 } 2669 } 2670 2671 public final
void sarl(Register dst, int imm8) { 2672 int encode = prefixAndEncode(dst.encoding); 2673 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2674 if (imm8 == 1) { 2675 emitByte(0xD1); 2676 emitByte(0xF8 | encode); 2677 } else { 2678 emitByte(0xC1); 2679 emitByte(0xF8 | encode); 2680 emitByte(imm8); 2681 } 2682 } 2683 2684 public final void shll(Register dst, int imm8) { 2685 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2686 int encode = prefixAndEncode(dst.encoding); 2687 if (imm8 == 1) { 2688 emitByte(0xD1); 2689 emitByte(0xE0 | encode); 2690 } else { 2691 emitByte(0xC1); 2692 emitByte(0xE0 | encode); 2693 emitByte(imm8); 2694 } 2695 } 2696 2697 public final void shll(Register dst) { 2698 int encode = prefixAndEncode(dst.encoding); 2699 emitByte(0xD3); 2700 emitByte(0xE0 | encode); 2701 } 2702 2703 public final void shrl(Register dst, int imm8) { 2704 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2705 int encode = prefixAndEncode(dst.encoding); 2706 emitByte(0xC1); 2707 emitByte(0xE8 | encode); 2708 emitByte(imm8); 2709 } 2710 2711 public final void shrl(Register dst) { 2712 int encode = prefixAndEncode(dst.encoding); 2713 emitByte(0xD3); 2714 emitByte(0xE8 | encode); 2715 } 2716 2717 public final void subl(AMD64Address dst, int imm32) { 2718 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2719 } 2720 2721 public final void subl(Register dst, int imm32) { 2722 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2723 } 2724 2725 public final void subl(Register dst, Register src) { 2726 SUB.rmOp.emit(this, DWORD, dst, src); 2727 } 2728 2729 public final void subpd(Register dst, Register src) { 2730 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2731 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2732 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2733 emitByte(0x5C); 2734 emitByte(0xC0 | encode); 2735 } 2736 2737 public final void subsd(Register dst, Register src) { 2738 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2739 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2740 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2741 emitByte(0x5C); 2742 emitByte(0xC0 | encode); 2743 } 2744 2745 public final void subsd(Register dst, AMD64Address src) { 2746 assert dst.getRegisterCategory().equals(AMD64.XMM); 2747 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2748 simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 2749 emitByte(0x5C); 2750 emitOperandHelper(dst, src, 0); 2751 } 2752 2753 public final void testl(Register dst, int imm32) { 2754 // not using emitArith because test 2755 // doesn't support sign-extension of 2756 // 8bit operands 2757 int encode = dst.encoding; 2758 if (encode == 0) { 2759 emitByte(0xA9); 2760 } else { 2761 encode = prefixAndEncode(encode); 2762 emitByte(0xF7); 2763 emitByte(0xC0 | encode); 2764 } 2765 emitInt(imm32); 2766 } 2767 2768 public 
final void testl(Register dst, Register src) { 2769 int encode = prefixAndEncode(dst.encoding, src.encoding); 2770 emitByte(0x85); 2771 emitByte(0xC0 | encode); 2772 } 2773 2774 public final void testl(Register dst, AMD64Address src) { 2775 prefix(src, dst); 2776 emitByte(0x85); 2777 emitOperandHelper(dst, src, 0); 2778 } 2779 2780 public final void unpckhpd(Register dst, Register src) { 2781 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2782 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2783 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2784 emitByte(0x15); 2785 emitByte(0xC0 | encode); 2786 } 2787 2788 public final void unpcklpd(Register dst, Register src) { 2789 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2790 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2791 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2792 emitByte(0x14); 2793 emitByte(0xC0 | encode); 2794 } 2795 2796 public final void xorl(Register dst, Register src) { 2797 XOR.rmOp.emit(this, DWORD, dst, src); 2798 } 2799 2800 public final void xorpd(Register dst, Register src) { 2801 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2802 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2803 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 2804 emitByte(0x57); 2805 emitByte(0xC0 | encode); 2806 } 2807 2808 public final void xorps(Register dst, Register src) { 2809 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2810 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 2811 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes); 2812 emitByte(0x57); 2813 emitByte(0xC0 | encode); 2814 } 2815 2816 protected final void decl(Register dst) { 2817 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2818 int encode = prefixAndEncode(dst.encoding); 2819 emitByte(0xFF); 2820 emitByte(0xC8 | encode); 2821 } 2822 2823 protected final void incl(Register dst) { 2824 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2825 int encode = prefixAndEncode(dst.encoding); 2826 emitByte(0xFF); 2827 emitByte(0xC0 | encode); 2828 } 2829 2830 private int prefixAndEncode(int regEnc) { 2831 return prefixAndEncode(regEnc, false); 2832 } 2833 2834 private int prefixAndEncode(int regEnc, boolean byteinst) { 2835 if (regEnc >= 8) { 2836 emitByte(Prefix.REXB); 2837 return regEnc - 8; 2838 } else if (byteinst && regEnc >= 4) { 2839 emitByte(Prefix.REX); 2840 } 2841 return regEnc; 2842 } 2843 2844 private int prefixqAndEncode(int regEnc) { 2845 if (regEnc < 8) { 2846 emitByte(Prefix.REXW); 2847 return regEnc; 2848 } else { 2849 emitByte(Prefix.REXWB); 2850
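// REX.WB sets both W (64-bit operand size) and B (the fourth, high bit of the r/m or
// opcode-register field), so r8..r15 are reduced to their low three bits here and the
// missing bit travels in the prefix. As a worked example: prefixqAndEncode(r9.encoding)
// emits the REXWB prefix byte and returns 1.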
return regEnc - 8; 2851 } 2852 } 2853 2854 private int prefixAndEncode(int dstEnc, int srcEnc) { 2855 return prefixAndEncode(dstEnc, false, srcEnc, false); 2856 } 2857 2858 private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) { 2859 int srcEnc = srcEncoding; 2860 int dstEnc = dstEncoding; 2861 if (dstEnc < 8) { 2862 if (srcEnc >= 8) { 2863 emitByte(Prefix.REXB); 2864 srcEnc -= 8; 2865 } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) { 2866 emitByte(Prefix.REX); 2867 } 2868 } else { 2869 if (srcEnc < 8) { 2870 emitByte(Prefix.REXR); 2871 } else { 2872 emitByte(Prefix.REXRB); 2873 srcEnc -= 8; 2874 } 2875 dstEnc -= 8; 2876 } 2877 return dstEnc << 3 | srcEnc; 2878 } 2879 2880 /** 2881 * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand 2882 * prefix. If the given operands exceed 3 bits, the 4th bit is encoded in the prefix. 2883 * 2884 * @param regEncoding the encoding of the register part of the ModRM-Byte 2885 * @param rmEncoding the encoding of the r/m part of the ModRM-Byte 2886 * @return the lower 6 bits of the ModRM-Byte that should be emitted 2887 */ 2888 private int prefixqAndEncode(int regEncoding, int rmEncoding) { 2889 int rmEnc = rmEncoding; 2890 int regEnc = regEncoding; 2891 if (regEnc < 8) { 2892 if (rmEnc < 8) { 2893 emitByte(Prefix.REXW); 2894 } else { 2895 emitByte(Prefix.REXWB); 2896 rmEnc -= 8; 2897 } 2898 } else { 2899 if (rmEnc < 8) { 2900 emitByte(Prefix.REXWR); 2901 } else { 2902 emitByte(Prefix.REXWRB); 2903 rmEnc -= 8; 2904 } 2905 regEnc -= 8; 2906 } 2907 return regEnc << 3 | rmEnc; 2908 } 2909 2910 private void vexPrefix(int rxb, int ndsEncoding, int pre, int opc, AMD64InstructionAttr attributes) { 2911 int vectorLen = attributes.getVectorLen(); 2912 boolean vexW = attributes.isRexVexW(); 2913 boolean isXorB = ((rxb & 0x3) > 0); 2914 if (isXorB || vexW || (opc == VexOpcode.VEX_OPCODE_0F_38) || (opc == VexOpcode.VEX_OPCODE_0F_3A)) { 2915 emitByte(Prefix.VEX_3BYTES); 2916 2917 int byte1 = (rxb << 5); 2918 byte1 = ((~byte1) & 0xE0) | opc; 2919 emitByte(byte1); 2920 2921 int byte2 = ((~ndsEncoding) & 0xf) << 3; 2922 byte2 |= (vexW ? VexPrefix.VEX_W : 0) | ((vectorLen > 0) ? 4 : 0) | pre; 2923 emitByte(byte2); 2924 } else { 2925 emitByte(Prefix.VEX_2BYTES); 2926 2927 int byte1 = ((rxb & 0x4) > 0) ? VexPrefix.VEX_R : 0; 2928 byte1 = (~byte1) & 0x80; 2929 byte1 |= ((~ndsEncoding) & 0xf) << 3; 2930 byte1 |= ((vectorLen > 0) ? 4 : 0) | pre; 2931 emitByte(byte1); 2932 } 2933 } 2934 2935 private void vexPrefix(AMD64Address adr, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) { 2936 int rxb = getRXB(src, adr); 2937 int ndsEncoding = nds.isValid() ? nds.encoding : 0; 2938 vexPrefix(rxb, ndsEncoding, pre, opc, attributes); 2939 setCurAttributes(attributes); 2940 } 2941 2942 private int vexPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) { 2943 int rxb = getRXB(dst, src); 2944 int ndsEncoding = nds.isValid() ? 
nds.encoding : 0; 2945 vexPrefix(rxb, ndsEncoding, pre, opc, attributes); 2946 // return modrm byte components for operands 2947 return (((dst.encoding & 7) << 3) | (src.encoding & 7)); 2948 } 2949 2950 private void simdPrefix(Register xreg, Register nds, AMD64Address adr, int pre, int opc, AMD64InstructionAttr attributes) { 2951 if (supports(CPUFeature.AVX)) { 2952 vexPrefix(adr, nds, xreg, pre, opc, attributes); 2953 } else { 2954 switch (pre) { 2955 case VexSimdPrefix.VEX_SIMD_66: 2956 emitByte(0x66); 2957 break; 2958 case VexSimdPrefix.VEX_SIMD_F2: 2959 emitByte(0xF2); 2960 break; 2961 case VexSimdPrefix.VEX_SIMD_F3: 2962 emitByte(0xF3); 2963 break; 2964 } 2965 if (attributes.isRexVexW()) { 2966 prefixq(adr, xreg); 2967 } else { 2968 prefix(adr, xreg); 2969 } 2970 switch (opc) { 2971 case VexOpcode.VEX_OPCODE_0F: 2972 emitByte(0x0F); 2973 break; 2974 case VexOpcode.VEX_OPCODE_0F_38: 2975 emitByte(0x0F); 2976 emitByte(0x38); 2977 break; 2978 case VexOpcode.VEX_OPCODE_0F_3A: 2979 emitByte(0x0F); 2980 emitByte(0x3A); 2981 break; 2982 } 2983 } 2984 } 2985 2986 private int simdPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) { 2987 if (supports(CPUFeature.AVX)) { 2988 return vexPrefixAndEncode(dst, nds, src, pre, opc, attributes); 2989 } else { 2990 switch (pre) { 2991 case VexSimdPrefix.VEX_SIMD_66: 2992 emitByte(0x66); 2993 break; 2994 case VexSimdPrefix.VEX_SIMD_F2: 2995 emitByte(0xF2); 2996 break; 2997 case VexSimdPrefix.VEX_SIMD_F3: 2998 emitByte(0xF3); 2999 break; 3000 } 3001 int encode; 3002 int dstEncoding = dst.encoding; 3003 int srcEncoding = src.encoding; 3004 if (attributes.isRexVexW()) { 3005 encode = prefixqAndEncode(dstEncoding, srcEncoding); 3006 } else { 3007 encode = prefixAndEncode(dstEncoding, srcEncoding); 3008 } 3009 switch (opc) { 3010 case VexOpcode.VEX_OPCODE_0F: 3011 emitByte(0x0F); 3012 break; 3013 case VexOpcode.VEX_OPCODE_0F_38: 3014 emitByte(0x0F); 3015 emitByte(0x38); 3016 break; 3017 case VexOpcode.VEX_OPCODE_0F_3A: 3018 emitByte(0x0F); 3019 emitByte(0x3A); 3020 break; 3021 } 3022 return encode; 3023 } 3024 } 3025 3026 private static boolean needsRex(Register reg) { 3027 return reg.encoding >= MinEncodingNeedsRex; 3028 } 3029 3030 private void prefix(AMD64Address adr) { 3031 if (needsRex(adr.getBase())) { 3032 if (needsRex(adr.getIndex())) { 3033 emitByte(Prefix.REXXB); 3034 } else { 3035 emitByte(Prefix.REXB); 3036 } 3037 } else { 3038 if (needsRex(adr.getIndex())) { 3039 emitByte(Prefix.REXX); 3040 } 3041 } 3042 } 3043 3044 private void prefixq(AMD64Address adr) { 3045 if (needsRex(adr.getBase())) { 3046 if (needsRex(adr.getIndex())) { 3047 emitByte(Prefix.REXWXB); 3048 } else { 3049 emitByte(Prefix.REXWB); 3050 } 3051 } else { 3052 if (needsRex(adr.getIndex())) { 3053 emitByte(Prefix.REXWX); 3054 } else { 3055 emitByte(Prefix.REXW); 3056 } 3057 } 3058 } 3059 3060 private void prefix(AMD64Address adr, Register reg) { 3061 prefix(adr, reg, false); 3062 } 3063 3064 private void prefix(AMD64Address adr, Register reg, boolean byteinst) { 3065 if (reg.encoding < 8) { 3066 if (needsRex(adr.getBase())) { 3067 if (needsRex(adr.getIndex())) { 3068 emitByte(Prefix.REXXB); 3069 } else { 3070 emitByte(Prefix.REXB); 3071 } 3072 } else { 3073 if (needsRex(adr.getIndex())) { 3074 emitByte(Prefix.REXX); 3075 } else if (byteinst && reg.encoding >= 4) { 3076 emitByte(Prefix.REX); 3077 } 3078 } 3079 } else { 3080 if (needsRex(adr.getBase())) { 3081 if (needsRex(adr.getIndex())) { 3082 emitByte(Prefix.REXRXB); 3083 } 
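// (In the Prefix.REX* names used throughout these helpers, R extends the ModRM reg
// field, X the SIB index, and B the ModRM r/m or SIB base, matching the REX bit
// layout 0100WRXB.)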
else { 3084 emitByte(Prefix.REXRB); 3085 } 3086 } else { 3087 if (needsRex(adr.getIndex())) { 3088 emitByte(Prefix.REXRX); 3089 } else { 3090 emitByte(Prefix.REXR); 3091 } 3092 } 3093 } 3094 } 3095 3096 private void prefixq(AMD64Address adr, Register src) { 3097 if (src.encoding < 8) { 3098 if (needsRex(adr.getBase())) { 3099 if (needsRex(adr.getIndex())) { 3100 emitByte(Prefix.REXWXB); 3101 } else { 3102 emitByte(Prefix.REXWB); 3103 } 3104 } else { 3105 if (needsRex(adr.getIndex())) { 3106 emitByte(Prefix.REXWX); 3107 } else { 3108 emitByte(Prefix.REXW); 3109 } 3110 } 3111 } else { 3112 if (needsRex(adr.getBase())) { 3113 if (needsRex(adr.getIndex())) { 3114 emitByte(Prefix.REXWRXB); 3115 } else { 3116 emitByte(Prefix.REXWRB); 3117 } 3118 } else { 3119 if (needsRex(adr.getIndex())) { 3120 emitByte(Prefix.REXWRX); 3121 } else { 3122 emitByte(Prefix.REXWR); 3123 } 3124 } 3125 } 3126 } 3127 3128 public final void addq(Register dst, int imm32) { 3129 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3130 } 3131 3132 public final void addq(AMD64Address dst, int imm32) { 3133 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3134 } 3135 3136 public final void addq(Register dst, Register src) { 3137 ADD.rmOp.emit(this, QWORD, dst, src); 3138 } 3139 3140 public final void addq(AMD64Address dst, Register src) { 3141 ADD.mrOp.emit(this, QWORD, dst, src); 3142 } 3143 3144 public final void andq(Register dst, int imm32) { 3145 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3146 } 3147 3148 public final void bsrq(Register dst, Register src) { 3149 int encode = prefixqAndEncode(dst.encoding(), src.encoding()); 3150 emitByte(0x0F); 3151 emitByte(0xBD); 3152 emitByte(0xC0 | encode); 3153 } 3154 3155 public final void bswapq(Register reg) { 3156 int encode = prefixqAndEncode(reg.encoding); 3157 emitByte(0x0F); 3158 emitByte(0xC8 | encode); 3159 } 3160 3161 public final void cdqq() { 3162 emitByte(Prefix.REXW); 3163 emitByte(0x99); 3164 } 3165 3166 public final void cmovq(ConditionFlag cc, Register dst, Register src) { 3167 int encode = prefixqAndEncode(dst.encoding, src.encoding); 3168 emitByte(0x0F); 3169 emitByte(0x40 | cc.getValue()); 3170 emitByte(0xC0 | encode); 3171 } 3172 3173 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) { 3174 prefixq(src, dst); 3175 emitByte(0x0F); 3176 emitByte(0x40 | cc.getValue()); 3177 emitOperandHelper(dst, src, 0); 3178 } 3179 3180 public final void cmpq(Register dst, int imm32) { 3181 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3182 } 3183 3184 public final void cmpq(Register dst, Register src) { 3185 CMP.rmOp.emit(this, QWORD, dst, src); 3186 } 3187 3188 public final void cmpq(Register dst, AMD64Address src) { 3189 CMP.rmOp.emit(this, QWORD, dst, src); 3190 } 3191 3192 public final void cmpxchgq(Register reg, AMD64Address adr) { 3193 prefixq(adr, reg); 3194 emitByte(0x0F); 3195 emitByte(0xB1); 3196 emitOperandHelper(reg, adr, 0); 3197 } 3198 3199 public final void cvtdq2pd(Register dst, Register src) { 3200 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 3201 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 3202 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes); 3203 emitByte(0xE6); 3204 emitByte(0xC0 | 
encode); 3205 } 3206 3207 public final void cvtsi2sdq(Register dst, Register src) { 3208 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU); 3209 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 3210 int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 3211 emitByte(0x2A); 3212 emitByte(0xC0 | encode); 3213 } 3214 3215 public final void cvttsd2siq(Register dst, Register src) { 3216 assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM); 3217 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 3218 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes); 3219 emitByte(0x2C); 3220 emitByte(0xC0 | encode); 3221 } 3222 3223 public final void cvttpd2dq(Register dst, Register src) { 3224 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 3225 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target); 3226 int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes); 3227 emitByte(0xE6); 3228 emitByte(0xC0 | encode); 3229 } 3230 3231 protected final void decq(Register dst) { 3232 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3233 int encode = prefixqAndEncode(dst.encoding); 3234 emitByte(0xFF); 3235 emitByte(0xC8 | encode); 3236 } 3237 3238 public final void decq(AMD64Address dst) { 3239 DEC.emit(this, QWORD, dst); 3240 } 3241 3242 public final void imulq(Register dst, Register src) { 3243 int encode = prefixqAndEncode(dst.encoding, src.encoding); 3244 emitByte(0x0F); 3245 emitByte(0xAF); 3246 emitByte(0xC0 | encode); 3247 } 3248 3249 public final void incq(Register dst) { 3250 // Don't use it directly. Use AMD64MacroAssembler.incrementq() instead.
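// (A macro-level increment can choose between INC, which leaves CF unchanged and may
// cause partial-flags stalls, and an equivalent ADD of 1; this raw emitter makes no such
// choice - that is the assumed reason for the indirection.) Worked encoding example:
// incq(rax) emits 48 FF C0: REX.W prefix, opcode 0xFF, ModRM 0xC0 = mod 11, digit /0, rax.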
    public final void incq(Register dst) {
        // Don't use this directly; use the macro assembler's incrementq() instead.
        // Use the two-byte form (the one-byte form is a REX prefix in 64-bit mode).
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xFF);
        emitByte(0xC0 | encode);
    }

    public final void incq(AMD64Address dst) {
        INC.emit(this, QWORD, dst);
    }

    public final void movq(Register dst, long imm64) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xB8 | encode);
        emitLong(imm64);
    }

    public final void movslq(Register dst, int imm32) {
        // Despite the name, this emits MOV r/m64, imm32 (C7 /0); the immediate is
        // sign-extended to 64 bits by the processor.
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xC7);
        emitByte(0xC0 | encode);
        emitInt(imm32);
    }

    public final void movdq(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6E);
        emitOperandHelper(dst, src, 0);
    }

    public final void movdq(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        // swap src/dst to get the correct prefix
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x7E);
        emitOperandHelper(src, dst, 0);
    }

    public final void movdq(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x6E);
            emitByte(0xC0 | encode);
        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
            // swap src/dst to get the correct prefix
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x7E);
            emitByte(0xC0 | encode);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x6E);
            emitByte(0xC0 | encode);
        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
            // swap src/dst to get the correct prefix
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x7E);
            emitByte(0xC0 | encode);
        } else {
            throw new InternalError("should not reach here");
        }
    }
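    /*
     * movdq and movdl above share the opcode pair 0x6E (GPR to XMM) and 0x7E (XMM to GPR)
     * under the 0x66 SIMD prefix; the only difference is the rexVexW attribute, which
     * selects the 64-bit MOVQ form in movdq and the 32-bit MOVD form in movdl.
     */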
    public final void movddup(Register dst, Register src) {
        assert supports(CPUFeature.SSE3);
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x12);
        emitByte(0xC0 | encode);
    }

    public final void movdqu(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void movdqu(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitByte(0xC0 | encode);
    }

    public final void vmovdqu(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX);
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void vzeroupper() {
        assert supports(CPUFeature.AVX);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        vexPrefixAndEncode(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x77);
    }

    public final void movslq(AMD64Address dst, int imm32) {
        prefixq(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 4);
        emitInt(imm32);
    }

    public final void movslq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x63);
        emitOperandHelper(dst, src, 0);
    }

    public final void movslq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x63);
        emitByte(0xC0 | encode);
    }

    public final void negq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xF7);
        emitByte(0xD8 | encode);
    }

    public final void orq(Register dst, Register src) {
        OR.rmOp.emit(this, QWORD, dst, src);
    }
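    /*
     * The shift emitters below use the dedicated shift-by-one opcode 0xD1 when imm8 == 1
     * and the 0xC1 ib form otherwise. The assert isShiftCount(imm8 >> 1) admits 64-bit
     * shift counts of 0..63: isShiftCount itself only accepts 0..31, so checking the
     * halved value widens the accepted range for these QWORD shifts.
     */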
    public final void shlq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixqAndEncode(dst.encoding);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitByte(0xE0 | encode);
        } else {
            emitByte(0xC1);
            emitByte(0xE0 | encode);
            emitByte(imm8);
        }
    }

    public final void shlq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xD3);
        emitByte(0xE0 | encode);
    }

    public final void shrq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixqAndEncode(dst.encoding);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitByte(0xE8 | encode);
        } else {
            emitByte(0xC1);
            emitByte(0xE8 | encode);
            emitByte(imm8);
        }
    }

    public final void shrq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xD3);
        emitByte(0xE8 | encode);
    }

    public final void sbbq(Register dst, Register src) {
        SBB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void subq(Register dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subq(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subqWide(Register dst, int imm32) {
        // Don't use the sign-extending byte-immediate version; force a full 32-bit immediate.
        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
    }

    public final void subq(Register dst, Register src) {
        SUB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void testq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x85);
        emitByte(0xC0 | encode);
    }

    public final void xaddl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddq(AMD64Address dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xchgl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }
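    /*
     * Note on atomicity: xchg with a memory operand is implicitly locked by the processor,
     * whereas xadd (and cmpxchg) only become atomic when the caller emits an explicit
     * lock() prefix first, e.g. lock(); xaddq(addr, reg) for an atomic fetch-and-add.
     */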
    public final void membar(int barriers) {
        if (target.isMP) {
            // We only have to handle StoreLoad.
            if ((barriers & STORE_LOAD) != 0) {
                // All usable chips support "locked" instructions which suffice as barriers
                // and are much faster than the alternative of using the cpuid instruction.
                // We use a locked add of 0 to [rsp] here; it is conveniently a no-op except
                // for clobbering the flags. Any change to this code may need to revisit
                // other places where this idiom is used, in particular the orderAccess code.
                lock();
                addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here.
            }
        }
    }

    @Override
    protected final void patchJumpTarget(int branch, int branchTarget) {
        int op = getByte(branch);
        assert op == 0xE8 // call
                        || op == 0x00 // jump table entry
                        || op == 0xE9 // jmp
                        || op == 0xEB // short jmp
                        || (op & 0xF0) == 0x70 // short jcc
                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
                        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;

        if (op == 0x00) {
            int offsetToJumpTableBase = getShort(branch + 1);
            int jumpTableBase = branch - offsetToJumpTableBase;
            int imm32 = branchTarget - jumpTableBase;
            emitInt(imm32, branch);
        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
            // Short offset operators (jmp and jcc).
            final int imm8 = branchTarget - (branch + 2);
            /*
             * Since a wrongly patched short branch can potentially lead to working but badly
             * behaving code, we should always fail with an exception here instead of relying
             * on an assert.
             */
            if (!NumUtil.isByte(imm8)) {
                throw new InternalError("branch displacement out of range: " + imm8);
            }
            emitByte(imm8, branch + 1);
        } else {
            int off = 1;
            if (op == 0x0F) {
                off = 2;
            }
            // The displacement is relative to the end of the instruction: opcode (off bytes)
            // plus a 4-byte immediate. E.g. a jmp (0xE9) at position 100 targeting position
            // 160 gets imm32 = 160 - (100 + 4 + 1) = 55, stored at offset 101.
            int imm32 = branchTarget - (branch + 4 + off);
            emitInt(imm32, branch + off);
        }
    }

    public void nullCheck(AMD64Address address) {
        testl(AMD64.rax, address);
    }

    @Override
    public void align(int modulus) {
        if (position() % modulus != 0) {
            nop(modulus - (position() % modulus));
        }
    }

    /**
     * Emits a direct call instruction. Note that the actual call target is not specified, because
     * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
     * responsible for adding the call address to the appropriate patching tables.
     */
    public final void call() {
        if (codePatchingAnnotationConsumer != null) {
            int pos = position();
            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + 1, 4, pos + 5));
        }
        emitByte(0xE8);
        emitInt(0);
    }

    public final void call(Register src) {
        int encode = prefixAndEncode(src.encoding);
        emitByte(0xFF);
        emitByte(0xD0 | encode);
    }

    public final void int3() {
        emitByte(0xCC);
    }

    public final void pause() {
        emitByte(0xF3);
        emitByte(0x90);
    }

    private void emitx87(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal stack offset";
        emitByte(b1);
        emitByte(b2 + i);
    }

    public final void fldd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(0, src, 0);
    }

    public final void flds(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(0, src, 0);
    }

    public final void fldln2() {
        emitByte(0xD9);
        emitByte(0xED);
    }

    public final void fldlg2() {
        emitByte(0xD9);
        emitByte(0xEC);
    }

    public final void fyl2x() {
        emitByte(0xD9);
        emitByte(0xF1);
    }

    public final void fstps(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(3, src, 0);
    }

    public final void fstpd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(3, src, 0);
    }

    private void emitFPUArith(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
        emitByte(b1);
        emitByte(b2 + i);
    }

    public void ffree(int i) {
        emitFPUArith(0xDD, 0xC0, i);
    }

    public void fincstp() {
        emitByte(0xD9);
        emitByte(0xF7);
    }

    public void fxch(int i) {
        emitFPUArith(0xD9, 0xC8, i);
    }

    public void fnstswAX() {
        emitByte(0xDF);
        emitByte(0xE0);
    }

    public void fwait() {
        emitByte(0x9B);
    }

    public void fprem() {
        emitByte(0xD9);
        emitByte(0xF8);
    }

    public final void fsin() {
        emitByte(0xD9);
        emitByte(0xFE);
    }

    public final void fcos() {
        emitByte(0xD9);
        emitByte(0xFF);
    }

    public final void fptan() {
        emitByte(0xD9);
        emitByte(0xF2);
    }

    public final void fstp(int i) {
        emitx87(0xDD, 0xD8, i);
    }

    @Override
    public AMD64Address makeAddress(Register base, int displacement) {
        return new AMD64Address(base, displacement);
    }

    @Override
    public AMD64Address getPlaceholder(int instructionStartPosition) {
        return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition);
    }

    private void prefetchPrefix(AMD64Address src) {
        prefix(src);
        emitByte(0x0F);
    }

    public void prefetchnta(AMD64Address src) {
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(0, src, 0);
    }

    void prefetchr(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(0, src, 0);
    }

    public void prefetcht0(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(1, src, 0);
    }

    public void prefetcht1(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(2, src, 0);
    }
    public void prefetcht2(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(3, src, 0);
    }

    public void prefetchw(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(1, src, 0);
    }

    public void rdtsc() {
        emitByte(0x0F);
        emitByte(0x31);
    }

    /**
     * Emits an instruction which is considered to be illegal. This is used if we deliberately want
     * to crash the program (debugging etc.).
     */
    public void illegal() {
        emitByte(0x0F);
        emitByte(0x0B);
    }
}
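// A minimal usage sketch (hypothetical, for illustration only; it assumes a single-argument
// constructor taking the TargetDescription, plus the ret(int) emitter defined earlier in this
// class and Assembler.close(boolean)). In practice the assembler is driven by the Graal backend:
//
//   AMD64Assembler asm = new AMD64Assembler(target);
//   asm.addq(AMD64.rax, 42);       // ADD rax, 42 -> 48 83 C0 2A (byte-immediate 0x83 form)
//   asm.ret(0);                    // RET
//   byte[] code = asm.close(true); // retrieve the emitted machine code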