1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 25 package org.graalvm.compiler.asm.amd64; 26 27 import static jdk.vm.ci.amd64.AMD64.MASK; 28 import static jdk.vm.ci.amd64.AMD64.XMM; 29 import static jdk.vm.ci.amd64.AMD64.r12; 30 import static jdk.vm.ci.amd64.AMD64.r13; 31 import static jdk.vm.ci.amd64.AMD64.rbp; 32 import static jdk.vm.ci.amd64.AMD64.rsp; 33 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0; 34 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B1; 35 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.L512; 36 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0; 37 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1; 38 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128; 39 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256; 40 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ; 41 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F; 42 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38; 43 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A; 44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_; 45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66; 46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2; 47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3; 48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0; 49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1; 50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG; 51 import static org.graalvm.compiler.core.common.NumUtil.isByte; 52 53 import 
org.graalvm.compiler.asm.Assembler;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.meta.PlatformKind;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public abstract class AMD64BaseAssembler extends Assembler {

    /**
     * Strategy used to emit the prefix bytes of SIMD instructions: VEX-based when the CPU
     * supports AVX, legacy SSE prefixes otherwise. Chosen once in the constructor.
     */
    private final SIMDEncoder simdEncoder;

    /**
     * Constructs an assembler for the AMD64 architecture.
     *
     * @param target the target description, used to query CPU features
     */
    public AMD64BaseAssembler(TargetDescription target) {
        super(target);

        // Select the SIMD prefix encoding strategy based on AVX availability.
        if (supports(CPUFeature.AVX)) {
            simdEncoder = new VEXEncoderImpl();
        } else {
            simdEncoder = new SSEEncoderImpl();
        }
    }

    /**
     * The x86 operand sizes.
*/
    public enum OperandSize {
        BYTE(1, AMD64Kind.BYTE) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            }

            @Override
            protected int immediateSize() {
                return 1;
            }
        },

        WORD(2, AMD64Kind.WORD, 0x66) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                assert imm == (short) imm;
                asm.emitShort(imm);
            }

            @Override
            protected int immediateSize() {
                return 2;
            }
        },

        DWORD(4, AMD64Kind.DWORD) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        // Note: QWORD immediates are emitted as 32-bit values that the CPU sign-extends;
        // hence emitInt and an immediate size of 4, not 8.
        QWORD(8, AMD64Kind.QWORD) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        SS(4, AMD64Kind.SINGLE, 0xF3, true),

        SD(8, AMD64Kind.DOUBLE, 0xF2, true),

        PS(16, AMD64Kind.V128_SINGLE, true),

        PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);

        // Prefix byte emitted before the opcode for this size (0 if none), e.g. 0x66 for WORD.
        private final int sizePrefix;
        // Width of an operand of this size, in bytes.
        private final int bytes;
        // True for sizes that live in XMM registers (SS/SD/PS/PD).
        private final boolean xmm;
        // The platform kind corresponding to this operand size.
        private final AMD64Kind kind;

        OperandSize(int bytes, AMD64Kind kind) {
            this(bytes, kind, 0);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
            this(bytes, kind, sizePrefix, false);
        }

        OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
            this(bytes, kind, 0, xmm);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
            this.sizePrefix = sizePrefix;
            this.bytes = bytes;
            this.kind = kind;
            this.xmm = xmm;
        }

        public int getSizePrefix() {
            return sizePrefix;
        }

        public int getBytes() {
            return bytes;
        }

        public boolean isXmmType() {
            return xmm;
        }

        public AMD64Kind getKind() {
            return kind;
        }

        /**
         * Returns the operand size whose {@link AMD64Kind} equals the given kind.
         *
         * @throws org.graalvm.compiler.debug.GraalError if no operand size matches
         */
        public static OperandSize get(PlatformKind kind) {
            for (OperandSize operandSize : OperandSize.values()) {
                if (operandSize.kind.equals(kind)) {
                    return operandSize;
                }
            }
            throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
        }

        /**
         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
         * as sign-extended 32-bit values.
         *
         * @param asm the assembler to emit into
         * @param imm the immediate value; must fit in this operand size
         */
        protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
            throw new UnsupportedOperationException();
        }

        /**
         * The number of bytes {@link #emitImmediate} will write for this size.
         */
        protected int immediateSize() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Annotation recording where an instruction's operand is located in the emitted code, so that
     * it can be found and patched later.
     */
    public abstract static class OperandDataAnnotation extends CodeAnnotation {
        /**
         * The position (bytes from the beginning of the method) of the operand.
         */
        public final int operandPosition;
        /**
         * The size of the operand, in bytes.
         */
        public final int operandSize;
        /**
         * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
         * RIP-relative operands are relative to this position.
         */
        public final int nextInstructionPosition;

        OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition);

            this.operandPosition = operandPosition;
            this.operandSize = operandSize;
            this.nextInstructionPosition = nextInstructionPosition;
        }

        @Override
        public String toString() {
            return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
        }
    }

    /**
     * Annotation that stores additional information about the displacement of a
     * {@link Assembler#getPlaceholder placeholder address} that needs patching.
245 */ 246 protected static class AddressDisplacementAnnotation extends OperandDataAnnotation { 247 AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) { 248 super(instructionPosition, operandPosition, operandSize, nextInstructionPosition); 249 } 250 } 251 252 /** 253 * Annotation that stores additional information about the immediate operand, e.g., of a call 254 * instruction, that needs patching. 255 */ 256 protected static class ImmediateOperandAnnotation extends OperandDataAnnotation { 257 ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) { 258 super(instructionPosition, operandPosition, operandSize, nextInstructionPosition); 259 } 260 } 261 262 protected void annotatePatchingImmediate(int operandOffset, int operandSize) { 263 if (codePatchingAnnotationConsumer != null) { 264 int pos = position(); 265 codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + operandOffset, operandSize, pos + operandOffset + operandSize)); 266 } 267 } 268 269 public final boolean supports(CPUFeature feature) { 270 return ((AMD64) target.arch).getFeatures().contains(feature); 271 } 272 273 protected static boolean inRC(RegisterCategory rc, Register r) { 274 return r.getRegisterCategory().equals(rc); 275 } 276 277 protected static int encode(Register r) { 278 assert r.encoding >= 0 && (inRC(XMM, r) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding; 279 return r.encoding & 0x7; 280 } 281 282 private static final int MinEncodingNeedsRex = 8; 283 284 /** 285 * Constants for X86 prefix bytes. 
*/
    private static class Prefix {
        // REX prefixes are 0x40-0x4F, i.e. [0100 WRXB]: W = 64-bit operand size,
        // R extends ModRM.reg, X extends SIB.index, B extends ModRM.rm/SIB.base.
        private static final int REX = 0x40;
        private static final int REXB = 0x41;
        private static final int REXX = 0x42;
        private static final int REXXB = 0x43;
        private static final int REXR = 0x44;
        private static final int REXRB = 0x45;
        private static final int REXRX = 0x46;
        private static final int REXRXB = 0x47;
        private static final int REXW = 0x48;
        private static final int REXWB = 0x49;
        private static final int REXWX = 0x4A;
        private static final int REXWXB = 0x4B;
        private static final int REXWR = 0x4C;
        private static final int REXWRB = 0x4D;
        private static final int REXWRX = 0x4E;
        private static final int REXWRXB = 0x4F;

        // Escape bytes introducing the 2-byte VEX, 3-byte VEX and EVEX prefixes.
        private static final int VEX2 = 0xC5;
        private static final int VEX3 = 0xC4;
        private static final int EVEX = 0x62;
    }

    /**
     * Emits a REX.W prefix (64-bit operand size).
     */
    protected final void rexw() {
        emitByte(Prefix.REXW);
    }

    protected final void prefix(Register reg) {
        prefix(reg, false);
    }

    /**
     * Emits a REX prefix for a single-register operand if one is needed.
     *
     * @param byteinst true if this is a byte instruction; encodings 4-7 then need an empty REX
     *            so that SPL/BPL/SIL/DIL are addressed instead of AH/CH/DH/BH
     */
    protected final void prefix(Register reg, boolean byteinst) {
        int regEnc = reg.encoding;
        if (regEnc >= 8) {
            emitByte(Prefix.REXB);
        } else if (byteinst && regEnc >= 4) {
            emitByte(Prefix.REX);
        }
    }

    /**
     * Emits the REX.W prefix for a single-register operand, adding the B bit for encodings >= 8.
     */
    protected final void prefixq(Register reg) {
        if (reg.encoding < 8) {
            emitByte(Prefix.REXW);
        } else {
            emitByte(Prefix.REXWB);
        }
    }

    protected final void prefix(Register dst, Register src) {
        prefix(dst, false, src, false);
    }

    /**
     * Emits a REX prefix for a register-register operand pair if one is needed. The R bit covers
     * {@code dst} (ModRM.reg), the B bit covers {@code src} (ModRM.rm).
     */
    protected final void prefix(Register dst, boolean dstIsByte, Register src, boolean srcIsByte) {
        int dstEnc = dst.encoding;
        int srcEnc = src.encoding;
        if (dstEnc < 8) {
            if (srcEnc >= 8) {
                emitByte(Prefix.REXB);
            } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
                // Empty REX so that byte operands select SPL/BPL/SIL/DIL.
                emitByte(Prefix.REX);
            }
        } else {
            if (srcEnc < 8) {
                emitByte(Prefix.REXR);
            } else {
                emitByte(Prefix.REXRB);
            }
        }
    }

    /**
* Creates prefix for the operands. If the given operands exceed 3 bits, the 4th bit is encoded
     * in the prefix.
     */
    protected final void prefixq(Register reg, Register rm) {
        int regEnc = reg.encoding;
        int rmEnc = rm.encoding;
        if (regEnc < 8) {
            if (rmEnc < 8) {
                emitByte(Prefix.REXW);
            } else {
                emitByte(Prefix.REXWB);
            }
        } else {
            if (rmEnc < 8) {
                emitByte(Prefix.REXWR);
            } else {
                emitByte(Prefix.REXWRB);
            }
        }
    }

    /**
     * True if the register's encoding cannot be expressed without a REX prefix.
     */
    private static boolean needsRex(Register reg) {
        return reg.encoding >= MinEncodingNeedsRex;
    }

    /**
     * Emits a REX prefix for a memory operand if its base or index register requires one.
     */
    protected final void prefix(AMD64Address adr) {
        if (needsRex(adr.getBase())) {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXXB);
            } else {
                emitByte(Prefix.REXB);
            }
        } else {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXX);
            }
        }
    }

    /**
     * Emits a REX.W prefix for a memory operand, adding X/B bits as the base and index registers
     * require. Unlike {@link #prefix(AMD64Address)}, REX.W is always emitted.
     */
    protected final void prefixq(AMD64Address adr) {
        if (needsRex(adr.getBase())) {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWXB);
            } else {
                emitByte(Prefix.REXWB);
            }
        } else {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWX);
            } else {
                emitByte(Prefix.REXW);
            }
        }
    }

    protected void prefixb(AMD64Address adr, Register reg) {
        prefix(adr, reg, true);
    }

    protected void prefix(AMD64Address adr, Register reg) {
        prefix(adr, reg, false);
    }

    /**
     * Emits a REX prefix for a register + memory operand pair if one is needed. The R bit covers
     * {@code reg}, the X/B bits cover the address's index/base registers.
     *
     * @param byteinst true if this is a byte instruction; register encodings 4-7 then need an
     *            empty REX so that SPL/BPL/SIL/DIL are addressed instead of AH/CH/DH/BH
     */
    protected void prefix(AMD64Address adr, Register reg, boolean byteinst) {
        if (reg.encoding < 8) {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXXB);
                } else {
                    emitByte(Prefix.REXB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXX);
                } else if (byteinst && reg.encoding >= 4) {
                    emitByte(Prefix.REX);
                }
            }
        } else {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXRXB);
                } else {
                    emitByte(Prefix.REXRB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXRX);
                } else {
                    emitByte(Prefix.REXR);
                }
            }
        }
    }

    /**
     * Emits a REX.W prefix for a register + memory operand pair, adding R/X/B bits as required.
     * REX.W is always emitted.
     */
    protected void prefixq(AMD64Address adr, Register src) {
        if (src.encoding < 8) {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWXB);
                } else {
                    emitByte(Prefix.REXWB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWX);
                } else {
                    emitByte(Prefix.REXW);
                }
            }
        } else {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWRXB);
                } else {
                    emitByte(Prefix.REXWRB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWRX);
                } else {
                    emitByte(Prefix.REXWR);
                }
            }
        }
    }

    /**
     * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
     * field. The X bit must be 0.
     */
    protected static int getRXB(Register reg, Register rm) {
        // Bit 3 of each encoding moves to R (bit 2) and B (bit 0) of the RXB triple.
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
        return rxb;
    }

    /**
     * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
     * are two cases for the memory operand:<br>
     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
     * <br>
     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
     */
    protected static int getRXB(Register reg, AMD64Address rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        if (!rm.getIndex().equals(Register.None)) {
            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
        }
        if (!rm.getBase().equals(Register.None)) {
            rxb |= (rm.getBase().encoding & 0x08) >> 3;
        }
        return rxb;
    }

    /**
     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected final void emitModRM(int reg, Register rm) {
        assert (reg & 0x07) == reg;
        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
    }

    /**
     * Emit the ModR/M byte for two register operands.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected final void emitModRM(Register reg, Register rm) {
        emitModRM(reg.encoding & 0x07, rm);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     */
    protected final void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize, 1);
    }

    protected final void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
        emitOperandHelper(reg, addr, false, additionalInstructionSize, 1);
    }

    protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, 1);
    }

    /**
     * Like {@link #emitOperandHelper(Register, AMD64Address, int)}, but with an EVEX disp8
     * compression scale (see the private overload for details).
     */
    protected final void emitEVEXOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, evexDisp8Scale);
    }

    /**
     * Emits the ModR/M byte and
optionally the SIB byte for one memory operand and an opcode
     * extension in the R field.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
     *            so that the start position of the next instruction can be computed even though
     *            this instruction has not been completely emitted yet.
     * @param evexDisp8Scale the scaling factor for computing the compressed displacement of
     *            EVEX-encoded instructions. This scaling factor only matters when the emitted
     *            instruction uses one-byte-displacement form.
     */
    private void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale) {
        assert (reg & 0x07) == reg;
        // The opcode extension / register goes into ModRM.reg (bits 5:3).
        int regenc = reg << 3;

        Register base = addr.getBase();
        Register index = addr.getIndex();

        Scale scale = addr.getScale();
        int disp = addr.getDisplacement();

        if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
            // [00 000 101] disp32
            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
            emitByte(0x05 | regenc);
            if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
                codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
            }
            emitInt(disp);
        } else if (base.isValid()) {
            boolean overriddenForce4Byte = force4Byte;
            int baseenc = base.isValid() ? encode(base) : 0;

            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [base + indexscale + disp]
                // rbp/r13 as base cannot use the no-displacement form (mod=00 means RIP/disp32),
                // so they fall through to the explicit-displacement cases below.
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base + indexscale]
                    // [00 reg 100][ss index base]
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x04 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                } else {
                    // EVEX disp8 compression: a displacement that is a multiple of the scale is
                    // stored divided by it; otherwise the 4-byte form must be used.
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [base + indexscale + imm8]
                        // [01 reg 100][ss index base] imm8
                        assert !index.equals(rsp) : "illegal addressing mode";
                        emitByte(0x44 | regenc);
                        emitByte(scale.log2 << 6 | indexenc | baseenc);
                        emitByte(disp & 0xFF);
                    } else {
                        // [base + indexscale + disp32]
                        // [10 reg 100][ss index base] disp32
                        assert !index.equals(rsp) : "illegal addressing mode";
                        emitByte(0x84 | regenc);
                        emitByte(scale.log2 << 6 | indexenc | baseenc);
                        emitInt(disp);
                    }
                }
            } else if (base.equals(rsp) || base.equals(r12)) {
                // rsp/r12 as base always need an SIB byte (their encoding in ModRM.rm selects SIB).
                // [rsp + disp]
                if (disp == 0) {
                    // [rsp]
                    // [00 reg 100][00 100 100]
                    emitByte(0x04 | regenc);
                    emitByte(0x24);
                } else {
                    // EVEX disp8 compression, as above.
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [rsp + imm8]
                        // [01 reg 100][00 100 100] disp8
                        emitByte(0x44 | regenc);
                        emitByte(0x24);
                        emitByte(disp & 0xFF);
                    } else {
                        // [rsp + imm32]
                        // [10 reg 100][00 100 100] disp32
                        emitByte(0x84 | regenc);
                        emitByte(0x24);
                        emitInt(disp);
                    }
                }
            } else {
                // [base + disp]
                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base]
                    // [00 reg base]
                    emitByte(0x00 | regenc | baseenc);
                } else {
                    // EVEX disp8 compression, as above.
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [base + disp8]
                        // [01 reg base] disp8
                        emitByte(0x40 | regenc | baseenc);
                        emitByte(disp & 0xFF);
                    } else {
                        // [base + disp32]
                        // [10 reg base] disp32
                        emitByte(0x80 | regenc | baseenc);
                        emitInt(disp);
                    }
                }
            }
        } else {
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [indexscale + disp]
                // [00 reg 100][ss index 101] disp32
                assert !index.equals(rsp) : "illegal addressing mode";
                emitByte(0x04 | regenc);
                emitByte(scale.log2 << 6 | indexenc | 0x05);
                emitInt(disp);
            } else {
                // [disp] ABSOLUTE
                // [00 reg 100][00 100 101] disp32
                emitByte(0x04 | regenc);
                emitByte(0x25);
                emitInt(disp);
            }
        }
    }

    /**
     * Abstraction over the two ways of emitting the prefix bytes of a SIMD instruction: legacy
     * SSE prefixes or a VEX prefix.
     */
    private interface SIMDEncoder {

        void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);

        void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);

    }

    /**
     * Emits SIMD prefixes in legacy SSE form: optional size prefix, optional REX, then the
     * opcode escape byte(s).
     */
    private class SSEEncoderImpl implements SIMDEncoder {

        @Override
        public void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            if (sizePrefix > 0) {
                emitByte(sizePrefix);
            }
            if (isRexW) {
                prefixq(adr, xreg);
            } else {
                prefix(adr, xreg);
            }
            // Two-byte escapes (e.g. 0F 38) are passed as a 16-bit value.
            if (opcodeEscapePrefix > 0xFF) {
                emitShort(opcodeEscapePrefix);
            } else if (opcodeEscapePrefix > 0) {
                emitByte(opcodeEscapePrefix);
            }
        }

        @Override
        public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            if (sizePrefix > 0) {
                emitByte(sizePrefix);
            }
            if (isRexW) {
                prefixq(dst, src);
            } else {
                prefix(dst, src);
            }
            if (opcodeEscapePrefix > 0xFF) {
                emitShort(opcodeEscapePrefix);
            } else if (opcodeEscapePrefix > 0) {
                emitByte(opcodeEscapePrefix);
            }
        }
    }

    /**
     * Field value constants used when assembling VEX prefixes (L, W, pp and m-mmmm fields).
     */
    public static final class VEXPrefixConfig {
        public static final int L128 = 0;
        public static final int L256 = 1;
        public static final int LZ = 0;

        public static final int W0 = 0;
        public static final int W1 = 1;
        public static final int WIG = 0;

        public static final int P_ = 0x0;
        public static final int P_66 = 0x1;
        public static final int P_F3 = 0x2;
        public static final int P_F2 = 0x3;

        public static final int M_0F = 0x1;
        public static final int M_0F38 = 0x2;
        public static final int M_0F3A = 0x3;

        private VEXPrefixConfig() {
        }
    }

    /**
     * Emits SIMD prefixes in VEX form, translating the legacy size/escape prefixes into the
     * compressed VEX.pp and VEX.m-mmmm fields.
     */
    private class VEXEncoderImpl implements SIMDEncoder {

        /** Maps a legacy size prefix byte (0x66/0xF2/0xF3) to the VEX.pp field value. */
        private int sizePrefixToPP(int sizePrefix) {
            switch (sizePrefix) {
                case 0x66:
                    return P_66;
                case 0xF2:
                    return P_F2;
                case 0xF3:
                    return P_F3;
                default:
                    return P_;
            }
        }

        /** Maps a legacy opcode escape (0F/0F 38/0F 3A) to the VEX.m-mmmm field value. */
        private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) {
            switch (opcodeEscapePrefix) {
                case 0x0F:
                    return M_0F;
                case 0x380F:
                    return M_0F38;
                case 0x3A0F:
                    return M_0F3A;
                default:
                    return 0;
            }
        }

        @Override
        public void simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            assert reg.encoding < 16 : "encoding out of range: " + reg.encoding;
            assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
            emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0, true);
        }

        @Override
        public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            assert dst.encoding < 16 : "encoding out of range: " + dst.encoding;
            assert src.encoding < 16 : "encoding out of range: " + src.encoding;
            assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
            emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0, true);
        }
    }

    /**
     * Emits the SIMD prefix for a register + memory operand, using {@code overriddenSizePrefix}
     * instead of the size's default prefix when it is non-zero.
     */
    protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(xreg, nds, adr, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(xreg, nds, adr, size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    /**
     * Emits the SIMD prefix for a register-register operand pair, using
     * {@code overriddenSizePrefix} instead of the size's default prefix when it is non-zero.
     */
    protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(dst, nds, src, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    // @formatter:off
    //
    // Instruction Format and VEX illustrated below (optional []):
    //
    // #of bytes:    2,3      1       1       1       1,2,4       1
    // [Prefixes]    VEX   OpCode   ModR/M  [SIB]   [Disp8*N] [Immediate]
    //                                              [Disp16,32]
    //
    // VEX: 0xC4 | P1 | P2
    //
    //     7   6   5   4   3   2   1   0
    // P1  R   X   B   m   m   m   m   m      P[ 7:0]
    // P2  W   v   v   v   v   L   p   p      P[15:8]
    //
    // VEX: 0xC5 | B1
    //
    //     7   6   5   4   3   2   1   0
    // P1  R   v   v   v   v   L   p   p      P[7:0]
    //
    // Figure. Bit Field Layout of the VEX Prefix
    //
    // Table. VEX Prefix Bit Field Functional Grouping
    //
    // Notation      Bit field Group            Position    Comment
    // ----------    -------------------------  --------    -------------------
    // VEX.RXB       Next-8 register specifier  P[7:5]      Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
    // VEX.R         REX.R inverse              P[7]        Combine with EVEX.R and ModR/M.reg.
    // VEX.X         REX.X inverse              P[6]        Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
    // VEX.B         REX.B inverse              P[5]
    // VEX.mmmmmm    0F, 0F_38, 0F_3A encoding  P[4:0]      b01/0x0F, b10/0F_38, b11/0F_3A (all other reserved)
    //
    // VEX.W         Opcode specific            P[15]
    // VEX.vvvv      A register specifier       P[14:11]    In inverse form, b1111 if not used.
    //                                          P[6:3]
    // VEX.L         Vector length/RC           P[10]       b0/scalar or 128b vec, b1/256b vec.
    //                                          P[2]
    // VEX.pp        Compressed legacy prefix   P[9:8]      b00/None, b01/0x66, b10/0xF3, b11/0xF2
    //                                          P[1:0]
    // @formatter:on

    /**
     * Low-level function to encode and emit the VEX prefix.
* <p>
     * 2 byte form: [1100 0101] [R vvvv L pp]<br>
     * 3 byte form: [1100 0100] [RXB m-mmmm] [W vvvv L pp]
     * <p>
     * The RXB and vvvv fields are stored in 1's complement in the prefix encoding. This function
     * performs the 1s complement conversion, the caller is expected to pass plain unencoded
     * arguments.
     * <p>
     * The pp field encodes an extension to the opcode:<br>
     * 00: no extension<br>
     * 01: 66<br>
     * 10: F3<br>
     * 11: F2
     * <p>
     * The m-mmmm field encodes the leading bytes of the opcode:<br>
     * 00001: implied 0F leading opcode byte (default in 2-byte encoding)<br>
     * 00010: implied 0F 38 leading opcode bytes<br>
     * 00011: implied 0F 3A leading opcode bytes
     * <p>
     * This function automatically chooses the 2 or 3 byte encoding, based on the XBW flags and the
     * m-mmmm field.
     */
    protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv, boolean checkAVX) {
        assert !checkAVX || ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX) : "emitting VEX prefix on a CPU without AVX support";

        assert l == L128 || l == L256 : "invalid value for VEX.L";
        assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for VEX.pp";
        assert mmmmm == M_0F || mmmmm == M_0F38 || mmmmm == M_0F3A : "invalid value for VEX.m-mmmm";
        assert w == W0 || w == W1 : "invalid value for VEX.W";

        assert (rxb & 0x07) == rxb : "invalid value for VEX.RXB";
        assert (vvvv & 0x0F) == vvvv : "invalid value for VEX.vvvv";

        // RXB and vvvv are stored inverted (1's complement) in the prefix.
        int rxb1s = rxb ^ 0x07;
        int vvvv1s = vvvv ^ 0x0F;
        // The 2-byte form can only express R (not X/B), W=0 and the 0F escape.
        if ((rxb & 0x03) == 0 && w == WIG && mmmmm == M_0F) {
            // 2 byte encoding
            int byte2 = 0;
            byte2 |= (rxb1s & 0x04) << 5;
            byte2 |= vvvv1s << 3;
            byte2 |= l << 2;
            byte2 |= pp;

            emitByte(Prefix.VEX2);
            emitByte(byte2);
        } else {
            // 3 byte encoding
            int byte2 = 0;
            byte2 = (rxb1s & 0x07) << 5;
            byte2 |= mmmmm;

            int byte3 = 0;
            byte3 |= w << 7;
            byte3 |= vvvv1s << 3;
            byte3 |= l << 2;
            byte3 |= pp;

            emitByte(Prefix.VEX3);
            emitByte(byte2);
            emitByte(byte3);
        }
    }

    /**
     * Maps an AVX vector size to the corresponding VEX/EVEX.L field value.
     */
    public static int getLFlag(AVXSize size) {
        switch (size) {
            case XMM:
                return L128;
            case YMM:
                return L256;
            case ZMM:
                return L512;
            default:
                return LZ;
        }
    }

    public final void vexPrefix(Register dst, Register nds, Register src, AVXSize size, int pp, int mmmmm, int w, boolean checkAVX) {
        emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
    }

    public final void vexPrefix(Register dst, Register nds, AMD64Address src, AVXSize size, int pp, int mmmmm, int w, boolean checkAVX) {
        emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
    }

    /**
     * Field value constants used when assembling EVEX prefixes (L', z and b fields).
     */
    protected static final class EVEXPrefixConfig {
        public static final int L512 = 2;
        public static final int LIG = 0;

        public static final int Z0 = 0x0;
        public static final int Z1 = 0x1;

        public static final int B0 = 0x0;
        public static final int B1 = 0x1;

        private EVEXPrefixConfig() {
        }
    }

    // Sentinel for tuple/vector-length combinations that have no valid disp8 scaling factor.
    private static final int NOT_SUPPORTED_VECTOR_LENGTH = -1;

    /**
     * EVEX-encoded instructions use a compressed displacement scheme by multiplying disp8 with a
     * scaling factor N depending on the tuple type and the vector length.
984 * 985 * Reference: Intel Software Developer's Manual Volume 2, Section 2.6.5 986 */ 987 protected enum EVEXTuple { 988 FV_NO_BROADCAST_32BIT(16, 32, 64), 989 FV_BROADCAST_32BIT(4, 4, 4), 990 FV_NO_BROADCAST_64BIT(16, 32, 64), 991 FV_BROADCAST_64BIT(8, 8, 8), 992 HV_NO_BROADCAST_32BIT(8, 16, 32), 993 HV_BROADCAST_32BIT(4, 4, 4), 994 FVM(16, 32, 64), 995 T1S_8BIT(1, 1, 1), 996 T1S_16BIT(2, 2, 2), 997 T1S_32BIT(4, 4, 4), 998 T1S_64BIT(8, 8, 8), 999 T1F_32BIT(4, 4, 4), 1000 T1F_64BIT(8, 8, 8), 1001 T2_32BIT(8, 8, 8), 1002 T2_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16), 1003 T4_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16), 1004 T4_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32), 1005 T8_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32), 1006 HVM(8, 16, 32), 1007 QVM(4, 8, 16), 1008 OVM(2, 4, 8), 1009 M128(16, 16, 16), 1010 DUP(8, 32, 64); 1011 1012 private final int scalingFactorVL128; 1013 private final int scalingFactorVL256; 1014 private final int scalingFactorVL512; 1015 1016 EVEXTuple(int scalingFactorVL128, int scalingFactorVL256, int scalingFactorVL512) { 1017 this.scalingFactorVL128 = scalingFactorVL128; 1018 this.scalingFactorVL256 = scalingFactorVL256; 1019 this.scalingFactorVL512 = scalingFactorVL512; 1020 } 1021 1022 private static int verifyScalingFactor(int scalingFactor) { 1023 if (scalingFactor == NOT_SUPPORTED_VECTOR_LENGTH) { 1024 throw GraalError.shouldNotReachHere("Invalid scaling factor."); 1025 } 1026 return scalingFactor; 1027 } 1028 1029 public int getDisp8ScalingFactor(AVXSize size) { 1030 switch (size) { 1031 case XMM: 1032 return verifyScalingFactor(scalingFactorVL128); 1033 case YMM: 1034 return verifyScalingFactor(scalingFactorVL256); 1035 case ZMM: 1036 return verifyScalingFactor(scalingFactorVL512); 1037 default: 1038 throw GraalError.shouldNotReachHere("Unsupported vector size."); 1039 } 1040 } 1041 } 1042 1043 // @formatter:off 1044 // 1045 // Instruction Format and EVEX illustrated below (optional 
[]): 1046 // 1047 // #of bytes: 4 1 1 1 1,2,4 1 1048 // [Prefixes] EVEX OpCode ModR/M [SIB] [Disp8*N] [Immediate] 1049 // [Disp16,32] 1050 // 1051 // The EVEX prefix is a 4-byte prefix, with the first two bytes derived from unused encoding 1052 // form of the 32-bit-mode-only BOUND instruction. The layout of the EVEX prefix is shown in 1053 // the figure below. The first byte must be 0x62, followed by three pay-load bytes, denoted 1054 // as P1, P2, and P3 individually or collectively as P[23:0] (see below). 1055 // 1056 // EVEX: 0x62 | P1 | P2 | P3 1057 // 1058 // 7 6 5 4 3 2 1 0 1059 // P1 R X B R' 0 0 m m P[ 7: 0] 1060 // P2 W v v v v 1 p p P[15: 8] 1061 // P3 z L' L b V' a a a P[23:16] 1062 // 1063 // Figure. Bit Field Layout of the EVEX Prefix 1064 // 1065 // Table. EVEX Prefix Bit Field Functional Grouping 1066 // 1067 // Notation Bit field Group Position Comment 1068 // --------- -------------------------- -------- ----------------------- 1069 // EVEX.RXB Next-8 register specifier P[7:5] Combine with ModR/M.reg, ModR/M.rm (base, index/vidx). 1070 // EVEX.X High-16 register specifier P[6] Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent. 1071 // EVEX.R' High-16 register specifier P[4] Combine with EVEX.R and ModR/M.reg. 1072 // -- Reserved P[3:2] Must be 0. 1073 // EVEX.mm Compressed legacy escape P[1:0] Identical to low two bits of VEX.mmmmm. 1074 // 1075 // EVEX.W Osize promotion/Opcode ext P[15] 1076 // EVEX.vvvv NDS register specifier P[14:11] Same as VEX.vvvv. 1077 // -- Fixed Value P[10] Must be 1. 1078 // EVEX.pp Compressed legacy prefix P[9:8] Identical to VEX.pp. 1079 // 1080 // EVEX.z Zeroing/Merging P[23] 1081 // EVEX.L'L Vector length/RC P[22:21] 1082 // EVEX.b Broadcast/RC/SAE Context P[20] 1083 // EVEX.V' High-16 NDS/VIDX register P[19] Combine with EVEX.vvvv or VSIB when present. 1084 // EVEX.aaa Embedded opmask register P[18:16] 1085 // 1086 // @formatter:on 1087 1088 /** 1089 * Low-level function to encode and emit the EVEX prefix. 
1090 * <p> 1091 * 62 [0 1 1 0 0 0 1 0]<br> 1092 * P1 [R X B R'0 0 m m]<br> 1093 * P2 [W v v v v 1 p p]<br> 1094 * P3 [z L'L b V'a a a] 1095 * <p> 1096 * The pp field encodes an extension to the opcode:<br> 1097 * 00: no extension<br> 1098 * 01: 66<br> 1099 * 10: F3<br> 1100 * 11: F2 1101 * <p> 1102 * The mm field encodes the leading bytes of the opcode:<br> 1103 * 01: implied 0F leading opcode byte<br> 1104 * 10: implied 0F 38 leading opcode bytes<br> 1105 * 11: implied 0F 3A leading opcode bytes 1106 * <p> 1107 * The z field encodes the merging mode (merge or zero). 1108 * <p> 1109 * The b field encodes the source broadcast or data rounding modes. 1110 * <p> 1111 * The aaa field encodes the operand mask register. 1112 */ 1113 private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv, int z, int b, int aaa) { 1114 assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX512F) : "emitting EVEX prefix on a CPU without AVX512 support"; 1115 1116 assert l == L128 || l == L256 || l == L512 : "invalid value for EVEX.L'L"; 1117 assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for EVEX.pp"; 1118 assert mm == M_0F || mm == M_0F38 || mm == M_0F3A : "invalid value for EVEX.mm"; 1119 assert w == W0 || w == W1 : "invalid value for EVEX.W"; 1120 1121 assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB"; 1122 assert (reg & 0x1F) == reg : "invalid value for EVEX.R'"; 1123 assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.V'vvvv"; 1124 1125 assert z == Z0 || z == Z1 : "invalid value for EVEX.z"; 1126 assert b == B0 || b == B1 : "invalid value for EVEX.b"; 1127 assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa"; 1128 1129 emitByte(Prefix.EVEX); 1130 int p1 = 0; 1131 p1 |= ((rxb ^ 0x07) & 0x07) << 5; 1132 p1 |= reg < 16 ? 
0x10 : 0; 1133 p1 |= mm; 1134 emitByte(p1); 1135 1136 int p2 = 0; 1137 p2 |= w << 7; 1138 p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3; 1139 p2 |= 0x04; 1140 p2 |= pp; 1141 emitByte(p2); 1142 1143 int p3 = 0; 1144 p3 |= z << 7; 1145 p3 |= l << 5; 1146 p3 |= b << 4; 1147 p3 |= vvvvv < 16 ? 0x08 : 0; 1148 p3 |= aaa; 1149 emitByte(p3); 1150 } 1151 1152 /** 1153 * Get RXB bits for register-register instructions in EVEX-encoding, where ModRM.rm contains a 1154 * register index. The R bit extends the ModRM.reg field and the X and B bits extends the 1155 * ModRM.rm field. 1156 */ 1157 private static int getRXBForEVEX(Register reg, Register rm) { 1158 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1; 1159 rxb |= (rm == null ? 0 : rm.encoding & 0x018) >> 3; 1160 return rxb; 1161 } 1162 1163 /** 1164 * Helper method for emitting EVEX prefix in the form of RRRR. 1165 */ 1166 protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) { 1167 assert !mask.isValid() || inRC(MASK, mask); 1168 emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0); 1169 } 1170 1171 /** 1172 * Helper method for emitting EVEX prefix in the form of RRRM. Because the memory addressing in 1173 * EVEX-encoded instructions employ a compressed displacement scheme when using disp8 form, the 1174 * user of this API should make sure to encode the operands using 1175 * {@link #emitEVEXOperandHelper(Register, AMD64Address, int, int)}. 1176 */ 1177 protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) { 1178 assert !mask.isValid() || inRC(MASK, mask); 1179 emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0); 1180 } 1181 1182 }