1 /* 2 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 25 package org.graalvm.compiler.asm.amd64; 26 27 import static jdk.vm.ci.amd64.AMD64.MASK; 28 import static jdk.vm.ci.amd64.AMD64.XMM; 29 import static jdk.vm.ci.amd64.AMD64.r12; 30 import static jdk.vm.ci.amd64.AMD64.r13; 31 import static jdk.vm.ci.amd64.AMD64.rbp; 32 import static jdk.vm.ci.amd64.AMD64.rsp; 33 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0; 34 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B1; 35 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.L512; 36 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0; 37 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1; 38 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128; 39 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256; 40 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ; 41 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F; 42 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38; 43 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A; 44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_; 45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66; 46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2; 47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3; 48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0; 49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1; 50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG; 51 import static org.graalvm.compiler.core.common.NumUtil.isByte; 52 53 import 
org.graalvm.compiler.asm.Assembler;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.meta.PlatformKind;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public abstract class AMD64BaseAssembler extends Assembler {

    // Strategy object for emitting SIMD instruction prefixes: VEX-based when the
    // target CPU supports AVX, legacy SSE prefix bytes otherwise (chosen once in
    // the constructor and never changed afterwards).
    private final SIMDEncoder simdEncoder;

    /**
     * Constructs an assembler for the AMD64 architecture.
     *
     * @param target the target description; its CPU feature set determines whether
     *            SIMD instructions are encoded with VEX prefixes or legacy SSE prefixes
     */
    public AMD64BaseAssembler(TargetDescription target) {
        super(target);

        if (supports(CPUFeature.AVX)) {
            simdEncoder = new VEXEncoderImpl();
        } else {
            simdEncoder = new SSEEncoderImpl();
        }
    }

    /**
     * The x86 operand sizes.
     */
    public enum OperandSize {
        // 1-byte operand; immediate must fit in a signed byte.
        BYTE(1, AMD64Kind.BYTE) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            }

            @Override
            protected int immediateSize() {
                return 1;
            }
        },

        // 2-byte operand; selected via the 0x66 operand-size override prefix.
        WORD(2, AMD64Kind.WORD, 0x66) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                assert imm == (short) imm;
                asm.emitShort(imm);
            }

            @Override
            protected int immediateSize() {
                return 2;
            }
        },

        DWORD(4, AMD64Kind.DWORD) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        // 8-byte operand. Note: immediates are still emitted as 4 bytes; the CPU
        // sign-extends the imm32 to 64 bits (see the emitImmediate javadoc below).
        QWORD(8, AMD64Kind.QWORD) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        // Scalar single-precision float (0xF3 mandatory prefix).
        SS(4, AMD64Kind.SINGLE, 0xF3, true),

        // Scalar double-precision float (0xF2 mandatory prefix).
        SD(8, AMD64Kind.DOUBLE, 0xF2, true),

        // Packed single-precision floats, 128-bit.
        PS(16, AMD64Kind.V128_SINGLE, true),

        // Packed double-precision floats, 128-bit (0x66 mandatory prefix).
        PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);

        // Mandatory/size-override prefix byte, or 0 if none is required.
        private final int sizePrefix;
        // Operand width in bytes.
        private final int bytes;
        // True for operand sizes that live in XMM registers.
        private final boolean xmm;
        // The platform kind corresponding to this operand size.
        private final AMD64Kind kind;

        OperandSize(int bytes, AMD64Kind kind) {
            this(bytes, kind, 0);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
            this(bytes, kind, sizePrefix, false);
        }

        OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
            this(bytes, kind, 0, xmm);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
            this.sizePrefix = sizePrefix;
            this.bytes = bytes;
            this.kind = kind;
            this.xmm = xmm;
        }

        public int getSizePrefix() {
            return sizePrefix;
        }

        public int getBytes() {
            return bytes;
        }

        public boolean isXmmType() {
            return xmm;
        }

        public AMD64Kind
getKind() {
            return kind;
        }

        /**
         * Returns the {@link OperandSize} whose kind matches the given platform kind.
         *
         * @throws org.graalvm.compiler.debug.GraalError if no operand size has this kind
         */
        public static OperandSize get(PlatformKind kind) {
            for (OperandSize operandSize : OperandSize.values()) {
                if (operandSize.kind.equals(kind)) {
                    return operandSize;
                }
            }
            throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
        }

        /**
         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
         * as sign-extended 32-bit values.
         *
         * @param asm
         * @param imm
         */
        protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
            // Overridden by sizes that support immediates (BYTE/WORD/DWORD/QWORD).
            throw new UnsupportedOperationException();
        }

        // Number of bytes emitImmediate will write; overridden alongside it.
        protected int immediateSize() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Code annotation describing the location of an operand within an instruction,
     * used by code patching (e.g. for RIP-relative or patched immediates).
     */
    public static class OperandDataAnnotation extends CodeAnnotation {
        /**
         * The position (bytes from the beginning of the method) of the operand.
         */
        public final int operandPosition;
        /**
         * The size of the operand, in bytes.
         */
        public final int operandSize;
        /**
         * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
         * RIP-relative operands are relative to this position.
         */
        public final int nextInstructionPosition;

        OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition);

            this.operandPosition = operandPosition;
            this.operandSize = operandSize;
            this.nextInstructionPosition = nextInstructionPosition;
        }

        @Override
        public String toString() {
            return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
        }
    }

    /**
     * Records an {@link OperandDataAnnotation} for an immediate that starts
     * {@code operandOffset} bytes after the current position. No-op when no
     * annotation consumer is installed.
     */
    protected void annotatePatchingImmediate(int operandOffset, int operandSize) {
        if (codePatchingAnnotationConsumer != null) {
            int pos = position();
            codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(pos, pos + operandOffset, operandSize, pos + operandOffset + operandSize));
        }
    }

    /**
     * Returns true if the target CPU supports the given feature.
     */
    public final boolean supports(CPUFeature feature) {
        return ((AMD64) target.arch).getFeatures().contains(feature);
    }

    // True if register r belongs to register category rc (e.g. XMM, MASK).
    protected static boolean inRC(RegisterCategory rc, Register r) {
        return r.getRegisterCategory().equals(rc);
    }

    /**
     * Returns the low 3 bits of the register encoding; the higher bits are carried
     * by the REX/VEX/EVEX prefix. XMM registers may encode up to 32 (AVX-512),
     * general-purpose registers up to 16.
     */
    protected static int encode(Register r) {
        assert r.encoding >= 0 && (inRC(XMM, r) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding;
        return r.encoding & 0x7;
    }

    // Register encodings >= 8 require a REX prefix to address.
    private static final int MinEncodingNeedsRex = 8;

    /**
     * Constants for X86 prefix bytes.
266 */ 267 private static class Prefix { 268 private static final int REX = 0x40; 269 private static final int REXB = 0x41; 270 private static final int REXX = 0x42; 271 private static final int REXXB = 0x43; 272 private static final int REXR = 0x44; 273 private static final int REXRB = 0x45; 274 private static final int REXRX = 0x46; 275 private static final int REXRXB = 0x47; 276 private static final int REXW = 0x48; 277 private static final int REXWB = 0x49; 278 private static final int REXWX = 0x4A; 279 private static final int REXWXB = 0x4B; 280 private static final int REXWR = 0x4C; 281 private static final int REXWRB = 0x4D; 282 private static final int REXWRX = 0x4E; 283 private static final int REXWRXB = 0x4F; 284 285 private static final int VEX2 = 0xC5; 286 private static final int VEX3 = 0xC4; 287 private static final int EVEX = 0x62; 288 } 289 290 protected final void rexw() { 291 emitByte(Prefix.REXW); 292 } 293 294 protected final void prefix(Register reg) { 295 prefix(reg, false); 296 } 297 298 protected final void prefix(Register reg, boolean byteinst) { 299 int regEnc = reg.encoding; 300 if (regEnc >= 8) { 301 emitByte(Prefix.REXB); 302 } else if (byteinst && regEnc >= 4) { 303 emitByte(Prefix.REX); 304 } 305 } 306 307 protected final void prefixq(Register reg) { 308 if (reg.encoding < 8) { 309 emitByte(Prefix.REXW); 310 } else { 311 emitByte(Prefix.REXWB); 312 } 313 } 314 315 protected final void prefix(Register dst, Register src) { 316 prefix(dst, false, src, false); 317 } 318 319 protected final void prefix(Register dst, boolean dstIsByte, Register src, boolean srcIsByte) { 320 int dstEnc = dst.encoding; 321 int srcEnc = src.encoding; 322 if (dstEnc < 8) { 323 if (srcEnc >= 8) { 324 emitByte(Prefix.REXB); 325 } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) { 326 emitByte(Prefix.REX); 327 } 328 } else { 329 if (srcEnc < 8) { 330 emitByte(Prefix.REXR); 331 } else { 332 emitByte(Prefix.REXRB); 333 } 334 } 335 } 336 337 /** 338 
     * Creates prefix for the operands. If the given operands exceed 3 bits, the 4th bit is encoded
     * in the prefix.
     */
    protected final void prefixq(Register reg, Register rm) {
        int regEnc = reg.encoding;
        int rmEnc = rm.encoding;
        // REX.W always emitted (64-bit operand size); add R/B for extended registers.
        if (regEnc < 8) {
            if (rmEnc < 8) {
                emitByte(Prefix.REXW);
            } else {
                emitByte(Prefix.REXWB);
            }
        } else {
            if (rmEnc < 8) {
                emitByte(Prefix.REXWR);
            } else {
                emitByte(Prefix.REXWRB);
            }
        }
    }

    // True if this register's encoding needs a REX extension bit (encoding >= 8).
    private static boolean needsRex(Register reg) {
        return reg.encoding >= MinEncodingNeedsRex;
    }

    /**
     * Emits a REX prefix for a memory operand, if one is needed. REX.B extends the
     * base register, REX.X extends the index register.
     */
    protected final void prefix(AMD64Address adr) {
        if (needsRex(adr.getBase())) {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXXB);
            } else {
                emitByte(Prefix.REXB);
            }
        } else {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXX);
            }
        }
    }

    /**
     * Emits a REX.W prefix for a memory operand (64-bit operand size); always emits
     * a prefix, adding X/B bits for extended index/base registers.
     */
    protected final void prefixq(AMD64Address adr) {
        if (needsRex(adr.getBase())) {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWXB);
            } else {
                emitByte(Prefix.REXWB);
            }
        } else {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWX);
            } else {
                emitByte(Prefix.REXW);
            }
        }
    }

    // Byte-instruction variant of prefix(adr, reg).
    protected void prefixb(AMD64Address adr, Register reg) {
        prefix(adr, reg, true);
    }

    protected void prefix(AMD64Address adr, Register reg) {
        prefix(adr, reg, false);
    }

    /**
     * Emits a REX prefix for a register and a memory operand, if one is needed.
     * REX.R extends ModRM.reg; REX.X/REX.B extend the address's index/base.
     */
    protected void prefix(AMD64Address adr, Register reg, boolean byteinst) {
        if (reg.encoding < 8) {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXXB);
                } else {
                    emitByte(Prefix.REXB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXX);
                } else if (byteinst && reg.encoding >= 4) {
                    // SPL/BPL/SIL/DIL need a bare REX in byte instructions.
                    emitByte(Prefix.REX);
                }
            }
        } else {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXRXB);
                } else {
                    emitByte(Prefix.REXRB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXRX);
                } else {
                    emitByte(Prefix.REXR);
                }
            }
        }
    }

    /**
     * Emits a REX.W prefix for a register and a memory operand (64-bit operand
     * size); always emits a prefix, with R/X/B bits as required.
     */
    protected void prefixq(AMD64Address adr, Register src) {
        if (src.encoding < 8) {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWXB);
                } else {
                    emitByte(Prefix.REXWB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWX);
                } else {
                    emitByte(Prefix.REXW);
                }
            }
        } else {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWRXB);
                } else {
                    emitByte(Prefix.REXWRB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWRX);
                } else {
                    emitByte(Prefix.REXWR);
                }
            }
        }
    }

    /**
     * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
     * field. The X bit must be 0.
     */
    protected static int getRXB(Register reg, Register rm) {
        // bit 3 of the register encoding becomes R (bit 2) and B (bit 0) of RXB.
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
        return rxb;
    }

    /**
     * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
     * are two cases for the memory operand:<br>
     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
     * <br>
     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
     */
    protected static int getRXB(Register reg, AMD64Address rm) {
        int rxb = (reg == null ?
0 : reg.encoding & 0x08) >> 1;
        // bit 3 of the index encoding becomes X (bit 1), base becomes B (bit 0).
        if (!rm.getIndex().equals(Register.None)) {
            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
        }
        if (!rm.getBase().equals(Register.None)) {
            rxb |= (rm.getBase().encoding & 0x08) >> 3;
        }
        return rxb;
    }

    /**
     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected final void emitModRM(int reg, Register rm) {
        assert (reg & 0x07) == reg;
        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
    }

    /**
     * Emit the ModR/M byte for two register operands.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected final void emitModRM(Register reg, Register rm) {
        emitModRM(reg.encoding & 0x07, rm);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     */
    protected final void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize, 1);
    }

    protected final void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
        emitOperandHelper(reg, addr, false, additionalInstructionSize, 1);
    }

    protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, 1);
    }

    /**
     * Variant for EVEX-encoded instructions: passes the disp8 compression scaling
     * factor through to the operand emitter.
     */
    protected final void emitEVEXOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, evexDisp8Scale);
    }

    /**
     * Emits the ModR/M byte and
     * optionally the SIB byte for one memory operand and an opcode
     * extension in the R field.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
     *            so that the start position of the next instruction can be computed even though
     *            this instruction has not been completely emitted yet.
     * @param evexDisp8Scale the scaling factor for computing the compressed displacement of
     *            EVEX-encoded instructions. This scaling factor only matters when the emitted
     *            instruction uses one-byte-displacement form.
     */
    private void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale) {
        assert (reg & 0x07) == reg;
        // reg occupies bits 5:3 of the ModR/M byte.
        int regenc = reg << 3;

        Register base = addr.getBase();
        Register index = addr.getIndex();

        Scale scale = addr.getScale();
        int disp = addr.getDisplacement();

        if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
            // [00 000 101] disp32
            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
            emitByte(0x05 | regenc);
            // Record where the 4-byte RIP-relative operand sits so it can be patched later.
            if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
                codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
            }
            emitInt(disp);
        } else if (base.isValid()) {
            boolean overriddenForce4Byte = force4Byte;
            int baseenc = base.isValid() ?
encode(base) : 0;

            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [base + indexscale + disp]
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base + indexscale]
                    // [00 reg 100][ss index base]
                    // rbp/r13 as base cannot use the disp-less form (encoding means disp32).
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x04 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                } else {
                    // EVEX disp8*N compression: divide out the scale if it divides evenly,
                    // otherwise the displacement must use the 4-byte form.
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [base + indexscale + imm8]
                        // [01 reg 100][ss index base] imm8
                        assert !index.equals(rsp) : "illegal addressing mode";
                        emitByte(0x44 | regenc);
                        emitByte(scale.log2 << 6 | indexenc | baseenc);
                        emitByte(disp & 0xFF);
                    } else {
                        // [base + indexscale + disp32]
                        // [10 reg 100][ss index base] disp32
                        assert !index.equals(rsp) : "illegal addressing mode";
                        emitByte(0x84 | regenc);
                        emitByte(scale.log2 << 6 | indexenc | baseenc);
                        emitInt(disp);
                    }
                }
            } else if (base.equals(rsp) || base.equals(r12)) {
                // [rsp + disp]
                // rsp/r12 as base always require an SIB byte (rm=100 selects SIB).
                if (disp == 0) {
                    // [rsp]
                    // [00 reg 100][00 100 100]
                    emitByte(0x04 | regenc);
                    emitByte(0x24);
                } else {
                    // EVEX disp8*N compression, as above.
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [rsp + imm8]
                        // [01 reg 100][00 100 100] disp8
                        emitByte(0x44 | regenc);
                        emitByte(0x24);
                        emitByte(disp & 0xFF);
                    } else {
                        // [rsp + imm32]
                        // [10 reg 100][00 100 100] disp32
                        emitByte(0x84 | regenc);
                        emitByte(0x24);
                        emitInt(disp);
                    }
                }
            } else {
                // [base + disp]
                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base]
                    // [00 reg base]
                    emitByte(0x00 | regenc | baseenc);
                } else {
                    // EVEX disp8*N compression, as above.
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [base + disp8]
                        // [01 reg base] disp8
                        emitByte(0x40 | regenc | baseenc);
                        emitByte(disp & 0xFF);
                    } else {
                        // [base + disp32]
                        // [10 reg base] disp32
                        emitByte(0x80 | regenc | baseenc);
                        emitInt(disp);
                    }
                }
            }
        } else {
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [indexscale + disp]
                // [00 reg 100][ss index 101] disp32
                assert !index.equals(rsp) : "illegal addressing mode";
                emitByte(0x04 | regenc);
                emitByte(scale.log2 << 6 | indexenc | 0x05);
                emitInt(disp);
            } else {
                // [disp] ABSOLUTE
                // [00 reg 100][00 100 101] disp32
                emitByte(0x04 | regenc);
                emitByte(0x25);
                emitInt(disp);
            }
        }
    }

    /**
     * Abstraction over the two ways of emitting SIMD instruction prefixes:
     * legacy SSE prefix bytes and VEX prefixes.
     */
    private interface SIMDEncoder {

        void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);

        void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);

    }

    /**
     * Legacy SSE encoding: emits the mandatory size prefix, a REX prefix if needed,
     * and the opcode escape byte(s).
     */
    private class SSEEncoderImpl implements SIMDEncoder {

        @Override
        public void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            // SSE has no non-destructive source; nds must be absent or equal to xreg.
            assert (!nds.isValid()) || nds.equals(xreg);
            if (sizePrefix > 0) {
                emitByte(sizePrefix);
            }
            if (isRexW) {
                prefixq(adr, xreg);
            } else {
                prefix(adr, xreg);
            }
            // Two-byte escapes (e.g. 0F 38) are emitted as a short, single bytes directly.
            if (opcodeEscapePrefix > 0xFF) {
                emitShort(opcodeEscapePrefix);
            } else if (opcodeEscapePrefix > 0) {
                emitByte(opcodeEscapePrefix);
            }
        }

        @Override
        public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            // SSE has no non-destructive source; nds must be absent or one of dst/src.
            assert (!nds.isValid()) || nds.equals(dst) || nds.equals(src);
            if (sizePrefix > 0) {
                emitByte(sizePrefix);
            }
            if (isRexW) {
                prefixq(dst, src);
            } else {
                prefix(dst, src);
            }
            if (opcodeEscapePrefix > 0xFF) {
                emitShort(opcodeEscapePrefix);
            } else if (opcodeEscapePrefix > 0) {
                emitByte(opcodeEscapePrefix);
            }
        }
    }

    /**
     * Field value constants for the VEX prefix (vector length, W bit, implied
     * legacy prefix pp, and opcode escape map mmmmm).
     */
    public static final class VEXPrefixConfig {
        public static final int L128 = 0;
        public static final int L256 = 1;
        public static final int LZ = 0;

        public static final int W0 = 0;
        public static final int W1 = 1;
        public static final int WIG = 0;

        public static final int P_ = 0x0;
        public static final int P_66 = 0x1;
        public static final int P_F3 = 0x2;
        public static final int P_F2 = 0x3;

        public static final int M_0F = 0x1;
        public static final int M_0F38 = 0x2;
        public static final int M_0F3A = 0x3;

        private VEXPrefixConfig() {
        }
    }

    /**
     * VEX encoding: folds the legacy size prefix and opcode escape bytes into the
     * compressed pp/mmmmm fields of a VEX prefix.
     */
    private class VEXEncoderImpl implements SIMDEncoder {

        // Maps a legacy mandatory prefix byte to the VEX.pp field value.
        private int sizePrefixToPP(int sizePrefix) {
            switch (sizePrefix) {
                case 0x66:
                    return P_66;
                case 0xF2:
                    return P_F2;
                case 0xF3:
                    return P_F3;
                default:
                    return P_;
            }
        }

        // Maps legacy opcode escape byte(s) to the VEX.m-mmmm field value.
        private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) {
            switch (opcodeEscapePrefix) {
                case 0x0F:
                    return M_0F;
                case 0x380F:
                    return M_0F38;
                case 0x3A0F:
                    return M_0F3A;
                default:
                    return 0;
            }
        }

        @Override
        public void simdPrefix(Register reg,
Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            assert reg.encoding < 16 : "encoding out of range: " + reg.encoding;
            assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
            // L128: this path encodes legacy SSE-style (scalar/128-bit) operations.
            emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0, true);
        }

        @Override
        public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            assert dst.encoding < 16 : "encoding out of range: " + dst.encoding;
            assert src.encoding < 16 : "encoding out of range: " + src.encoding;
            assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
            emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0, true);
        }
    }

    /**
     * Emits the SIMD prefix for a register/memory operation, with an explicit size
     * prefix overriding the one implied by {@code size} when non-zero.
     */
    protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(xreg, nds, adr, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(xreg, nds, adr, size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    /**
     * Emits the SIMD prefix for a register/register operation, with an explicit size
     * prefix overriding the one implied by {@code size} when non-zero.
     */
    protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(dst, nds, src, overriddenSizePrefix != 0 ?
overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    // @formatter:off
    //
    // Instruction Format and VEX illustrated below (optional []):
    //
    // #of bytes:    2,3      1       1       1       1,2,4       1
    // [Prefixes]    VEX   OpCode   ModR/M  [SIB]   [Disp8*N] [Immediate]
    //                                              [Disp16,32]
    //
    // VEX: 0xC4 | P1 | P2
    //
    //     7   6   5   4   3   2   1   0
    // P1  R   X   B   m   m   m   m   m      P[ 7:0]
    // P2  W   v   v   v   v   L   p   p      P[15:8]
    //
    // VEX: 0xC5 | B1
    //
    //     7   6   5   4   3   2   1   0
    // P1  R   v   v   v   v   L   p   p      P[7:0]
    //
    // Figure. Bit Field Layout of the VEX Prefix
    //
    // Table. VEX Prefix Bit Field Functional Grouping
    //
    // Notation       Bit field Group            Position    Comment
    // ----------     -------------------------  --------    -------------------
    // VEX.RXB        Next-8 register specifier  P[7:5]      Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
    // VEX.R          REX.R inverse              P[7]        Combine with EVEX.R and ModR/M.reg.
    // VEX.X          REX.X inverse              P[6]        Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
    // VEX.B          REX.B inverse              P[5]
    // VEX.mmmmmm     0F, 0F_38, 0F_3A encoding  P[4:0]      b01/0x0F, b10/0F_38, b11/0F_3A (all other reserved)
    //
    // VEX.W          Opcode specific            P[15]
    // VEX.vvvv       A register specifier       P[14:11]    In inverse form, b1111 if not used.
    //                                           P[6:3]
    // VEX.L          Vector length/RC           P[10]       b0/scalar or 128b vec, b1/256b vec.
    //                                           P[2]
    // VEX.pp         Compressed legacy prefix   P[9:8]      b00/None, b01/0x66, b10/0xF3, b11/0xF2
    //                                           P[1:0]
    // @formatter:on

    /**
     * Low-level function to encode and emit the VEX prefix.
     * <p>
     * 2 byte form: [1100 0101] [R vvvv L pp]<br>
     * 3 byte form: [1100 0100] [RXB m-mmmm] [W vvvv L pp]
     * <p>
     * The RXB and vvvv fields are stored in 1's complement in the prefix encoding. This function
     * performs the 1s complement conversion, the caller is expected to pass plain unencoded
     * arguments.
     * <p>
     * The pp field encodes an extension to the opcode:<br>
     * 00: no extension<br>
     * 01: 66<br>
     * 10: F3<br>
     * 11: F2
     * <p>
     * The m-mmmm field encodes the leading bytes of the opcode:<br>
     * 00001: implied 0F leading opcode byte (default in 2-byte encoding)<br>
     * 00010: implied 0F 38 leading opcode bytes<br>
     * 00011: implied 0F 3A leading opcode bytes
     * <p>
     * This function automatically chooses the 2 or 3 byte encoding, based on the XBW flags and the
     * m-mmmm field.
     */
    protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv, boolean checkAVX) {
        assert !checkAVX || ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX) : "emitting VEX prefix on a CPU without AVX support";

        assert l == L128 || l == L256 : "invalid value for VEX.L";
        assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for VEX.pp";
        assert mmmmm == M_0F || mmmmm == M_0F38 || mmmmm == M_0F3A : "invalid value for VEX.m-mmmm";
        assert w == W0 || w == W1 : "invalid value for VEX.W";

        assert (rxb & 0x07) == rxb : "invalid value for VEX.RXB";
        assert (vvvv & 0x0F) == vvvv : "invalid value for VEX.vvvv";

        // RXB and vvvv are stored inverted (1's complement) in the prefix.
        int rxb1s = rxb ^ 0x07;
        int vvvv1s = vvvv ^ 0x0F;
        // The short form is only valid when X = B = 0, W = 0 and the escape is 0F.
        if ((rxb & 0x03) == 0 && w == WIG && mmmmm == M_0F) {
            // 2 byte encoding
            int byte2 = 0;
            byte2 |= (rxb1s & 0x04) << 5;
            byte2 |= vvvv1s << 3;
            byte2 |= l << 2;
            byte2 |= pp;

            emitByte(Prefix.VEX2);
            emitByte(byte2);
        } else {
            // 3 byte encoding
            int byte2 = 0;
            byte2 = (rxb1s & 0x07) << 5;
            byte2 |= mmmmm;

            int byte3 = 0;
            byte3 |= w << 7;
            byte3 |= vvvv1s << 3;
            byte3 |= l << 2;
            byte3 |= pp;

            emitByte(Prefix.VEX3);
            emitByte(byte2);
            emitByte(byte3);
        }
    }

    /**
     * Maps an {@link AVXSize} to the corresponding vector length (L/L'L) field
     * value; unknown sizes map to {@code LZ}.
     */
    public static int getLFlag(AVXSize size) {
        switch (size) {
            case XMM:
                return L128;
            case YMM:
                return L256;
            case ZMM:
                return L512;
            default:
                return LZ;
        }
    }

    /** Emits a VEX prefix for a register/register operation. */
    public final void vexPrefix(Register dst, Register nds, Register src, AVXSize size, int pp, int mmmmm, int w, boolean checkAVX) {
        emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
    }

    /** Emits a VEX prefix for a register/memory operation. */
    public final void vexPrefix(Register dst, Register nds, AMD64Address src, AVXSize size, int pp, int mmmmm, int w, boolean checkAVX) {
        emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
    }

    /**
     * Field value constants specific to the EVEX prefix (512-bit vector length,
     * zeroing/merging z bit and broadcast/rounding b bit).
     */
    protected static final class EVEXPrefixConfig {
        public static final int L512 = 2;
        public static final int LIG = 0;

        public static final int Z0 = 0x0;
        public static final int Z1 = 0x1;

        public static final int B0 = 0x0;
        public static final int B1 = 0x1;

        private EVEXPrefixConfig() {
        }
    }

    // Sentinel scaling factor for tuple/vector-length combinations that are invalid.
    private static final int NOT_SUPPORTED_VECTOR_LENGTH = -1;

    /**
     * EVEX-encoded instructions use a compressed displacement scheme by multiplying disp8 with a
     * scaling factor N depending on the tuple type and the vector length.
     *
     * Reference: Intel Software Developer's Manual Volume 2, Section 2.6.5
     */
    protected enum EVEXTuple {
        // Constructor arguments are the disp8 scaling factors for
        // 128-bit, 256-bit and 512-bit vector lengths respectively.
        FV_NO_BROADCAST_32BIT(16, 32, 64),
        FV_BROADCAST_32BIT(4, 4, 4),
        FV_NO_BROADCAST_64BIT(16, 32, 64),
        FV_BROADCAST_64BIT(8, 8, 8),
        HV_NO_BROADCAST_32BIT(8, 16, 32),
        HV_BROADCAST_32BIT(4, 4, 4),
        FVM(16, 32, 64),
        T1S_8BIT(1, 1, 1),
        T1S_16BIT(2, 2, 2),
        T1S_32BIT(4, 4, 4),
        T1S_64BIT(8, 8, 8),
        T1F_32BIT(4, 4, 4),
        T1F_64BIT(8, 8, 8),
        T2_32BIT(8, 8, 8),
        T2_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
        T4_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
        T4_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
        T8_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
        HVM(8, 16, 32),
        QVM(4, 8, 16),
        OVM(2, 4, 8),
        M128(16, 16, 16),
        DUP(8, 32, 64);

        private final int scalingFactorVL128;
        private final int scalingFactorVL256;
        private final int scalingFactorVL512;

        EVEXTuple(int scalingFactorVL128, int scalingFactorVL256, int scalingFactorVL512) {
            this.scalingFactorVL128 = scalingFactorVL128;
            this.scalingFactorVL256 = scalingFactorVL256;
            this.scalingFactorVL512 = scalingFactorVL512;
        }

        // Rejects combinations that have no defined disp8*N scaling factor.
        private static int verifyScalingFactor(int scalingFactor) {
            if (scalingFactor == NOT_SUPPORTED_VECTOR_LENGTH) {
                throw GraalError.shouldNotReachHere("Invalid scaling factor.");
            }
            return scalingFactor;
        }

        /**
         * Returns the disp8 compression factor N for this tuple type at the given
         * vector length.
         */
        public int getDisp8ScalingFactor(AVXSize size) {
            switch (size) {
                case XMM:
                    return verifyScalingFactor(scalingFactorVL128);
                case YMM:
                    return verifyScalingFactor(scalingFactorVL256);
                case ZMM:
                    return verifyScalingFactor(scalingFactorVL512);
                default:
                    throw GraalError.shouldNotReachHere("Unsupported vector size.");
            }
        }
    }

    // @formatter:off
    //
    // Instruction Format and EVEX illustrated below (optional []):
    //
    // #of bytes:      4      1      1       1       1      1,2,4       1
    //             [Prefixes]  EVEX  OpCode  ModR/M  [SIB]  [Disp8*N]  [Immediate]
    //                                                      [Disp16,32]
    //
    // The EVEX prefix is a 4-byte prefix, with the first two bytes derived from unused encoding
    // form of the 32-bit-mode-only BOUND instruction. The layout of the EVEX prefix is shown in
    // the figure below. The first byte must be 0x62, followed by three payload bytes, denoted
    // as P1, P2, and P3 individually or collectively as P[23:0] (see below).
    //
    // EVEX: 0x62 | P1 | P2 | P3
    //
    //       7  6  5  4  3  2  1  0
    // P1    R  X  B  R' 0  0  m  m     P[ 7: 0]
    // P2    W  v  v  v  v  1  p  p     P[15: 8]
    // P3    z  L' L  b  V' a  a  a     P[23:16]
    //
    // Figure. Bit Field Layout of the EVEX Prefix
    //
    // Table. EVEX Prefix Bit Field Functional Grouping
    //
    // Notation   Bit field Group             Position  Comment
    // ---------  --------------------------  --------  -----------------------
    // EVEX.RXB   Next-8 register specifier   P[7:5]    Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
    // EVEX.X     High-16 register specifier  P[6]      Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
    // EVEX.R'    High-16 register specifier  P[4]      Combine with EVEX.R and ModR/M.reg.
    // --         Reserved                    P[3:2]    Must be 0.
    // EVEX.mm    Compressed legacy escape    P[1:0]    Identical to low two bits of VEX.mmmmm.
    //
    // EVEX.W     Osize promotion/Opcode ext  P[15]
    // EVEX.vvvv  NDS register specifier      P[14:11]  Same as VEX.vvvv.
    // --         Fixed Value                 P[10]     Must be 1.
    // EVEX.pp    Compressed legacy prefix    P[9:8]    Identical to VEX.pp.
    //
    // EVEX.z     Zeroing/Merging             P[23]
    // EVEX.L'L   Vector length/RC            P[22:21]
    // EVEX.b     Broadcast/RC/SAE Context    P[20]
    // EVEX.V'    High-16 NDS/VIDX register   P[19]     Combine with EVEX.vvvv or VSIB when present.
    // EVEX.aaa   Embedded opmask register    P[18:16]
    //
    // @formatter:on

    /**
     * Low-level function to encode and emit the EVEX prefix.
     * <p>
     * 62 [0 1 1 0 0 0 1 0]<br>
     * P1 [R X B R'0 0 m m]<br>
     * P2 [W v v v v 1 p p]<br>
     * P3 [z L'L b V'a a a]
     * <p>
     * The pp field encodes an extension to the opcode:<br>
     * 00: no extension<br>
     * 01: 66<br>
     * 10: F3<br>
     * 11: F2
     * <p>
     * The mm field encodes the leading bytes of the opcode:<br>
     * 01: implied 0F leading opcode byte<br>
     * 10: implied 0F 38 leading opcode bytes<br>
     * 11: implied 0F 3A leading opcode bytes
     * <p>
     * The z field encodes the merging mode (merge or zero).
     * <p>
     * The b field encodes the source broadcast or data rounding modes.
     * <p>
     * The aaa field encodes the operand mask register.
     *
     * @param l vector length selector for EVEX.L'L ({@code L128}, {@code L256} or {@code L512})
     * @param rxb pre-extracted extension bits for ModR/M.reg / SIB.index / ModR/M.rm, in the low
     *            three bits (emitted inverted into P[7:5])
     * @param reg full 5-bit index of the ModR/M.reg register; bit 4 drives EVEX.R'
     * @param vvvvv full 5-bit index of the NDS register; bit 4 drives EVEX.V'
     * @param aaa index of the embedded opmask register (0 means no masking)
     */
    private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv, int z, int b, int aaa) {
        assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX512F) : "emitting EVEX prefix on a CPU without AVX512 support";

        assert l == L128 || l == L256 || l == L512 : "invalid value for EVEX.L'L";
        assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for EVEX.pp";
        assert mm == M_0F || mm == M_0F38 || mm == M_0F3A : "invalid value for EVEX.mm";
        assert w == W0 || w == W1 : "invalid value for EVEX.W";

        assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB";
        assert (reg & 0x1F) == reg : "invalid value for EVEX.R'";
        assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.V'vvvv";

        assert z == Z0 || z == Z1 : "invalid value for EVEX.z";
        assert b == B0 || b == B1 : "invalid value for EVEX.b";
        assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa";

        emitByte(Prefix.EVEX);
        int p1 = 0;
        // R, X and B are stored inverted (one's complement) in P[7:5].
        p1 |= ((rxb ^ 0x07) & 0x07) << 5;
        // R' (P[4]) is also inverted: 1 selects registers 0..15, 0 selects 16..31.
        p1 |= reg < 16 ? 0x10 : 0;
        p1 |= mm;
        emitByte(p1);

        int p2 = 0;
        p2 |= w << 7;
        // vvvv is stored inverted in P[14:11], as in VEX.
        p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3;
        // Fixed-value bit P[10] must be 1.
        p2 |= 0x04;
        p2 |= pp;
        emitByte(p2);

        int p3 = 0;
        p3 |= z << 7;
        p3 |= l << 5;
        p3 |= b << 4;
        // V' (P[19]) is inverted like R': 1 selects NDS registers 0..15.
        p3 |= vvvvv < 16 ? 0x08 : 0;
        p3 |= aaa;
        emitByte(p3);
    }

    /**
     * Get RXB bits for register-register instructions in EVEX-encoding, where ModRM.rm contains a
     * register index. The R bit extends the ModRM.reg field and the X and B bits extend the
     * ModRM.rm field.
     * <p>
     * A {@code null} register contributes no extension bits. For {@code rm}, both encoding bits 3
     * and 4 (mask {@code 0x018}) are used, since EVEX addresses 32 registers.
     */
    private static int getRXBForEVEX(Register reg, Register rm) {
        // reg bit 3 -> R (bit 2 of the result).
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        // rm bit 4 -> X (bit 1), rm bit 3 -> B (bit 0).
        rxb |= (rm == null ? 0 : rm.encoding & 0x018) >> 3;
        return rxb;
    }

    /**
     * Helper method for emitting EVEX prefix in the form of RRRR.
     * <p>
     * {@code dst} is encoded via ModR/M.reg (plus EVEX.R/R'), {@code src} via ModR/M.rm (plus
     * EVEX.X/B), {@code nds} via EVEX.vvvv/V'. An invalid {@code mask} or {@code nds} encodes as 0.
     */
    protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) {
        assert !mask.isValid() || inRC(MASK, mask);
        emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
    }

    /**
     * Helper method for emitting EVEX prefix in the form of RRRM. Because the memory addressing in
     * EVEX-encoded instructions employ a compressed displacement scheme when using disp8 form, the
     * user of this API should make sure to encode the operands using
     * {@link #emitEVEXOperandHelper(Register, AMD64Address, int, int)}.
     */
    protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) {
        assert !mask.isValid() || inRC(MASK, mask);
        emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
    }

}