1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 25 package org.graalvm.compiler.asm.amd64; 26 27 import static jdk.vm.ci.amd64.AMD64.MASK; 28 import static jdk.vm.ci.amd64.AMD64.XMM; 29 import static jdk.vm.ci.amd64.AMD64.r12; 30 import static jdk.vm.ci.amd64.AMD64.r13; 31 import static jdk.vm.ci.amd64.AMD64.rbp; 32 import static jdk.vm.ci.amd64.AMD64.rsp; 33 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0; 34 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B1; 35 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.L512; 36 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0; 37 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1; 38 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128; 39 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256; 40 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ; 41 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F; 42 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38; 43 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A; 44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_; 45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66; 46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2; 47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3; 48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0; 49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1; 50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG; 51 import static org.graalvm.compiler.core.common.NumUtil.isByte; 52 53 import 
org.graalvm.compiler.asm.Assembler;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.meta.PlatformKind;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public abstract class AMD64BaseAssembler extends Assembler {

    /**
     * Strategy object used to emit the prefix of SIMD instructions: either the legacy SSE
     * encoding or the AVX VEX encoding, chosen once in the constructor based on CPU features.
     */
    private final SIMDEncoder simdEncoder;

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64BaseAssembler(TargetDescription target) {
        super(target);

        // With AVX available, SIMD prefixes are emitted in VEX form; otherwise the
        // legacy SSE prefix bytes are emitted directly.
        if (supports(CPUFeature.AVX)) {
            simdEncoder = new VEXEncoderImpl();
        } else {
            simdEncoder = new SSEEncoderImpl();
        }
    }

    /**
     * The x86 operand sizes.
     */
    public enum OperandSize {
        BYTE(1, AMD64Kind.BYTE) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            }

            @Override
            protected int immediateSize() {
                return 1;
            }
        },

        // 0x66 is the size prefix emitted (or folded into VEX.pp) before the opcode.
        WORD(2, AMD64Kind.WORD, 0x66) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                assert imm == (short) imm;
                asm.emitShort(imm);
            }

            @Override
            protected int immediateSize() {
                return 2;
            }
        },

        DWORD(4, AMD64Kind.DWORD) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        QWORD(8, AMD64Kind.QWORD) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                // QWORD immediates are encoded as sign-extended 32-bit values
                // (see the javadoc of emitImmediate below), hence emitInt.
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        SS(4, AMD64Kind.SINGLE, 0xF3,
                        true),

        SD(8, AMD64Kind.DOUBLE, 0xF2, true),

        PS(16, AMD64Kind.V128_SINGLE, true),

        PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);

        // Prefix byte emitted before the opcode (0 means none); see SSEEncoderImpl and
        // VEXEncoderImpl.sizePrefixToPP for how it is used.
        private final int sizePrefix;
        // Operand width in bytes.
        private final int bytes;
        // True for sizes that live in XMM registers (SS/SD/PS/PD).
        private final boolean xmm;
        private final AMD64Kind kind;

        OperandSize(int bytes, AMD64Kind kind) {
            this(bytes, kind, 0);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
            this(bytes, kind, sizePrefix, false);
        }

        OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
            this(bytes, kind, 0, xmm);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
            this.sizePrefix = sizePrefix;
            this.bytes = bytes;
            this.kind = kind;
            this.xmm = xmm;
        }

        public int getSizePrefix() {
            return sizePrefix;
        }

        public int getBytes() {
            return bytes;
        }

        public boolean isXmmType() {
            return xmm;
        }

        public AMD64Kind getKind() {
            return kind;
        }

        /**
         * Returns the {@link OperandSize} whose kind matches the given platform kind.
         *
         * @throws org.graalvm.compiler.debug.GraalError if no operand size matches
         */
        public static OperandSize get(PlatformKind kind) {
            for (OperandSize operandSize : OperandSize.values()) {
                if (operandSize.kind.equals(kind)) {
                    return operandSize;
                }
            }
            throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
        }

        /**
         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
         * as sign-extended 32-bit values.
         *
         * @param asm the assembler to emit into
         * @param imm the immediate value to emit
         */
        protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
            throw new UnsupportedOperationException();
        }

        /**
         * Number of bytes {@link #emitImmediate} will emit for this size.
         */
        protected int immediateSize() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Base class for annotations that record where an instruction's operand was emitted, so that
     * it can be patched later.
     */
    public abstract static class OperandDataAnnotation extends CodeAnnotation {
        /**
         * The position (bytes from the beginning of the method) of the operand.
         */
        public final int operandPosition;
        /**
         * The size of the operand, in bytes.
         */
        public final int operandSize;
        /**
         * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
         * RIP-relative operands are relative to this position.
         */
        public final int nextInstructionPosition;

        OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition);

            this.operandPosition = operandPosition;
            this.operandSize = operandSize;
            this.nextInstructionPosition = nextInstructionPosition;
        }

        @Override
        public String toString() {
            return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
        }
    }

    /**
     * Annotation that stores additional information about the displacement of a
     * {@link Assembler#getPlaceholder placeholder address} that needs patching.
     */
    protected static class AddressDisplacementAnnotation extends OperandDataAnnotation {
        AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Annotation that stores additional information about the immediate operand, e.g., of a call
     * instruction, that needs patching.
     */
    protected static class ImmediateOperandAnnotation extends OperandDataAnnotation {
        ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Records an {@link ImmediateOperandAnnotation} for an immediate that starts
     * {@code operandOffset} bytes after the current position, if a patching consumer is installed.
     *
     * @param operandOffset offset of the immediate relative to the current position
     * @param operandSize size of the immediate, in bytes
     */
    protected void annotatePatchingImmediate(int operandOffset, int operandSize) {
        if (codePatchingAnnotationConsumer != null) {
            int pos = position();
            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + operandOffset, operandSize, pos + operandOffset + operandSize));
        }
    }

    /**
     * Returns true if the target CPU supports the given feature.
     */
    public final boolean supports(CPUFeature feature) {
        return ((AMD64) target.arch).getFeatures().contains(feature);
    }

    /**
     * Returns the low 3 bits of the register encoding; the remaining high bits are carried by the
     * REX/VEX/EVEX prefixes (see {@link #getRXB}).
     */
    protected static int encode(Register r) {
        // XMM registers can have encodings up to 31 (AVX-512), general-purpose up to 15.
        assert r.encoding >= 0 && (r.getRegisterCategory().equals(XMM) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding;
        return r.encoding & 0x7;
    }

    // Register encodings 8 and above need a REX prefix bit to be addressable.
    private static final int MinEncodingNeedsRex = 8;

    /**
     * Constants for X86 prefix bytes.
     */
    private static class Prefix {
        // REX prefix bytes are 0100WRXB: base 0x40 plus the W, R, X and B bits,
        // as reflected in the constant names below.
        private static final int REX = 0x40;
        private static final int REXB = 0x41;
        private static final int REXX = 0x42;
        private static final int REXXB = 0x43;
        private static final int REXR = 0x44;
        private static final int REXRB = 0x45;
        private static final int REXRX = 0x46;
        private static final int REXRXB = 0x47;
        private static final int REXW = 0x48;
        private static final int REXWB = 0x49;
        private static final int REXWX = 0x4A;
        private static final int REXWXB = 0x4B;
        private static final int REXWR = 0x4C;
        private static final int REXWRB = 0x4D;
        private static final int REXWRX = 0x4E;
        private static final int REXWRXB = 0x4F;
    }

    /**
     * Emits a REX.W prefix (64-bit operand size).
     */
    protected final void rexw() {
        emitByte(Prefix.REXW);
    }

    protected final void prefix(Register reg) {
        prefix(reg, false);
    }

    /**
     * Emits a REX prefix for a single-register operand if one is required.
     *
     * @param byteinst true if this is a byte instruction; then encodings 4..7 also need a
     *            plain REX prefix (presumably to select SPL..DIL rather than AH..DH — standard
     *            x86-64 byte-register rule)
     */
    protected final void prefix(Register reg, boolean byteinst) {
        int regEnc = reg.encoding;
        if (regEnc >= 8) {
            emitByte(Prefix.REXB);
        } else if (byteinst && regEnc >= 4) {
            emitByte(Prefix.REX);
        }
    }

    /**
     * Emits a REX.W prefix for a single-register 64-bit operand, adding the B bit for
     * encodings 8..15.
     */
    protected final void prefixq(Register reg) {
        if (reg.encoding < 8) {
            emitByte(Prefix.REXW);
        } else {
            emitByte(Prefix.REXWB);
        }
    }

    protected final void prefix(Register dst, Register src) {
        prefix(dst, false, src, false);
    }

    /**
     * Emits a REX prefix for a register-register instruction if one is required. The R bit
     * covers {@code dst}, the B bit covers {@code src}.
     */
    protected final void prefix(Register dst, boolean dstIsByte, Register src, boolean srcIsByte) {
        int dstEnc = dst.encoding;
        int srcEnc = src.encoding;
        if (dstEnc < 8) {
            if (srcEnc >= 8) {
                emitByte(Prefix.REXB);
            } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
                emitByte(Prefix.REX);
            }
        } else {
            if (srcEnc < 8) {
                emitByte(Prefix.REXR);
            } else {
                emitByte(Prefix.REXRB);
            }
        }
    }

    /**
     * Creates prefix for the operands. If the given operands exceed 3 bits, the 4th bit is encoded
     * in the prefix.
     */
    protected final void prefixq(Register reg, Register rm) {
        int regEnc = reg.encoding;
        int rmEnc = rm.encoding;
        // Always emits REX.W; adds R for reg >= 8 and B for rm >= 8.
        if (regEnc < 8) {
            if (rmEnc < 8) {
                emitByte(Prefix.REXW);
            } else {
                emitByte(Prefix.REXWB);
            }
        } else {
            if (rmEnc < 8) {
                emitByte(Prefix.REXWR);
            } else {
                emitByte(Prefix.REXWRB);
            }
        }
    }

    private static boolean needsRex(Register reg) {
        return reg.encoding >= MinEncodingNeedsRex;
    }

    /**
     * Emits a REX prefix for a memory operand if required: B covers the base register, X covers
     * the index register. No prefix is emitted when neither needs one.
     */
    protected final void prefix(AMD64Address adr) {
        if (needsRex(adr.getBase())) {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXXB);
            } else {
                emitByte(Prefix.REXB);
            }
        } else {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXX);
            }
        }
    }

    /**
     * Emits a REX.W prefix for a 64-bit memory operand; B covers the base, X the index.
     * Unlike {@link #prefix(AMD64Address)}, a prefix (at least REX.W) is always emitted.
     */
    protected final void prefixq(AMD64Address adr) {
        if (needsRex(adr.getBase())) {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWXB);
            } else {
                emitByte(Prefix.REXWB);
            }
        } else {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWX);
            } else {
                emitByte(Prefix.REXW);
            }
        }
    }

    protected void prefixb(AMD64Address adr, Register reg) {
        prefix(adr, reg, true);
    }

    protected void prefix(AMD64Address adr, Register reg) {
        prefix(adr, reg, false);
    }

    /**
     * Emits a REX prefix for a register-memory instruction if required. R covers {@code reg},
     * X the index register and B the base register of {@code adr}.
     */
    protected void prefix(AMD64Address adr, Register reg, boolean byteinst) {
        if (reg.encoding < 8) {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXXB);
                } else {
                    emitByte(Prefix.REXB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXX);
                } else if (byteinst && reg.encoding >= 4) {
                    emitByte(Prefix.REX);
                }
            }
        } else {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXRXB);
                } else {
                    emitByte(Prefix.REXRB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXRX);
                } else {
emitByte(Prefix.REXR);
                }
            }
        }
    }

    /**
     * Emits a REX.W prefix for a 64-bit register-memory instruction. R covers {@code src},
     * X the index register and B the base register of {@code adr}; REX.W is always set.
     */
    protected void prefixq(AMD64Address adr, Register src) {
        if (src.encoding < 8) {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWXB);
                } else {
                    emitByte(Prefix.REXWB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWX);
                } else {
                    emitByte(Prefix.REXW);
                }
            }
        } else {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWRXB);
                } else {
                    emitByte(Prefix.REXWRB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWRX);
                } else {
                    emitByte(Prefix.REXWR);
                }
            }
        }
    }

    /**
     * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
     * field. The X bit must be 0.
     */
    protected static int getRXB(Register reg, Register rm) {
        // bit 3 of reg.encoding -> R (bit 2 of rxb); bit 3 of rm.encoding -> B (bit 0).
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
        return rxb;
    }

    /**
     * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
     * are two cases for the memory operand:<br>
     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
     * <br>
     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
     */
    protected static int getRXB(Register reg, AMD64Address rm) {
        // bit 3 of reg.encoding -> R (bit 2); index bit 3 -> X (bit 1); base bit 3 -> B (bit 0).
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        if (!rm.getIndex().equals(Register.None)) {
            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
        }
        if (!rm.getBase().equals(Register.None)) {
            rxb |= (rm.getBase().encoding & 0x08) >> 3;
        }
        return rxb;
    }

    /**
     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected final void emitModRM(int reg, Register rm) {
        assert (reg & 0x07) == reg;
        // 0xC0 selects register-direct mode (mod = 11).
        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
    }

    /**
     * Emit the ModR/M byte for two register operands.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected final void emitModRM(Register reg, Register rm) {
        emitModRM(reg.encoding & 0x07, rm);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     */
    protected final void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize, 1);
    }

    protected final void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
        emitOperandHelper(reg, addr, false, additionalInstructionSize, 1);
    }

    protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, 1);
    }

    /**
     * Like {@link #emitOperandHelper(Register, AMD64Address, int)}, but with an explicit disp8
     * scaling factor for EVEX compressed-displacement encoding.
     */
    protected final void emitEVEXOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, evexDisp8Scale);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
     * extension in the R field.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
     *            so that the start position of the next instruction can be computed even though
     *            this instruction has not been completely emitted yet.
     * @param evexDisp8Scale the scaling factor for computing the compressed displacement of
     *            EVEX-encoded instructions. This scaling factor only matters when the emitted
     *            instruction uses one-byte-displacement form.
     */
    private void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale) {
        assert (reg & 0x07) == reg;
        int regenc = reg << 3;

        Register base = addr.getBase();
        Register index = addr.getIndex();

        Scale scale = addr.getScale();
        int disp = addr.getDisplacement();

        if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
            // [00 000 101] disp32
            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
            emitByte(0x05 | regenc);
            // Record the displacement position so the placeholder can be patched later.
            if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
                codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
            }
            emitInt(disp);
        } else if (base.isValid()) {
            boolean overriddenForce4Byte = force4Byte;
            int baseenc = base.isValid() ? encode(base) : 0;

            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [base + indexscale + disp]
                // rbp/r13 as base cannot use the no-displacement form (that encoding
                // means RIP-relative / disp32), hence the extra checks below.
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base + indexscale]
                    // [00 reg 100][ss index base]
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x04 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                } else {
                    // EVEX disp8*N compression: if the displacement is a multiple of the
                    // scale and the quotient fits in a byte, emit the compressed disp8;
                    // otherwise force the 4-byte form.
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [base + indexscale + imm8]
                        // [01 reg 100][ss index base] imm8
                        assert !index.equals(rsp) : "illegal addressing mode";
                        emitByte(0x44 | regenc);
                        emitByte(scale.log2 << 6 | indexenc | baseenc);
                        emitByte(disp & 0xFF);
                    } else {
                        // [base + indexscale + disp32]
                        // [10 reg 100][ss index base] disp32
                        assert !index.equals(rsp) : "illegal addressing mode";
                        emitByte(0x84 | regenc);
                        emitByte(scale.log2 << 6 | indexenc | baseenc);
                        emitInt(disp);
                    }
                }
            } else if (base.equals(rsp) || base.equals(r12)) {
                // rsp/r12 as base always need an SIB byte (their low encoding 100 in
                // ModRM.rm is the SIB escape).
                // [rsp + disp]
                if (disp == 0) {
                    // [rsp]
                    // [00 reg 100][00 100 100]
                    emitByte(0x04 | regenc);
                    emitByte(0x24);
                } else {
                    // EVEX disp8*N compression (same scheme as above).
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [rsp + imm8]
                        // [01 reg 100][00 100 100] disp8
                        emitByte(0x44 | regenc);
                        emitByte(0x24);
                        emitByte(disp & 0xFF);
                    } else {
                        // [rsp + imm32]
                        // [10 reg 100][00 100 100] disp32
                        emitByte(0x84 | regenc);
                        emitByte(0x24);
                        emitInt(disp);
                    }
                }
            } else {
                // [base + disp]
                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base]
                    // [00 reg base]
                    emitByte(0x00 | regenc | baseenc);
                } else {
                    // EVEX disp8*N compression (same scheme as above).
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [base + disp8]
                        // [01 reg base] disp8
                        emitByte(0x40 | regenc | baseenc);
                        emitByte(disp & 0xFF);
                    } else {
                        // [base + disp32]
                        // [10 reg base] disp32
                        emitByte(0x80 | regenc | baseenc);
                        emitInt(disp);
                    }
                }
            }
        } else {
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [indexscale + disp]
                // [00 reg 100][ss index 101] disp32
                assert !index.equals(rsp) : "illegal addressing mode";
                emitByte(0x04 | regenc);
                emitByte(scale.log2 << 6 | indexenc | 0x05);
                emitInt(disp);
            } else {
                // [disp] ABSOLUTE
                // [00 reg 100][00 100 101] disp32
                emitByte(0x04 | regenc);
                emitByte(0x25);
                emitInt(disp);
            }
        }
    }

    /**
     * Abstraction over the SSE and VEX ways of emitting the prefix of a SIMD instruction.
     */
    private interface SIMDEncoder {

        void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);

        void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);

    }

    /**
     * Legacy SSE encoding: emits the size prefix, the REX prefix and the opcode escape bytes
     * as individual prefix bytes. The nds register is unused in this encoding.
     */
    private class SSEEncoderImpl implements SIMDEncoder {

        @Override
        public void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            if (sizePrefix > 0) {
                emitByte(sizePrefix);
            }
            if (isRexW)
{
                prefixq(adr, xreg);
            } else {
                prefix(adr, xreg);
            }
            // Two-byte escapes (e.g. 0x380F) are emitted as a short, one-byte ones as a byte.
            if (opcodeEscapePrefix > 0xFF) {
                emitShort(opcodeEscapePrefix);
            } else if (opcodeEscapePrefix > 0) {
                emitByte(opcodeEscapePrefix);
            }
        }

        @Override
        public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            if (sizePrefix > 0) {
                emitByte(sizePrefix);
            }
            if (isRexW) {
                prefixq(dst, src);
            } else {
                prefix(dst, src);
            }
            if (opcodeEscapePrefix > 0xFF) {
                emitShort(opcodeEscapePrefix);
            } else if (opcodeEscapePrefix > 0) {
                emitByte(opcodeEscapePrefix);
            }
        }
    }

    /**
     * Symbolic names for the fields of the VEX prefix (vector length L, width W, the pp opcode
     * extension and the m-mmmm opcode escape); see {@link #emitVEX}.
     */
    public static final class VEXPrefixConfig {
        public static final int L128 = 0;
        public static final int L256 = 1;
        public static final int LZ = 0;

        public static final int W0 = 0;
        public static final int W1 = 1;
        public static final int WIG = 0;

        public static final int P_ = 0x0;
        public static final int P_66 = 0x1;
        public static final int P_F3 = 0x2;
        public static final int P_F2 = 0x3;

        public static final int M_0F = 0x1;
        public static final int M_0F38 = 0x2;
        public static final int M_0F3A = 0x3;

        private VEXPrefixConfig() {
        }
    }

    /**
     * VEX encoding: folds the legacy size and escape prefixes into the VEX.pp and VEX.m-mmmm
     * fields and emits a single VEX prefix. Always uses 128-bit vector length here.
     */
    private class VEXEncoderImpl implements SIMDEncoder {

        /** Maps a legacy SSE size prefix byte to the corresponding VEX.pp value. */
        private int sizePrefixToPP(int sizePrefix) {
            switch (sizePrefix) {
                case 0x66:
                    return P_66;
                case 0xF2:
                    return P_F2;
                case 0xF3:
                    return P_F3;
                default:
                    return P_;
            }
        }

        /** Maps a legacy opcode escape prefix to the corresponding VEX.m-mmmm value. */
        private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) {
            switch (opcodeEscapePrefix) {
                case 0x0F:
                    return M_0F;
                case 0x380F:
                    return M_0F38;
                case 0x3A0F:
                    return M_0F3A;
                default:
                    return 0;
            }
        }

        @Override
        public void simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0);
        }

        @Override
        public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
            emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0);
        }
    }

    protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(xreg, nds, adr, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(xreg, nds, adr, size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(dst, nds, src, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
        simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW);
    }

    /**
     * Low-level function to encode and emit the VEX prefix.
     * <p>
     * 2 byte form: [1100 0101] [R vvvv L pp]<br>
     * 3 byte form: [1100 0100] [RXB m-mmmm] [W vvvv L pp]
     * <p>
     * The RXB and vvvv fields are stored in 1's complement in the prefix encoding. This function
     * performs the 1s complement conversion, the caller is expected to pass plain unencoded
     * arguments.
     * <p>
     * The pp field encodes an extension to the opcode:<br>
     * 00: no extension<br>
     * 01: 66<br>
     * 10: F3<br>
     * 11: F2
     * <p>
     * The m-mmmm field encodes the leading bytes of the opcode:<br>
     * 00001: implied 0F leading opcode byte (default in 2-byte encoding)<br>
     * 00010: implied 0F 38 leading opcode bytes<br>
     * 00011: implied 0F 3A leading opcode bytes
     * <p>
     * This function automatically chooses the 2 or 3 byte encoding, based on the XBW flags and the
     * m-mmmm field.
     */
    protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv) {
        assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX) : "emitting VEX prefix on a CPU without AVX support";

        assert l == L128 || l == L256 : "invalid value for VEX.L";
        assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for VEX.pp";
        assert mmmmm == M_0F || mmmmm == M_0F38 || mmmmm == M_0F3A : "invalid value for VEX.m-mmmm";
        assert w == W0 || w == W1 : "invalid value for VEX.W";

        assert (rxb & 0x07) == rxb : "invalid value for VEX.RXB";
        assert (vvvv & 0x0F) == vvvv : "invalid value for VEX.vvvv";

        // Fields are stored inverted (1's complement) in the prefix.
        int rxb1s = rxb ^ 0x07;
        int vvvv1s = vvvv ^ 0x0F;
        // The 2-byte form is only available when X = B = 0, W = 0 and the escape is 0F.
        if ((rxb & 0x03) == 0 && w == WIG && mmmmm == M_0F) {
            // 2 byte encoding
            int byte2 = 0;
            byte2 |= (rxb1s & 0x04) << 5;
            byte2 |= vvvv1s << 3;
            byte2 |= l << 2;
            byte2 |= pp;

            emitByte(0xC5);
            emitByte(byte2);
        } else {
            // 3 byte encoding
            int byte2 = 0;
            byte2 = (rxb1s & 0x07) << 5;
            byte2 |= mmmmm;

            int byte3 = 0;
            byte3 |= w << 7;
            byte3 |= vvvv1s << 3;
            byte3 |= l << 2;
            byte3 |= pp;

            emitByte(0xC4);
            emitByte(byte2);
            emitByte(byte3);
        }
    }

    /**
     * Maps an {@link AVXSize} to the corresponding VEX/EVEX vector-length field value.
     */
    public static int getLFlag(AVXSize size) {
        switch (size) {
            case XMM:
                return L128;
            case YMM:
                return L256;
            case ZMM:
                return L512;
            default:
                return LZ;
        }
    }
/**
     * Emits a VEX prefix for a register-register instruction with the given fields.
     */
    public final void vexPrefix(Register dst, Register nds, Register src, AVXSize size, int pp, int mmmmm, int w) {
        emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0);
    }

    /**
     * Emits a VEX prefix for a register-memory instruction with the given fields.
     */
    public final void vexPrefix(Register dst, Register nds, AMD64Address src, AVXSize size, int pp, int mmmmm, int w) {
        emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0);
    }

    /**
     * Symbolic names for the EVEX-only prefix fields (512-bit length, merging/zeroing z bit and
     * broadcast/rounding b bit); see {@link #emitEVEX}.
     */
    protected static final class EVEXPrefixConfig {
        public static final int L512 = 2;
        public static final int LIG = 0;

        public static final int Z0 = 0x0;
        public static final int Z1 = 0x1;

        public static final int B0 = 0x0;
        public static final int B1 = 0x1;

        private EVEXPrefixConfig() {
        }
    }

    // Marker for tuple/vector-length combinations that have no valid disp8 scaling factor.
    private static final int NOT_SUPPORTED_VECTOR_LENGTH = -1;

    /**
     * EVEX-encoded instructions use a compressed displacement scheme by multiplying disp8 with a
     * scaling factor N depending on the tuple type and the vector length.
     *
     * Reference: Intel Software Developer's Manual Volume 2, Section 2.6.5
     */
    protected enum EVEXTuple {
        FV_NO_BROADCAST_32BIT(16, 32, 64),
        FV_BROADCAST_32BIT(4, 4, 4),
        FV_NO_BROADCAST_64BIT(16, 32, 64),
        FV_BROADCAST_64BIT(8, 8, 8),
        HV_NO_BROADCAST_32BIT(8, 16, 32),
        HV_BROADCAST_32BIT(4, 4, 4),
        FVM(16, 32, 64),
        T1S_8BIT(1, 1, 1),
        T1S_16BIT(2, 2, 2),
        T1S_32BIT(4, 4, 4),
        T1S_64BIT(8, 8, 8),
        T1F_32BIT(4, 4, 4),
        T1F_64BIT(8, 8, 8),
        T2_32BIT(8, 8, 8),
        T2_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
        T4_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
        T4_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
        T8_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
        HVM(8, 16, 32),
        QVM(4, 8, 16),
        OVM(2, 4, 8),
        M128(16, 16, 16),
        DUP(8, 32, 64);

        // Disp8 scaling factor N for each of the three vector lengths.
        private final int scalingFactorVL128;
        private final int scalingFactorVL256;
        private final int scalingFactorVL512;

        EVEXTuple(int scalingFactorVL128, int scalingFactorVL256, int scalingFactorVL512) {
            this.scalingFactorVL128 = scalingFactorVL128;
            this.scalingFactorVL256 = scalingFactorVL256;
            this.scalingFactorVL512 = scalingFactorVL512;
        }

        private static int verifyScalingFactor(int scalingFactor) {
            if (scalingFactor == NOT_SUPPORTED_VECTOR_LENGTH) {
                throw GraalError.shouldNotReachHere("Invalid scaling factor.");
            }
            return scalingFactor;
        }

        /**
         * Returns the disp8 scaling factor N for this tuple type and the given vector length.
         *
         * @throws org.graalvm.compiler.debug.GraalError if the combination is unsupported
         */
        public int getDisp8ScalingFactor(AVXSize size) {
            switch (size) {
                case XMM:
                    return verifyScalingFactor(scalingFactorVL128);
                case YMM:
                    return verifyScalingFactor(scalingFactorVL256);
                case ZMM:
                    return verifyScalingFactor(scalingFactorVL512);
                default:
                    throw GraalError.shouldNotReachHere("Unsupported vector size.");
            }
        }
    }

    /**
     * Low-level function to encode and emit the EVEX prefix.
     * <p>
     * 62 [0 1 1 0 0 0 1 0]<br>
     * P1 [R X B R'0 0 m m]<br>
     * P2 [W v v v v 1 p p]<br>
     * P3 [z L'L b V'a a a]
     * <p>
     * The pp field encodes an extension to the opcode:<br>
     * 00: no extension<br>
     * 01: 66<br>
     * 10: F3<br>
     * 11: F2
     * <p>
     * The mm field encodes the leading bytes of the opcode:<br>
     * 01: implied 0F leading opcode byte<br>
     * 10: implied 0F 38 leading opcode bytes<br>
     * 11: implied 0F 3A leading opcode bytes
     * <p>
     * The z field encodes the merging mode (merge or zero).
     * <p>
     * The b field encodes the source broadcast or data rounding modes.
     * <p>
     * The aaa field encodes the operand mask register.
     */
    private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv, int z, int b, int aaa) {
        assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX512F) : "emitting EVEX prefix on a CPU without AVX512 support";

        assert l == L128 || l == L256 || l == L512 : "invalid value for EVEX.L'L";
        assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for EVEX.pp";
        assert mm == M_0F || mm == M_0F38 || mm == M_0F3A : "invalid value for EVEX.mm";
        assert w == W0 || w == W1 : "invalid value for EVEX.W";

        assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB";
        assert (reg & 0x1F) == reg : "invalid value for EVEX.R'";
        assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.vvvvv";

        assert z == Z0 || z == Z1 : "invalid value for EVEX.z";
        assert b == B0 || b == B1 : "invalid value for EVEX.b";
        assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa";

        emitByte(0x62);
        int p1 = 0;
        // RXB bits are stored inverted, like in the VEX prefix.
        p1 |= ((rxb ^ 0x07) & 0x07) << 5;
        // R' is the inverted 4th register-encoding bit: set when reg < 16.
        p1 |= reg < 16 ? 0x10 : 0;
        p1 |= mm;
        emitByte(p1);

        int p2 = 0;
        p2 |= w << 7;
        // Low 4 vvvvv bits, inverted.
        p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3;
        // Bit 2 of P2 is fixed to 1 (see the byte layout in the javadoc above).
        p2 |= 0x4;
        p2 |= pp;
        emitByte(p2);

        int p3 = 0;
        p3 |= z << 7;
        p3 |= l << 5;
        p3 |= b << 4;
        // V' is the inverted 5th vvvvv bit: set when vvvvv < 16.
        p3 |= vvvvv < 16 ? 0x08 : 0;
        p3 |= aaa;
        emitByte(p3);
    }

    /**
     * Like {@link #getRXB(Register, Register)}, but also moves bit 4 of the rm encoding into the
     * X position (mask 0x18 keeps bits 3 and 4), since EVEX.X extends ModRM.rm to 32 registers.
     */
    private static int getRXBForEVEX(Register reg, Register rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        rxb |= (rm == null ? 0 : rm.encoding & 0x018) >> 3;
        return rxb;
    }

    /**
     * Helper method for emitting EVEX prefix in the form of RRRR.
     */
    protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) {
        assert !mask.isValid() || mask.getRegisterCategory().equals(MASK);
        emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
    }

    /**
     * Helper method for emitting EVEX prefix in the form of RRRM. Because the memory addressing in
     * EVEX-encoded instructions employ a compressed displacement scheme when using disp8 form, the
     * user of this API should make sure to encode the operands using
     * {@link #emitEVEXOperandHelper(Register, AMD64Address, int, int)}.
     */
    protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) {
        assert !mask.isValid() || mask.getRegisterCategory().equals(MASK);
        emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
    }

}