/*
 * Copyright (c) 2009, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package com.oracle.graal.asm.amd64;

import static com.oracle.graal.amd64.AMD64.*;
import static com.oracle.graal.api.code.MemoryBarriers.*;
import static com.oracle.graal.asm.NumUtil.*;
import static com.oracle.graal.asm.amd64.AMD64AsmOptions.*;

import com.oracle.graal.amd64.*;
import com.oracle.graal.amd64.AMD64.CPUFeature;
import com.oracle.graal.api.code.*;
import com.oracle.graal.asm.*;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends Assembler {

    // Register encodings >= 8 (r8-r15 / xmm8-xmm15) can only be expressed with a
    // REX prefix byte.
    private static final int MinEncodingNeedsRex = 8;

    /**
     * A sentinel value used as a place holder in an instruction stream for an address that will be
     * patched.
     */
    private static final AMD64Address Placeholder = new AMD64Address(rip);

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        // The numeric value is the 4-bit condition-code field that is OR'ed into the
        // opcode of Jcc (0x70 | value, 0x0F 0x80 | value) and CMOVcc (0x0F 0x40 | value)
        // encodings. Several logical conditions deliberately share an encoding, e.g.
        // Zero/Equal (0x4) and Below/CarrySet (0x2).
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        // 4-bit x86 condition-code encoding (see the constant list above).
        private final int value;
        // Human-readable operator symbol returned by toString().
        private final String operator;

        private ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        /**
         * Returns the logical negation of this condition, e.g. {@code Zero.negate() == NotZero}.
         * Note that every pair below differs only in the lowest encoding bit (value ^ 1), as
         * mandated by the x86 condition-code scheme.
         */
        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            // Unreachable: every enum constant is handled above.
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }

    /**
     * Constants for X86 prefix bytes.
     */
    private static class Prefix {

        // REX prefix bytes 0x40-0x4F. The low four bits are the W (64-bit operand),
        // R (ModRM.reg extension), X (SIB.index extension) and B (ModRM.rm / SIB.base
        // extension) flags; the name spells out which flags are set.
        private static final int REX = 0x40;
        private static final int REXB = 0x41;
        private static final int REXX = 0x42;
        private static final int REXXB = 0x43;
        private static final int REXR = 0x44;
        private static final int REXRB = 0x45;
        private static final int REXRX = 0x46;
        private static final int REXRXB = 0x47;
        private static final int REXW = 0x48;
        private static final int REXWB = 0x49;
        private static final int REXWX = 0x4A;
        private static final int REXWXB = 0x4B;
        private static final int REXWR = 0x4C;
        private static final int REXWRB = 0x4D;
        private static final int REXWRX = 0x4E;
        private static final int REXWRXB = 0x4F;
    }

    /**
     * The register to which {@link Register#Frame} and {@link Register#CallerFrame} are bound.
     */
    public final Register frameRegister;

    /**
     * Constructs an assembler for the AMD64 architecture.
     *
     * @param registerConfig the register configuration used to bind {@link Register#Frame} and
     *            {@link Register#CallerFrame} to physical registers. This value can be null if this
     *            assembler instance will not be used to assemble instructions using these logical
     *            registers.
     */
    public AMD64Assembler(TargetDescription target, RegisterConfig registerConfig) {
        super(target);
        this.frameRegister = registerConfig == null ? null : registerConfig.getFrameRegister();
    }

    // Tests whether the target CPU advertises the given ISA extension.
    private boolean supports(CPUFeature feature) {
        return ((AMD64) target.arch).getFeatures().contains(feature);
    }

    // Returns the low 3 bits of the register's encoding, i.e. the part that fits in a
    // ModRM/SIB field; the high bit (r8-r15) is carried by a REX prefix instead.
    private static int encode(Register r) {
        assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
        return r.encoding & 0x7;
    }

    /**
     * Emits an 8-bit arithmetic instruction with an immediate operand (group-1 opcode 0x80).
     *
     * @param op the ModRM reg-field opcode extension selecting the operation; callers in this
     *            file use 0 for ADD, 4 for AND and 7 for CMP
     */
    private void emitArithImm8(int op, Register dst, int imm8) {
        int encode = prefixAndEncode(op, false, dst.encoding, true);
        emitByte(0x80);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }

    /**
     * Emits a 16-bit arithmetic instruction with an immediate (operand-size prefix 0x66 plus
     * group-1 opcode 0x83/0x81). {@code op} is the opcode extension as in
     * {@code emitArithImm8}.
     */
    private void emitArithImm16(int op, Register dst, int imm16) {
        emitByte(0x66);
        int encode = prefixAndEncode(op, dst.encoding);
        if (isByte(imm16)) {
            emitByte(0x83); // imm8 sign extend
            emitByte(0xC0 | encode);
            emitByte(imm16 & 0xFF);
        } else {
            emitByte(0x81);
            emitByte(0xC0 | encode);
            emitShort(imm16);
        }
    }

    /**
     * Emits a 32-bit arithmetic instruction with an immediate, using the short sign-extended
     * imm8 form (0x83) when the value fits in a byte.
     */
    private void emitArithImm32(int op, Register dst, int imm32) {
        int encode = prefixAndEncode(op, dst.encoding);
        if (isByte(imm32)) {
            emitByte(0x83); // imm8 sign extend
            emitByte(0xC0 | encode);
            emitByte(imm32 & 0xFF);
        } else {
            emitByte(0x81);
            emitByte(0xC0 | encode);
            emitInt(imm32);
        }
    }

    // 64-bit (REX.W) variant; the immediate is still at most 32 bits and is
    // sign-extended by the CPU.
    private void emitArithImm32q(int op, Register dst, int imm32) {
        emitArithImm32q(op, dst, imm32, false);
    }

    /**
     * 64-bit arithmetic-with-immediate emission.
     *
     * @param force32Imm when true, always emit the 4-byte immediate form even if the value
     *            would fit in a sign-extended imm8 (useful when the immediate will be patched)
     */
    private void emitArithImm32q(int op, Register dst, int imm32, boolean force32Imm) {
        int encode = prefixqAndEncode(op, dst.encoding);
        if (isByte(imm32) && !force32Imm) {
            emitByte(0x83); // imm8 sign extend
            emitByte(0xC0 | encode);
            emitByte(imm32 & 0xFF);
        } else {
            emitByte(0x81);
            emitByte(0xC0 | encode);
            emitInt(imm32);
        }
    }

    // immediate-to-memory forms
    private void emitArithImm8(int op, AMD64Address adr, int imm8) {
        prefix(adr);
        emitByte(0x80);
        emitOperandHelper(op, adr);
        emitByte(imm8);
    }

    private void emitArithImm16(int op,
                    AMD64Address adr, int imm16) {
        emitByte(0x66);
        prefix(adr);
        if (isByte(imm16)) {
            emitByte(0x83); // imm8 sign extend
            emitOperandHelper(op, adr);
            emitByte(imm16 & 0xFF);
        } else {
            emitByte(0x81);
            emitOperandHelper(op, adr);
            emitShort(imm16);
        }
    }

    // 32-bit arithmetic with immediate against a memory operand; uses the short
    // sign-extended imm8 form (0x83) when possible.
    private void emitArithImm32(int op, AMD64Address adr, int imm32) {
        prefix(adr);
        if (isByte(imm32)) {
            emitByte(0x83); // imm8 sign extend
            emitOperandHelper(op, adr);
            emitByte(imm32 & 0xFF);
        } else {
            emitByte(0x81);
            emitOperandHelper(op, adr);
            emitInt(imm32);
        }
    }

    // Convenience overload: the reg field of the ModRM byte names a register.
    protected void emitOperandHelper(Register reg, AMD64Address addr) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr);
    }

    /**
     * Emits the ModRM byte, optional SIB byte and displacement for a memory operand.
     *
     * @param reg the 3-bit value for the ModRM reg field (either a register's low encoding
     *            bits or an opcode extension /digit); any REX prefix must already have been
     *            emitted by the caller
     * @param addr the memory operand; a base of {@link Register#Frame} is rebound to
     *            {@link #frameRegister}, and a base of rip (including {@link #Placeholder})
     *            selects RIP-relative addressing
     */
    protected void emitOperandHelper(int reg, AMD64Address addr) {
        assert (reg & 0x07) == reg;
        int regenc = reg << 3;

        Register base = addr.getBase();
        Register index = addr.getIndex();

        AMD64Address.Scale scale = addr.getScale();
        int disp = addr.getDisplacement();

        if (base.equals(Register.Frame)) {
            assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration";
            base = frameRegister;
        }

        if (base.equals(AMD64.rip)) { // also matches Placeholder
            // [00 000 101] disp32
            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
            emitByte(0x05 | regenc);
            emitInt(disp);
        } else if (base.isValid()) {
            int baseenc = base.isValid() ? encode(base) : 0;
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [base + indexscale + disp]
                // Note: rbp/r13 cannot use the no-displacement form (their mod=00
                // encoding means rip-relative / disp32-only), hence the extra checks.
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base + indexscale]
                    // [00 reg 100][ss index base]
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x04 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                } else if (isByte(disp)) {
                    // [base + indexscale + imm8]
                    // [01 reg 100][ss index base] imm8
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x44 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + indexscale + disp32]
                    // [10 reg 100][ss index base] disp32
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x84 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitInt(disp);
                }
            } else if (base.equals(rsp) || base.equals(r12)) {
                // [rsp + disp]
                // rsp/r12 as base always require a SIB byte (rm=100 selects SIB).
                if (disp == 0) {
                    // [rsp]
                    // [00 reg 100][00 100 100]
                    emitByte(0x04 | regenc);
                    emitByte(0x24);
                } else if (isByte(disp)) {
                    // [rsp + imm8]
                    // [01 reg 100][00 100 100] disp8
                    emitByte(0x44 | regenc);
                    emitByte(0x24);
                    emitByte(disp & 0xFF);
                } else {
                    // [rsp + imm32]
                    // [10 reg 100][00 100 100] disp32
                    emitByte(0x84 | regenc);
                    emitByte(0x24);
                    emitInt(disp);
                }
            } else {
                // [base + disp]
                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base]
                    // [00 reg base]
                    emitByte(0x00 | regenc | baseenc);
                } else if (isByte(disp)) {
                    // [base + disp8]
                    // [01 reg base] disp8
                    emitByte(0x40 | regenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + disp32]
                    // [10 reg base] disp32
                    emitByte(0x80 | regenc | baseenc);
                    emitInt(disp);
                }
            }
        } else {
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [indexscale + disp]
                // [00 reg 100][ss index 101] disp32
                assert !index.equals(rsp) : "illegal addressing mode";
                emitByte(0x04 | regenc);
                emitByte(scale.log2 << 6 | indexenc | 0x05);
                emitInt(disp);
            } else {
                // [disp] ABSOLUTE
                // [00 reg 100][00 100 101] disp32
                emitByte(0x04 | regenc);
                emitByte(0x25);
                emitInt(disp);
            }
        }
    }

    // ADD m32, imm32
    public final void addl(AMD64Address dst, int imm32) {
        emitArithImm32(0, dst, imm32);
    }

    // ADD r32, imm32
    public final void addl(Register dst, int imm32) {
        emitArithImm32(0, dst, imm32);
    }

    // ADD r32, m32 (opcode 0x03 /r)
    public final void addl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x03);
        emitOperandHelper(dst, src);
    }

    // ADD r32, r32
    public final void addl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x03);
        emitByte(0xC0 | encode);
    }

    // Multi-byte NOPs (0F 1F forms) used for code alignment padding.
    private void addrNop4() {
        // 4 bytes: NOP DWORD PTR [EAX+0]
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
        emitByte(0); // 8-bits offset (1 byte)
    }

    private void addrNop5() {
        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitByte(0); // 8-bits offset (1 byte)
    }

    private void addrNop7() {
        // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
        emitInt(0); // 32-bits offset (4 bytes)
    }

    private void addrNop8() {
        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitInt(0); // 32-bits offset (4 bytes)
    }

    public final
                void addsd(Register dst, Register src) {
        // ADDSD xmm, xmm (F2 0F 58): scalar double-precision add.
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x58);
        emitByte(0xC0 | encode);
    }

    // ADDSD xmm, m64
    public final void addsd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x58);
        emitOperandHelper(dst, src);
    }

    // ADDSS xmm, xmm (F3 0F 58): scalar single-precision add.
    public final void addss(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x58);
        emitByte(0xC0 | encode);
    }

    // ADDSS xmm, m32
    public final void addss(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x58);
        emitOperandHelper(dst, src);
    }

    // AND r32, imm32 (group-1 /4)
    public final void andl(Register dst, int imm32) {
        emitArithImm32(4, dst, imm32);
    }

    // AND r32, m32 (opcode 0x23 /r)
    public final void andl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x23);
        emitOperandHelper(dst, src);
    }

    // AND r32, r32
    public final void andl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x23);
        emitByte(0xC0 | encode);
    }

    // BSF r64, r64 (0F BC): bit scan forward.
    public final void bsfq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0xBC);
        emitByte(0xC0 | encode);
    }

    // BSF r64, m64
    public final void bsfq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x0F);
        emitByte(0xBC);
        emitOperandHelper(dst, src);
    }

    // BSR r64, r64 (0F BD): bit scan reverse.
    public final void bsrq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0xBD);
        emitByte(0xC0 | encode);
    }

    // BSR r64, m64
    public final void bsrq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x0F);
        emitByte(0xBD);
        emitOperandHelper(dst, src);
    }

    // BSR r32, r32
    public final void bsrl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0xBD);
        emitByte(0xC0 | encode);
    }

    // BSR r32, m32
    public final void bsrl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xBD);
        emitOperandHelper(dst, src);
    }

    // BSWAP r32 (0F C8+rd): byte-swap register.
    public final void bswapl(Register reg) {
        int encode = prefixAndEncode(reg.encoding);
        emitByte(0x0F);
        emitByte(0xC8 | encode);
    }

    // CDQ (0x99): sign-extend eax into edx:eax.
    public final void cdql() {
        emitByte(0x99);
    }

    // CMOVcc r32, r32 (0F 40 | cc)
    public final void cmovl(ConditionFlag cc, Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitByte(0xC0 | encode);
    }

    // CMOVcc r32, m32
    public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitOperandHelper(dst, src);
    }

    // CMP r8, imm8 (group-1 /7)
    public final void cmpb(Register dst, int imm8) {
        emitArithImm8(7, dst, imm8);
    }

    // CMP r8, r8 (opcode 0x3A /r); both operands need byte-register prefixing.
    public final void cmpb(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, true, src.encoding, true);
        emitByte(0x3A);
        emitByte(0xC0 | encode);
    }

    // CMP r8, m8
    public final void cmpb(Register dst, AMD64Address src) {
        prefix(src, dst, true);
        emitByte(0x3A);
        emitOperandHelper(dst, src);
    }

    // CMP m8, imm8
    public final void cmpb(AMD64Address dst, int imm8) {
        emitArithImm8(7, dst, imm8);
    }

    // CMP r16, imm16
    public final void cmpw(Register dst, int imm16) {
        emitArithImm16(7, dst, imm16);
    }

    // CMP r16, r16 (0x66 operand-size prefix + 0x3B /r)
    public final void cmpw(Register dst, Register src) {
        emitByte(0x66);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x3B);
        emitByte(0xC0 | encode);
    }

    // CMP r16, m16
    public final void cmpw(Register dst, AMD64Address src) {
        emitByte(0x66);
        prefix(src, dst);
        emitByte(0x3B);
        emitOperandHelper(dst, src);
    }

    // CMP m16, imm16
    public final void cmpw(AMD64Address dst, int imm16) {
        emitArithImm16(7, dst, imm16);
    }

    // CMP r32, imm32
    public final void cmpl(Register dst, int imm32) {
        emitArithImm32(7, dst, imm32);
    }

    // CMP r32, r32
    public final void cmpl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x3B);
        emitByte(0xC0 | encode);
    }

    // CMP r32, m32
    public final void cmpl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x3B);
        emitOperandHelper(dst, src);
    }

    // CMP m32, imm32
    public final void cmpl(AMD64Address dst, int imm32) {
        emitArithImm32(7, dst, imm32);
    }

    // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax,
    // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,.
    // The ZF is set if the compared values were equal, and cleared otherwise.
    public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
        prefix(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr);
    }

    // CVTSD2SS xmm, m64 (F2 0F 5A): double -> single conversion.
    public final void cvtsd2ss(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x5A);
        emitOperandHelper(dst, src);
    }

    // CVTSD2SS xmm, xmm
    public final void cvtsd2ss(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        assert src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x5A);
        emitByte(0xC0 | encode);
    }

    // CVTSI2SD xmm, m32 (F2 0F 2A): int32 -> double conversion.
    public final void cvtsi2sdl(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x2A);
        emitOperandHelper(dst, src);
    }

    // CVTSI2SD xmm, r32
    public final void cvtsi2sdl(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x2A);
        emitByte(0xC0 | encode);
    }

    // CVTSI2SS xmm, m32 (F3 0F 2A): int32 -> single conversion.
    public final void cvtsi2ssl(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x2A);
        emitOperandHelper(dst, src);
    }

    // CVTSI2SS xmm, r32
    public final void cvtsi2ssl(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x2A);
        emitByte(0xC0 | encode);
    }

    // CVTSS2SD xmm, m32 (F3 0F 5A): single -> double conversion.
    public final void cvtss2sd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x5A);
        emitOperandHelper(dst, src);
    }

    // CVTSS2SD xmm, xmm
    public final void cvtss2sd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        assert src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x5A);
        emitByte(0xC0 | encode);
    }

    // CVTTSD2SI r32, m64 (F2 0F 2C): truncating double -> int32 conversion.
    public final void cvttsd2sil(Register dst, AMD64Address src) {
        emitByte(0xF2);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x2C);
        emitOperandHelper(dst, src);
    }

    // CVTTSD2SI r32, xmm
    public final void cvttsd2sil(Register dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x2C);
        emitByte(0xC0 | encode);
    }

    // CVTTSS2SI r32, m32 (F3 0F 2C): truncating single -> int32 conversion.
    public final void cvttss2sil(Register dst, AMD64Address src) {
        emitByte(0xF3);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x2C);
        emitOperandHelper(dst, src);
    }

    // CVTTSS2SI r32, xmm
    public final void cvttss2sil(Register dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x2C);
        emitByte(0xC0 | encode);
    }

    // DEC m32 (0xFF /1)
    protected final void decl(AMD64Address dst) {
        prefix(dst);
        emitByte(0xFF);
        emitOperandHelper(1, dst);
    }

    // DIVSD xmm, m64 (F2 0F 5E): scalar double divide.
    public final void divsd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x5E);
        emitOperandHelper(dst, src);
    }

    // DIVSD xmm, xmm
    public final void divsd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        assert src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x5E);
        emitByte(0xC0 | encode);
    }

    // DIVSS xmm, m32 (F3 0F 5E): scalar single divide.
    public final void divss(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x5E);
        emitOperandHelper(dst, src);
    }

    // DIVSS xmm, xmm
    public final void divss(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        assert src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x5E);
        emitByte(0xC0 | encode);
    }

    // HLT (0xF4)
    public final void hlt() {
        emitByte(0xF4);
    }

    // IDIV r32 (0xF7 /7): signed divide edx:eax by src.
    public final void idivl(Register src) {
        int encode = prefixAndEncode(7, src.encoding);
        emitByte(0xF7);
        emitByte(0xC0 | encode);
    }

    // DIV r32 (0xF7 /6): unsigned divide edx:eax by src.
    public final void divl(Register src) {
        int encode = prefixAndEncode(6, src.encoding);
        emitByte(0xF7);
        emitByte(0xC0 | encode);
    }

    // MUL r32 (0xF7 /4): unsigned multiply eax by src into edx:eax.
    public final void mull(Register src) {
        int encode = prefixAndEncode(4, src.encoding);
        emitByte(0xF7);
        emitByte(0xC0 | encode);
    }

    // MUL m32
    public final void mull(AMD64Address src) {
        prefix(src);
        emitByte(0xF7);
        emitOperandHelper(4, src);
    }

    // IMUL r32 (0xF7 /5): signed multiply eax by src into edx:eax.
    public final void imull(Register src) {
        int encode = prefixAndEncode(5, src.encoding);
        emitByte(0xF7);
        emitByte(0xC0 | encode);
    }

    // IMUL m32
    public final void imull(AMD64Address src) {
        prefix(src);
        emitByte(0xF7);
        emitOperandHelper(5, src);
    }

    // IMUL r32, r32 (0F AF /r): two-operand signed multiply.
    public final void imull(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0xAF);
        emitByte(0xC0 | encode);
    }

    // IMUL r32, m32
    public final void imull(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xAF);
        emitOperandHelper(dst, src);
    }

    // IMUL r32, r32, imm: three-operand signed multiply with immediate.
    public final void imull(Register dst, Register src, int value) {
        int encode =
prefixAndEncode(dst.encoding, src.encoding); 834 if (isByte(value)) { 835 emitByte(0x6B); 836 emitByte(0xC0 | encode); 837 emitByte(value & 0xFF); 838 } else { 839 emitByte(0x69); 840 emitByte(0xC0 | encode); 841 emitInt(value); 842 } 843 } 844 845 protected final void incl(AMD64Address dst) { 846 prefix(dst); 847 emitByte(0xFF); 848 emitOperandHelper(0, dst); 849 } 850 851 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { 852 int shortSize = 2; 853 int longSize = 6; 854 long disp = jumpTarget - position(); 855 if (!forceDisp32 && isByte(disp - shortSize)) { 856 // 0111 tttn #8-bit disp 857 emitByte(0x70 | cc.getValue()); 858 emitByte((int) ((disp - shortSize) & 0xFF)); 859 } else { 860 // 0000 1111 1000 tttn #32-bit disp 861 assert isInt(disp - longSize) : "must be 32bit offset (call4)"; 862 emitByte(0x0F); 863 emitByte(0x80 | cc.getValue()); 864 emitInt((int) (disp - longSize)); 865 } 866 } 867 868 public final void jcc(ConditionFlag cc, Label l) { 869 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; 870 if (l.isBound()) { 871 jcc(cc, l.position(), false); 872 } else { 873 // Note: could eliminate cond. jumps to this jump if condition 874 // is the same however, seems to be rather unlikely case. 
875 // Note: use jccb() if label to be bound is very close to get 876 // an 8-bit displacement 877 l.addPatchAt(position()); 878 emitByte(0x0F); 879 emitByte(0x80 | cc.getValue()); 880 emitInt(0); 881 } 882 883 } 884 885 public final void jccb(ConditionFlag cc, Label l) { 886 if (l.isBound()) { 887 int shortSize = 2; 888 int entry = l.position(); 889 assert isByte(entry - (position() + shortSize)) : "Dispacement too large for a short jmp"; 890 long disp = entry - position(); 891 // 0111 tttn #8-bit disp 892 emitByte(0x70 | cc.getValue()); 893 emitByte((int) ((disp - shortSize) & 0xFF)); 894 } else { 895 l.addPatchAt(position()); 896 emitByte(0x70 | cc.getValue()); 897 emitByte(0); 898 } 899 } 900 901 public final void jmp(int jumpTarget, boolean forceDisp32) { 902 int shortSize = 2; 903 int longSize = 5; 904 long disp = jumpTarget - position(); 905 if (!forceDisp32 && isByte(disp - shortSize)) { 906 emitByte(0xEB); 907 emitByte((int) ((disp - shortSize) & 0xFF)); 908 } else { 909 emitByte(0xE9); 910 emitInt((int) (disp - longSize)); 911 } 912 } 913 914 @Override 915 public final void jmp(Label l) { 916 if (l.isBound()) { 917 jmp(l.position(), false); 918 } else { 919 // By default, forward jumps are always 32-bit displacements, since 920 // we can't yet know where the label will be bound. If you're sure that 921 // the forward jump will not run beyond 256 bytes, use jmpb to 922 // force an 8-bit displacement. 
923 924 l.addPatchAt(position()); 925 emitByte(0xE9); 926 emitInt(0); 927 } 928 } 929 930 public final void jmp(Register entry) { 931 int encode = prefixAndEncode(entry.encoding); 932 emitByte(0xFF); 933 emitByte(0xE0 | encode); 934 } 935 936 public final void jmpb(Label l) { 937 if (l.isBound()) { 938 int shortSize = 2; 939 int entry = l.position(); 940 assert isByte((entry - position()) + shortSize) : "Dispacement too large for a short jmp"; 941 long offs = entry - position(); 942 emitByte(0xEB); 943 emitByte((int) ((offs - shortSize) & 0xFF)); 944 } else { 945 946 l.addPatchAt(position()); 947 emitByte(0xEB); 948 emitByte(0); 949 } 950 } 951 952 public final void leaq(Register dst, AMD64Address src) { 953 prefixq(src, dst); 954 emitByte(0x8D); 955 emitOperandHelper(dst, src); 956 } 957 958 public final void leave() { 959 emitByte(0xC9); 960 } 961 962 public final void lock() { 963 emitByte(0xF0); 964 } 965 966 public final void movapd(Register dst, Register src) { 967 assert dst.getRegisterCategory().equals(AMD64.XMM); 968 assert src.getRegisterCategory().equals(AMD64.XMM); 969 int dstenc = dst.encoding; 970 int srcenc = src.encoding; 971 emitByte(0x66); 972 if (dstenc < 8) { 973 if (srcenc >= 8) { 974 emitByte(Prefix.REXB); 975 srcenc -= 8; 976 } 977 } else { 978 if (srcenc < 8) { 979 emitByte(Prefix.REXR); 980 } else { 981 emitByte(Prefix.REXRB); 982 srcenc -= 8; 983 } 984 dstenc -= 8; 985 } 986 emitByte(0x0F); 987 emitByte(0x28); 988 emitByte(0xC0 | dstenc << 3 | srcenc); 989 } 990 991 public final void movaps(Register dst, Register src) { 992 assert dst.getRegisterCategory().equals(AMD64.XMM); 993 assert src.getRegisterCategory().equals(AMD64.XMM); 994 int dstenc = dst.encoding; 995 int srcenc = src.encoding; 996 if (dstenc < 8) { 997 if (srcenc >= 8) { 998 emitByte(Prefix.REXB); 999 srcenc -= 8; 1000 } 1001 } else { 1002 if (srcenc < 8) { 1003 emitByte(Prefix.REXR); 1004 } else { 1005 emitByte(Prefix.REXRB); 1006 srcenc -= 8; 1007 } 1008 dstenc -= 8; 1009 } 
        emitByte(0x0F);
        emitByte(0x28);
        emitByte(0xC0 | dstenc << 3 | srcenc);
    }

    // MOV m8, imm8 (0xC6 /0)
    public final void movb(AMD64Address dst, int imm8) {
        prefix(dst);
        emitByte(0xC6);
        emitOperandHelper(0, dst);
        emitByte(imm8);
    }

    // MOV m8, r8 (0x88 /r)
    public final void movb(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
        prefix(dst, src, true);
        emitByte(0x88);
        emitOperandHelper(src, dst);
    }

    /**
     * MOVD between a general-purpose register and an XMM register (66 0F 6E gpr->xmm,
     * 66 0F 7E xmm->gpr); exactly one operand must be an XMM register.
     */
    public final void movdl(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(AMD64.XMM)) {
            assert !src.getRegisterCategory().equals(AMD64.XMM) : "does this hold?";
            emitByte(0x66);
            int encode = prefixAndEncode(dst.encoding, src.encoding);
            emitByte(0x0F);
            emitByte(0x6E);
            emitByte(0xC0 | encode);
        } else if (src.getRegisterCategory().equals(AMD64.XMM)) {
            assert !dst.getRegisterCategory().equals(AMD64.XMM);
            emitByte(0x66);
            // swap src/dst to get correct prefix
            int encode = prefixAndEncode(src.encoding, dst.encoding);
            emitByte(0x0F);
            emitByte(0x7E);
            emitByte(0xC0 | encode);
        }
    }

    // MOV r32, imm32 (0xB8+rd)
    public final void movl(Register dst, int imm32) {
        int encode = prefixAndEncode(dst.encoding);
        emitByte(0xB8 | encode);
        emitInt(imm32);
    }

    // MOV r32, r32 (0x8B /r)
    public final void movl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x8B);
        emitByte(0xC0 | encode);
    }

    // MOV r32, m32
    public final void movl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x8B);
        emitOperandHelper(dst, src);
    }

    // MOV m32, imm32 (0xC7 /0)
    public final void movl(AMD64Address dst, int imm32) {
        prefix(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst);
        emitInt(imm32);
    }

    // MOV m32, r32 (0x89 /r)
    public final void movl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x89);
        emitOperandHelper(src, dst);
    }

    /**
     * New CPUs require use of movsd and movss to avoid partial register stall when loading from
     * memory. But for old Opteron use movlpd instead of movsd. The selection is done in
     * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
     * {@link AMD64MacroAssembler#movflt(Register, Register)}.
     */
    public final void movlpd(Register dst, AMD64Address src) {
        // MOVLPD xmm, m64 (66 0F 12)
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0x66);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x12);
        emitOperandHelper(dst, src);
    }

    /**
     * 64-bit load: MOVQ xmm, m64 (F3 0F 7E) when the destination is an XMM register,
     * otherwise MOV r64, m64 (REX.W 0x8B).
     */
    public final void movq(Register dst, AMD64Address src) {
        if (dst.getRegisterCategory().equals(AMD64.XMM)) {
            emitByte(0xF3);
            prefixq(src, dst);
            emitByte(0x0F);
            emitByte(0x7E);
            emitOperandHelper(dst, src);
        } else {
            prefixq(src, dst);
            emitByte(0x8B);
            emitOperandHelper(dst, src);
        }
    }

    // MOV r64, r64 (REX.W 0x8B /r)
    public final void movq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x8B);
        emitByte(0xC0 | encode);
    }

    /**
     * 64-bit store: MOVQ m64, xmm (66 0F D6) when the source is an XMM register,
     * otherwise MOV m64, r64 (REX.W 0x89).
     */
    public final void movq(AMD64Address dst, Register src) {
        if (src.getRegisterCategory().equals(AMD64.XMM)) {
            emitByte(0x66);
            prefixq(dst, src);
            emitByte(0x0F);
            emitByte(0xD6);
            emitOperandHelper(src, dst);
        } else {
            prefixq(dst, src);
            emitByte(0x89);
            emitOperandHelper(src, dst);
        }
    }

    // MOVSX r32, m8 (0F BE): sign-extending byte load.
    public final void movsbl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xBE);
        emitOperandHelper(dst, src);
    }

    // MOVSX r32, r8
    public final void movsbl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
        emitByte(0x0F);
        emitByte(0xBE);
        emitByte(0xC0 | encode);
    }

    // MOVSX r64, m8 (REX.W 0F BE)
    public final void movsbq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x0F);
        emitByte(0xBE);
        emitOperandHelper(dst, src);
    }

    // MOVSX r64, r8
    public final void movsbq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0xBE);
        emitByte(0xC0 | encode);
    }

    // MOVSD xmm, xmm (F2 0F 10): scalar double move.
    public final void movsd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        assert src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x10);
        emitByte(0xC0 | encode);
    }

    // MOVSD xmm, m64
    public final void movsd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x10);
        emitOperandHelper(dst, src);
    }

    // MOVSD m64, xmm (F2 0F 11)
    public final void movsd(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF2);
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0x11);
        emitOperandHelper(src, dst);
    }

    // MOVSS xmm, xmm (F3 0F 10): scalar single move.
    public final void movss(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        assert src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x10);
        emitByte(0xC0 | encode);
    }

    // MOVSS xmm, m32
    public final void movss(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x10);
        emitOperandHelper(dst, src);
    }

    // MOVSS m32, xmm (F3 0F 11)
    public final void movss(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        emitByte(0xF3);
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0x11);
        emitOperandHelper(src, dst);
    }

    public final void movswl(Register
dst, AMD64Address src) { 1213 prefix(src, dst); 1214 emitByte(0x0F); 1215 emitByte(0xBF); 1216 emitOperandHelper(dst, src); 1217 } 1218 1219 public final void movswl(Register dst, Register src) { 1220 int encode = prefixAndEncode(dst.encoding, src.encoding); 1221 emitByte(0x0F); 1222 emitByte(0xBF); 1223 emitByte(0xC0 | encode); 1224 } 1225 1226 public final void movswq(Register dst, AMD64Address src) { 1227 prefixq(src, dst); 1228 emitByte(0x0F); 1229 emitByte(0xBF); 1230 emitOperandHelper(dst, src); 1231 } 1232 1233 public final void movswq(Register dst, Register src) { 1234 int encode = prefixqAndEncode(dst.encoding, src.encoding); 1235 emitByte(0x0F); 1236 emitByte(0xBF); 1237 emitByte(0xC0 | encode); 1238 } 1239 1240 public final void movw(AMD64Address dst, int imm16) { 1241 emitByte(0x66); // switch to 16-bit mode 1242 prefix(dst); 1243 emitByte(0xC7); 1244 emitOperandHelper(0, dst); 1245 emitShort(imm16); 1246 } 1247 1248 public final void movw(AMD64Address dst, Register src) { 1249 emitByte(0x66); 1250 prefix(dst, src); 1251 emitByte(0x89); 1252 emitOperandHelper(src, dst); 1253 } 1254 1255 public final void movzbl(Register dst, AMD64Address src) { 1256 prefix(src, dst); 1257 emitByte(0x0F); 1258 emitByte(0xB6); 1259 emitOperandHelper(dst, src); 1260 } 1261 1262 public final void movzwl(Register dst, AMD64Address src) { 1263 prefix(src, dst); 1264 emitByte(0x0F); 1265 emitByte(0xB7); 1266 emitOperandHelper(dst, src); 1267 } 1268 1269 public final void mulsd(Register dst, AMD64Address src) { 1270 assert dst.getRegisterCategory().equals(AMD64.XMM); 1271 emitByte(0xF2); 1272 prefix(src, dst); 1273 emitByte(0x0F); 1274 emitByte(0x59); 1275 emitOperandHelper(dst, src); 1276 } 1277 1278 public final void mulsd(Register dst, Register src) { 1279 assert dst.getRegisterCategory().equals(AMD64.XMM); 1280 assert src.getRegisterCategory().equals(AMD64.XMM); 1281 1282 emitByte(0xF2); 1283 int encode = prefixAndEncode(dst.encoding, src.encoding); 1284 emitByte(0x0F); 
1285 emitByte(0x59); 1286 emitByte(0xC0 | encode); 1287 } 1288 1289 public final void mulss(Register dst, AMD64Address src) { 1290 assert dst.getRegisterCategory().equals(AMD64.XMM); 1291 1292 emitByte(0xF3); 1293 prefix(src, dst); 1294 emitByte(0x0F); 1295 emitByte(0x59); 1296 emitOperandHelper(dst, src); 1297 } 1298 1299 public final void mulss(Register dst, Register src) { 1300 assert dst.getRegisterCategory().equals(AMD64.XMM); 1301 assert src.getRegisterCategory().equals(AMD64.XMM); 1302 emitByte(0xF3); 1303 int encode = prefixAndEncode(dst.encoding, src.encoding); 1304 emitByte(0x0F); 1305 emitByte(0x59); 1306 emitByte(0xC0 | encode); 1307 } 1308 1309 public final void negl(Register dst) { 1310 int encode = prefixAndEncode(dst.encoding); 1311 emitByte(0xF7); 1312 emitByte(0xD8 | encode); 1313 } 1314 1315 public final void notl(Register dst) { 1316 int encode = prefixAndEncode(dst.encoding); 1317 emitByte(0xF7); 1318 emitByte(0xD0 | encode); 1319 } 1320 1321 @Override 1322 public final void ensureUniquePC() { 1323 nop(); 1324 } 1325 1326 public final void lzcntl(Register dst, Register src) { 1327 assert supports(CPUFeature.LZCNT); 1328 emitByte(0xF3); 1329 int encode = prefixAndEncode(dst.encoding, src.encoding); 1330 emitByte(0x0F); 1331 emitByte(0xBD); 1332 emitByte(0xC0 | encode); 1333 } 1334 1335 public final void lzcntq(Register dst, Register src) { 1336 assert supports(CPUFeature.LZCNT); 1337 emitByte(0xF3); 1338 int encode = prefixqAndEncode(dst.encoding, src.encoding); 1339 emitByte(0x0F); 1340 emitByte(0xBD); 1341 emitByte(0xC0 | encode); 1342 } 1343 1344 public final void lzcntl(Register dst, AMD64Address src) { 1345 assert supports(CPUFeature.LZCNT); 1346 emitByte(0xF3); 1347 prefix(src, dst); 1348 emitByte(0x0F); 1349 emitByte(0xBD); 1350 emitOperandHelper(dst, src); 1351 } 1352 1353 public final void lzcntq(Register dst, AMD64Address src) { 1354 assert supports(CPUFeature.LZCNT); 1355 emitByte(0xF3); 1356 prefixq(src, dst); 1357 emitByte(0x0F); 
1358 emitByte(0xBD); 1359 emitOperandHelper(dst, src); 1360 } 1361 1362 public final void nop() { 1363 nop(1); 1364 } 1365 1366 public void nop(int count) { 1367 int i = count; 1368 if (UseNormalNop) { 1369 assert i > 0 : " "; 1370 // The fancy nops aren't currently recognized by debuggers making it a 1371 // pain to disassemble code while debugging. If assert are on clearly 1372 // speed is not an issue so simply use the single byte traditional nop 1373 // to do alignment. 1374 1375 for (; i > 0; i--) { 1376 emitByte(0x90); 1377 } 1378 return; 1379 } 1380 1381 if (UseAddressNop) { 1382 // 1383 // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD. 1384 // 1: 0x90 1385 // 2: 0x66 0x90 1386 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 1387 // 4: 0x0F 0x1F 0x40 0x00 1388 // 5: 0x0F 0x1F 0x44 0x00 0x00 1389 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1390 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1391 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1392 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1393 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1394 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1395 1396 // The rest coding is AMD specific - use consecutive Address nops 1397 1398 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 1399 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 1400 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1401 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1402 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1403 // Size prefixes (0x66) are added for larger sizes 1404 1405 while (i >= 22) { 1406 i -= 11; 1407 emitByte(0x66); // size prefix 1408 emitByte(0x66); // size prefix 1409 emitByte(0x66); // size prefix 1410 addrNop8(); 1411 } 1412 // Generate first nop for size between 21-12 1413 switch (i) { 1414 case 21: 1415 i -= 1; 1416 
emitByte(0x66); // size prefix 1417 // fall through 1418 case 20: 1419 // fall through 1420 case 19: 1421 i -= 1; 1422 emitByte(0x66); // size prefix 1423 // fall through 1424 case 18: 1425 // fall through 1426 case 17: 1427 i -= 1; 1428 emitByte(0x66); // size prefix 1429 // fall through 1430 case 16: 1431 // fall through 1432 case 15: 1433 i -= 8; 1434 addrNop8(); 1435 break; 1436 case 14: 1437 case 13: 1438 i -= 7; 1439 addrNop7(); 1440 break; 1441 case 12: 1442 i -= 6; 1443 emitByte(0x66); // size prefix 1444 addrNop5(); 1445 break; 1446 default: 1447 assert i < 12; 1448 } 1449 1450 // Generate second nop for size between 11-1 1451 switch (i) { 1452 case 11: 1453 emitByte(0x66); // size prefix 1454 emitByte(0x66); // size prefix 1455 emitByte(0x66); // size prefix 1456 addrNop8(); 1457 break; 1458 case 10: 1459 emitByte(0x66); // size prefix 1460 emitByte(0x66); // size prefix 1461 addrNop8(); 1462 break; 1463 case 9: 1464 emitByte(0x66); // size prefix 1465 addrNop8(); 1466 break; 1467 case 8: 1468 addrNop8(); 1469 break; 1470 case 7: 1471 addrNop7(); 1472 break; 1473 case 6: 1474 emitByte(0x66); // size prefix 1475 addrNop5(); 1476 break; 1477 case 5: 1478 addrNop5(); 1479 break; 1480 case 4: 1481 addrNop4(); 1482 break; 1483 case 3: 1484 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 1485 emitByte(0x66); // size prefix 1486 emitByte(0x66); // size prefix 1487 emitByte(0x90); // nop 1488 break; 1489 case 2: 1490 emitByte(0x66); // size prefix 1491 emitByte(0x90); // nop 1492 break; 1493 case 1: 1494 emitByte(0x90); // nop 1495 break; 1496 default: 1497 assert i == 0; 1498 } 1499 return; 1500 } 1501 1502 // Using nops with size prefixes "0x66 0x90". 
1503 // From AMD Optimization Guide: 1504 // 1: 0x90 1505 // 2: 0x66 0x90 1506 // 3: 0x66 0x66 0x90 1507 // 4: 0x66 0x66 0x66 0x90 1508 // 5: 0x66 0x66 0x90 0x66 0x90 1509 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 1510 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 1511 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 1512 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 1513 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 1514 // 1515 while (i > 12) { 1516 i -= 4; 1517 emitByte(0x66); // size prefix 1518 emitByte(0x66); 1519 emitByte(0x66); 1520 emitByte(0x90); // nop 1521 } 1522 // 1 - 12 nops 1523 if (i > 8) { 1524 if (i > 9) { 1525 i -= 1; 1526 emitByte(0x66); 1527 } 1528 i -= 3; 1529 emitByte(0x66); 1530 emitByte(0x66); 1531 emitByte(0x90); 1532 } 1533 // 1 - 8 nops 1534 if (i > 4) { 1535 if (i > 6) { 1536 i -= 1; 1537 emitByte(0x66); 1538 } 1539 i -= 3; 1540 emitByte(0x66); 1541 emitByte(0x66); 1542 emitByte(0x90); 1543 } 1544 switch (i) { 1545 case 4: 1546 emitByte(0x66); 1547 emitByte(0x66); 1548 emitByte(0x66); 1549 emitByte(0x90); 1550 break; 1551 case 3: 1552 emitByte(0x66); 1553 emitByte(0x66); 1554 emitByte(0x90); 1555 break; 1556 case 2: 1557 emitByte(0x66); 1558 emitByte(0x90); 1559 break; 1560 case 1: 1561 emitByte(0x90); 1562 break; 1563 default: 1564 assert i == 0; 1565 } 1566 } 1567 1568 public final void orl(Register dst, int imm32) { 1569 emitArithImm32(1, dst, imm32); 1570 } 1571 1572 public final void orl(Register dst, AMD64Address src) { 1573 prefix(src, dst); 1574 emitByte(0x0B); 1575 emitOperandHelper(dst, src); 1576 } 1577 1578 public final void orl(Register dst, Register src) { 1579 int encode = prefixAndEncode(dst.encoding, src.encoding); 1580 emitByte(0x0B); 1581 emitByte(0xC0 | encode); 1582 } 1583 1584 public final void popcntl(Register dst, AMD64Address src) { 1585 assert supports(CPUFeature.POPCNT); 1586 emitByte(0xF3); 1587 prefix(src, dst); 1588 emitByte(0x0F); 1589 emitByte(0xB8); 1590 emitOperandHelper(dst, src); 1591 } 1592 1593 
public final void popcntl(Register dst, Register src) { 1594 assert supports(CPUFeature.POPCNT); 1595 emitByte(0xF3); 1596 int encode = prefixAndEncode(dst.encoding, src.encoding); 1597 emitByte(0x0F); 1598 emitByte(0xB8); 1599 emitByte(0xC0 | encode); 1600 } 1601 1602 public final void popcntq(Register dst, AMD64Address src) { 1603 assert supports(CPUFeature.POPCNT); 1604 emitByte(0xF3); 1605 prefixq(src, dst); 1606 emitByte(0x0F); 1607 emitByte(0xB8); 1608 emitOperandHelper(dst, src); 1609 } 1610 1611 public final void popcntq(Register dst, Register src) { 1612 assert supports(CPUFeature.POPCNT); 1613 emitByte(0xF3); 1614 int encode = prefixqAndEncode(dst.encoding, src.encoding); 1615 emitByte(0x0F); 1616 emitByte(0xB8); 1617 emitByte(0xC0 | encode); 1618 } 1619 1620 public final void pop(Register dst) { 1621 int encode = prefixAndEncode(dst.encoding); 1622 emitByte(0x58 | encode); 1623 } 1624 1625 public void popfq() { 1626 emitByte(0x9D); 1627 } 1628 1629 public final void ptest(Register dst, Register src) { 1630 assert supports(CPUFeature.SSE4_1); 1631 emitByte(0x66); 1632 int encode = prefixAndEncode(dst.encoding, src.encoding); 1633 emitByte(0x0F); 1634 emitByte(0x38); 1635 emitByte(0x17); 1636 emitByte(0xC0 | encode); 1637 } 1638 1639 public final void push(Register src) { 1640 int encode = prefixAndEncode(src.encoding); 1641 emitByte(0x50 | encode); 1642 } 1643 1644 public void pushfq() { 1645 emitByte(0x9c); 1646 } 1647 1648 public final void pxor(Register dst, Register src) { 1649 emitByte(0x66); 1650 int encode = prefixAndEncode(dst.encoding, src.encoding); 1651 emitByte(0x0F); 1652 emitByte(0xEF); 1653 emitByte(0xC0 | encode); 1654 } 1655 1656 public final void ret(int imm16) { 1657 if (imm16 == 0) { 1658 emitByte(0xC3); 1659 } else { 1660 emitByte(0xC2); 1661 emitShort(imm16); 1662 } 1663 } 1664 1665 public final void sarl(Register dst, int imm8) { 1666 int encode = prefixAndEncode(dst.encoding); 1667 assert isShiftCount(imm8) : "illegal shift count"; 
1668 if (imm8 == 1) { 1669 emitByte(0xD1); 1670 emitByte(0xF8 | encode); 1671 } else { 1672 emitByte(0xC1); 1673 emitByte(0xF8 | encode); 1674 emitByte(imm8); 1675 } 1676 } 1677 1678 public final void sarl(Register dst) { 1679 int encode = prefixAndEncode(dst.encoding); 1680 emitByte(0xD3); 1681 emitByte(0xF8 | encode); 1682 } 1683 1684 public final void shll(Register dst, int imm8) { 1685 assert isShiftCount(imm8) : "illegal shift count"; 1686 int encode = prefixAndEncode(dst.encoding); 1687 if (imm8 == 1) { 1688 emitByte(0xD1); 1689 emitByte(0xE0 | encode); 1690 } else { 1691 emitByte(0xC1); 1692 emitByte(0xE0 | encode); 1693 emitByte(imm8); 1694 } 1695 } 1696 1697 public final void shll(Register dst) { 1698 int encode = prefixAndEncode(dst.encoding); 1699 emitByte(0xD3); 1700 emitByte(0xE0 | encode); 1701 } 1702 1703 public final void shrl(Register dst, int imm8) { 1704 assert isShiftCount(imm8) : "illegal shift count"; 1705 int encode = prefixAndEncode(dst.encoding); 1706 if (imm8 == 1) { 1707 emitByte(0xD1); 1708 emitByte(0xE8 | encode); 1709 } else { 1710 emitByte(0xC1); 1711 emitByte(0xE8 | encode); 1712 emitByte(imm8); 1713 } 1714 } 1715 1716 public final void shrl(Register dst) { 1717 int encode = prefixAndEncode(dst.encoding); 1718 emitByte(0xD3); 1719 emitByte(0xE8 | encode); 1720 } 1721 1722 public final void roll(Register dst, int imm8) { 1723 assert isShiftCount(imm8) : "illegal shift count"; 1724 int encode = prefixAndEncode(dst.encoding); 1725 if (imm8 == 1) { 1726 emitByte(0xD1); 1727 emitByte(0xC0 | encode); 1728 } else { 1729 emitByte(0xC1); 1730 emitByte(0xC0 | encode); 1731 emitByte(imm8); 1732 } 1733 } 1734 1735 public final void roll(Register dst) { 1736 int encode = prefixAndEncode(dst.encoding); 1737 emitByte(0xD3); 1738 emitByte(0xC0 | encode); 1739 } 1740 1741 public final void rorl(Register dst, int imm8) { 1742 assert isShiftCount(imm8) : "illegal shift count"; 1743 int encode = prefixAndEncode(dst.encoding); 1744 if (imm8 == 1) { 1745 
emitByte(0xD1); 1746 emitByte(0xC8 | encode); 1747 } else { 1748 emitByte(0xC1); 1749 emitByte(0xC8 | encode); 1750 emitByte(imm8); 1751 } 1752 } 1753 1754 public final void rorl(Register dst) { 1755 int encode = prefixAndEncode(dst.encoding); 1756 emitByte(0xD3); 1757 emitByte(0xC8 | encode); 1758 } 1759 1760 public final void rolq(Register dst, int imm8) { 1761 assert isShiftCount(imm8) : "illegal shift count"; 1762 int encode = prefixqAndEncode(dst.encoding); 1763 if (imm8 == 1) { 1764 emitByte(0xD1); 1765 emitByte(0xC0 | encode); 1766 } else { 1767 emitByte(0xC1); 1768 emitByte(0xC0 | encode); 1769 emitByte(imm8); 1770 } 1771 } 1772 1773 public final void rolq(Register dst) { 1774 int encode = prefixqAndEncode(dst.encoding); 1775 emitByte(0xD3); 1776 emitByte(0xC0 | encode); 1777 } 1778 1779 public final void rorq(Register dst, int imm8) { 1780 assert isShiftCount(imm8) : "illegal shift count"; 1781 int encode = prefixqAndEncode(dst.encoding); 1782 if (imm8 == 1) { 1783 emitByte(0xD1); 1784 emitByte(0xC8 | encode); 1785 } else { 1786 emitByte(0xC1); 1787 emitByte(0xC8 | encode); 1788 emitByte(imm8); 1789 } 1790 } 1791 1792 public final void rorq(Register dst) { 1793 int encode = prefixqAndEncode(dst.encoding); 1794 emitByte(0xD3); 1795 emitByte(0xC8 | encode); 1796 } 1797 1798 public final void sqrtsd(Register dst, AMD64Address src) { 1799 assert dst.getRegisterCategory().equals(AMD64.XMM); 1800 emitByte(0xF2); 1801 prefix(src, dst); 1802 emitByte(0x0F); 1803 emitByte(0x51); 1804 emitOperandHelper(dst, src); 1805 } 1806 1807 public final void sqrtsd(Register dst, Register src) { 1808 assert dst.getRegisterCategory().equals(AMD64.XMM); 1809 assert src.getRegisterCategory().equals(AMD64.XMM); 1810 // HMM Table D-1 says sse2 1811 // assert is64 || target.supportsSSE(); 1812 emitByte(0xF2); 1813 int encode = prefixAndEncode(dst.encoding, src.encoding); 1814 emitByte(0x0F); 1815 emitByte(0x51); 1816 emitByte(0xC0 | encode); 1817 } 1818 1819 public final void 
subl(AMD64Address dst, int imm32) { 1820 emitArithImm32(5, dst, imm32); 1821 } 1822 1823 public final void subl(Register dst, int imm32) { 1824 emitArithImm32(5, dst, imm32); 1825 } 1826 1827 public final void subl(Register dst, AMD64Address src) { 1828 prefix(src, dst); 1829 emitByte(0x2B); 1830 emitOperandHelper(dst, src); 1831 } 1832 1833 public final void subl(Register dst, Register src) { 1834 int encode = prefixAndEncode(dst.encoding, src.encoding); 1835 emitByte(0x2B); 1836 emitByte(0xC0 | encode); 1837 } 1838 1839 public final void subsd(Register dst, Register src) { 1840 assert dst.getRegisterCategory().equals(AMD64.XMM); 1841 assert src.getRegisterCategory().equals(AMD64.XMM); 1842 emitByte(0xF2); 1843 int encode = prefixAndEncode(dst.encoding, src.encoding); 1844 emitByte(0x0F); 1845 emitByte(0x5C); 1846 emitByte(0xC0 | encode); 1847 } 1848 1849 public final void subsd(Register dst, AMD64Address src) { 1850 assert dst.getRegisterCategory().equals(AMD64.XMM); 1851 1852 emitByte(0xF2); 1853 prefix(src, dst); 1854 emitByte(0x0F); 1855 emitByte(0x5C); 1856 emitOperandHelper(dst, src); 1857 } 1858 1859 public final void subss(Register dst, Register src) { 1860 assert dst.getRegisterCategory().equals(AMD64.XMM); 1861 assert src.getRegisterCategory().equals(AMD64.XMM); 1862 emitByte(0xF3); 1863 int encode = prefixAndEncode(dst.encoding, src.encoding); 1864 emitByte(0x0F); 1865 emitByte(0x5C); 1866 emitByte(0xC0 | encode); 1867 } 1868 1869 public final void subss(Register dst, AMD64Address src) { 1870 assert dst.getRegisterCategory().equals(AMD64.XMM); 1871 1872 emitByte(0xF3); 1873 prefix(src, dst); 1874 emitByte(0x0F); 1875 emitByte(0x5C); 1876 emitOperandHelper(dst, src); 1877 } 1878 1879 public final void testl(Register dst, int imm32) { 1880 // not using emitArith because test 1881 // doesn't support sign-extension of 1882 // 8bit operands 1883 int encode = dst.encoding; 1884 if (encode == 0) { 1885 emitByte(0xA9); 1886 } else { 1887 encode = 
prefixAndEncode(encode); 1888 emitByte(0xF7); 1889 emitByte(0xC0 | encode); 1890 } 1891 emitInt(imm32); 1892 } 1893 1894 public final void testl(AMD64Address dst, int imm32) { 1895 prefix(dst); 1896 emitByte(0xF7); 1897 emitOperandHelper(0, dst); 1898 emitInt(imm32); 1899 } 1900 1901 public final void testl(Register dst, Register src) { 1902 int encode = prefixAndEncode(dst.encoding, src.encoding); 1903 emitByte(0x85); 1904 emitByte(0xC0 | encode); 1905 } 1906 1907 public final void testl(Register dst, AMD64Address src) { 1908 prefix(src, dst); 1909 emitByte(0x85); 1910 emitOperandHelper(dst, src); 1911 } 1912 1913 public final void tzcntl(Register dst, Register src) { 1914 assert supports(CPUFeature.BMI1); 1915 emitByte(0xF3); 1916 int encode = prefixAndEncode(dst.encoding, src.encoding); 1917 emitByte(0x0F); 1918 emitByte(0xBC); 1919 emitByte(0xC0 | encode); 1920 } 1921 1922 public final void tzcntq(Register dst, Register src) { 1923 assert supports(CPUFeature.BMI1); 1924 emitByte(0xF3); 1925 int encode = prefixqAndEncode(dst.encoding, src.encoding); 1926 emitByte(0x0F); 1927 emitByte(0xBC); 1928 emitByte(0xC0 | encode); 1929 } 1930 1931 public final void tzcntl(Register dst, AMD64Address src) { 1932 assert supports(CPUFeature.BMI1); 1933 emitByte(0xF3); 1934 prefix(src, dst); 1935 emitByte(0x0F); 1936 emitByte(0xBC); 1937 emitOperandHelper(dst, src); 1938 } 1939 1940 public final void tzcntq(Register dst, AMD64Address src) { 1941 assert supports(CPUFeature.BMI1); 1942 emitByte(0xF3); 1943 prefixq(src, dst); 1944 emitByte(0x0F); 1945 emitByte(0xBC); 1946 emitOperandHelper(dst, src); 1947 } 1948 1949 public final void ucomisd(Register dst, AMD64Address src) { 1950 assert dst.getRegisterCategory().equals(AMD64.XMM); 1951 emitByte(0x66); 1952 ucomiss(dst, src); 1953 } 1954 1955 public final void ucomisd(Register dst, Register src) { 1956 assert dst.getRegisterCategory().equals(AMD64.XMM); 1957 assert src.getRegisterCategory().equals(AMD64.XMM); 1958 emitByte(0x66); 
1959 ucomiss(dst, src); 1960 } 1961 1962 public final void ucomiss(Register dst, AMD64Address src) { 1963 assert dst.getRegisterCategory().equals(AMD64.XMM); 1964 1965 prefix(src, dst); 1966 emitByte(0x0F); 1967 emitByte(0x2E); 1968 emitOperandHelper(dst, src); 1969 } 1970 1971 public final void ucomiss(Register dst, Register src) { 1972 assert dst.getRegisterCategory().equals(AMD64.XMM); 1973 assert src.getRegisterCategory().equals(AMD64.XMM); 1974 int encode = prefixAndEncode(dst.encoding, src.encoding); 1975 emitByte(0x0F); 1976 emitByte(0x2E); 1977 emitByte(0xC0 | encode); 1978 } 1979 1980 public final void xorl(Register dst, int imm32) { 1981 emitArithImm32(6, dst, imm32); 1982 } 1983 1984 public final void xorl(Register dst, AMD64Address src) { 1985 prefix(src, dst); 1986 emitByte(0x33); 1987 emitOperandHelper(dst, src); 1988 } 1989 1990 public final void xorl(Register dst, Register src) { 1991 int encode = prefixAndEncode(dst.encoding, src.encoding); 1992 emitByte(0x33); 1993 emitByte(0xC0 | encode); 1994 } 1995 1996 public final void andpd(Register dst, Register src) { 1997 emitByte(0x66); 1998 andps(dst, src); 1999 } 2000 2001 public final void andpd(Register dst, AMD64Address src) { 2002 emitByte(0x66); 2003 andps(dst, src); 2004 } 2005 2006 public final void andps(Register dst, Register src) { 2007 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2008 int encode = prefixAndEncode(dst.encoding, src.encoding); 2009 emitByte(0x0F); 2010 emitByte(0x54); 2011 emitByte(0xC0 | encode); 2012 } 2013 2014 public final void andps(Register dst, AMD64Address src) { 2015 assert dst.getRegisterCategory().equals(AMD64.XMM); 2016 prefix(src, dst); 2017 emitByte(0x0F); 2018 emitByte(0x54); 2019 emitOperandHelper(dst, src); 2020 } 2021 2022 public final void orpd(Register dst, Register src) { 2023 emitByte(0x66); 2024 orps(dst, src); 2025 } 2026 2027 public final void orpd(Register dst, AMD64Address src) { 2028 
emitByte(0x66); 2029 orps(dst, src); 2030 } 2031 2032 public final void orps(Register dst, Register src) { 2033 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2034 int encode = prefixAndEncode(dst.encoding, src.encoding); 2035 emitByte(0x0F); 2036 emitByte(0x56); 2037 emitByte(0xC0 | encode); 2038 } 2039 2040 public final void orps(Register dst, AMD64Address src) { 2041 assert dst.getRegisterCategory().equals(AMD64.XMM); 2042 prefix(src, dst); 2043 emitByte(0x0F); 2044 emitByte(0x56); 2045 emitOperandHelper(dst, src); 2046 } 2047 2048 public final void xorpd(Register dst, Register src) { 2049 emitByte(0x66); 2050 xorps(dst, src); 2051 } 2052 2053 public final void xorpd(Register dst, AMD64Address src) { 2054 emitByte(0x66); 2055 xorps(dst, src); 2056 } 2057 2058 public final void xorps(Register dst, Register src) { 2059 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 2060 int encode = prefixAndEncode(dst.encoding, src.encoding); 2061 emitByte(0x0F); 2062 emitByte(0x57); 2063 emitByte(0xC0 | encode); 2064 } 2065 2066 public final void xorps(Register dst, AMD64Address src) { 2067 assert dst.getRegisterCategory().equals(AMD64.XMM); 2068 prefix(src, dst); 2069 emitByte(0x0F); 2070 emitByte(0x57); 2071 emitOperandHelper(dst, src); 2072 } 2073 2074 protected final void decl(Register dst) { 2075 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2076 int encode = prefixAndEncode(dst.encoding); 2077 emitByte(0xFF); 2078 emitByte(0xC8 | encode); 2079 } 2080 2081 protected final void incl(Register dst) { 2082 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 2083 int encode = prefixAndEncode(dst.encoding); 2084 emitByte(0xFF); 2085 emitByte(0xC0 | encode); 2086 } 2087 2088 private int prefixAndEncode(int regEnc) { 2089 return prefixAndEncode(regEnc, false); 2090 } 2091 2092 private int prefixAndEncode(int regEnc, boolean byteinst) { 2093 
if (regEnc >= 8) { 2094 emitByte(Prefix.REXB); 2095 return regEnc - 8; 2096 } else if (byteinst && regEnc >= 4) { 2097 emitByte(Prefix.REX); 2098 } 2099 return regEnc; 2100 } 2101 2102 private int prefixqAndEncode(int regEnc) { 2103 if (regEnc < 8) { 2104 emitByte(Prefix.REXW); 2105 return regEnc; 2106 } else { 2107 emitByte(Prefix.REXWB); 2108 return regEnc - 8; 2109 } 2110 } 2111 2112 private int prefixAndEncode(int dstEnc, int srcEnc) { 2113 return prefixAndEncode(dstEnc, false, srcEnc, false); 2114 } 2115 2116 private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) { 2117 int srcEnc = srcEncoding; 2118 int dstEnc = dstEncoding; 2119 if (dstEnc < 8) { 2120 if (srcEnc >= 8) { 2121 emitByte(Prefix.REXB); 2122 srcEnc -= 8; 2123 } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) { 2124 emitByte(Prefix.REX); 2125 } 2126 } else { 2127 if (srcEnc < 8) { 2128 emitByte(Prefix.REXR); 2129 } else { 2130 emitByte(Prefix.REXRB); 2131 srcEnc -= 8; 2132 } 2133 dstEnc -= 8; 2134 } 2135 return dstEnc << 3 | srcEnc; 2136 } 2137 2138 /** 2139 * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand 2140 * prefix. If the given operands exceed 3 bits, the 4th bit is encoded in the prefix. 
2141 * 2142 * @param regEncoding the encoding of the register part of the ModRM-Byte 2143 * @param rmEncoding the encoding of the r/m part of the ModRM-Byte 2144 * @return the lower 6 bits of the ModRM-Byte that should be emitted 2145 */ 2146 private int prefixqAndEncode(int regEncoding, int rmEncoding) { 2147 int rmEnc = rmEncoding; 2148 int regEnc = regEncoding; 2149 if (regEnc < 8) { 2150 if (rmEnc < 8) { 2151 emitByte(Prefix.REXW); 2152 } else { 2153 emitByte(Prefix.REXWB); 2154 rmEnc -= 8; 2155 } 2156 } else { 2157 if (rmEnc < 8) { 2158 emitByte(Prefix.REXWR); 2159 } else { 2160 emitByte(Prefix.REXWRB); 2161 rmEnc -= 8; 2162 } 2163 regEnc -= 8; 2164 } 2165 return regEnc << 3 | rmEnc; 2166 } 2167 2168 private static boolean needsRex(Register reg) { 2169 return reg.encoding >= MinEncodingNeedsRex; 2170 } 2171 2172 private void prefix(AMD64Address adr) { 2173 if (needsRex(adr.getBase())) { 2174 if (needsRex(adr.getIndex())) { 2175 emitByte(Prefix.REXXB); 2176 } else { 2177 emitByte(Prefix.REXB); 2178 } 2179 } else { 2180 if (needsRex(adr.getIndex())) { 2181 emitByte(Prefix.REXX); 2182 } 2183 } 2184 } 2185 2186 private void prefixq(AMD64Address adr) { 2187 if (needsRex(adr.getBase())) { 2188 if (needsRex(adr.getIndex())) { 2189 emitByte(Prefix.REXWXB); 2190 } else { 2191 emitByte(Prefix.REXWB); 2192 } 2193 } else { 2194 if (needsRex(adr.getIndex())) { 2195 emitByte(Prefix.REXWX); 2196 } else { 2197 emitByte(Prefix.REXW); 2198 } 2199 } 2200 } 2201 2202 private void prefix(AMD64Address adr, Register reg) { 2203 prefix(adr, reg, false); 2204 } 2205 2206 private void prefix(AMD64Address adr, Register reg, boolean byteinst) { 2207 if (reg.encoding < 8) { 2208 if (needsRex(adr.getBase())) { 2209 if (needsRex(adr.getIndex())) { 2210 emitByte(Prefix.REXXB); 2211 } else { 2212 emitByte(Prefix.REXB); 2213 } 2214 } else { 2215 if (needsRex(adr.getIndex())) { 2216 emitByte(Prefix.REXX); 2217 } else if (byteinst && reg.encoding >= 4) { 2218 emitByte(Prefix.REX); 2219 } 2220 } 
} else { // completes a prefix-emission method begun above this window: reg-field register needs REX.R
    if (needsRex(adr.getBase())) {
        if (needsRex(adr.getIndex())) {
            emitByte(Prefix.REXRXB);
        } else {
            emitByte(Prefix.REXRB);
        }
    } else {
        if (needsRex(adr.getIndex())) {
            emitByte(Prefix.REXRX);
        } else {
            emitByte(Prefix.REXR);
        }
    }
}
}

/**
 * Emits the REX prefix for a 64-bit (quad-word) instruction with {@code src} in the reg field
 * and memory operand {@code adr}. REX.W is always set; REX.R/X/B are added as needed for
 * {@code src}, the index register and the base register respectively.
 */
private void prefixq(AMD64Address adr, Register src) {
    if (src.encoding < 8) {
        if (needsRex(adr.getBase())) {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWXB);
            } else {
                emitByte(Prefix.REXWB);
            }
        } else {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWX);
            } else {
                emitByte(Prefix.REXW);
            }
        }
    } else {
        if (needsRex(adr.getBase())) {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWRXB);
            } else {
                emitByte(Prefix.REXWRB);
            }
        } else {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWRX);
            } else {
                emitByte(Prefix.REXWR);
            }
        }
    }
}

/** ADD r64, imm32 (sign-extended); delegates to the shared /0 arithmetic-immediate emitter. */
public final void addq(Register dst, int imm32) {
    emitArithImm32q(0, dst, imm32);
}

/** ADD r64, m64 (opcode 0x03 /r). */
public final void addq(Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x03);
    emitOperandHelper(dst, src);
}

/** ADD r64, r64 (opcode 0x03, ModRM register form). */
public final void addq(Register dst, Register src) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x03);
    emitByte(0xC0 | encode);
}

/** AND r64, imm32 (sign-extended); shared arithmetic-immediate emitter, extension /4. */
public final void andq(Register dst, int imm32) {
    emitArithImm32q(4, dst, imm32);
}

/** AND r64, m64 (opcode 0x23 /r). */
public final void andq(Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x23);
    emitOperandHelper(dst, src);
}

/** AND r64, r64 (opcode 0x23, ModRM register form). */
public final void andq(Register dst, Register src) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x23);
    emitByte(0xC0 | encode);
}

/** BSWAP r64 (opcode 0x0F 0xC8+r): reverses byte order of the 64-bit register. */
public final void bswapq(Register reg) {
    int encode = prefixqAndEncode(reg.encoding);
    emitByte(0x0F);
    emitByte(0xC8 | encode);
}

/** CQO (REX.W + 0x99): sign-extends RAX into RDX:RAX. */
public final void cdqq() {
    emitByte(Prefix.REXW);
    emitByte(0x99);
}

/** CMOVcc r64, r64 (opcode 0x0F 0x40+cc): conditional move on condition {@code cc}. */
public final void cmovq(ConditionFlag cc, Register dst, Register src) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x0F);
    emitByte(0x40 | cc.getValue());
    emitByte(0xC0 | encode);
}

/** CMOVcc r64, m64 (opcode 0x0F 0x40+cc with memory operand). */
public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x0F);
    emitByte(0x40 | cc.getValue());
    emitOperandHelper(dst, src);
}

/** CMP m64, imm32 (opcode 0x81 /7, immediate sign-extended to 64 bits). */
public final void cmpq(AMD64Address dst, int imm32) {
    prefixq(dst);
    emitByte(0x81);
    emitOperandHelper(7, dst);
    emitInt(imm32);
}

/** CMP r64, imm32 (sign-extended); shared arithmetic-immediate emitter, extension /7. */
public final void cmpq(Register dst, int imm32) {
    emitArithImm32q(7, dst, imm32);
}

/** CMP r64, r64 (opcode 0x3B, ModRM register form). */
public final void cmpq(Register dst, Register src) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x3B);
    emitByte(0xC0 | encode);
}

/** CMP r64, m64 (opcode 0x3B /r). */
public final void cmpq(Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x3B);
    emitOperandHelper(dst, src);
}

/** CMPXCHG m64, r64 (opcode 0x0F 0xB1); typically preceded by a LOCK prefix emitted by the caller. */
public final void cmpxchgq(Register reg, AMD64Address adr) {
    prefixq(adr, reg);
    emitByte(0x0F);
    emitByte(0xB1);
    emitOperandHelper(reg, adr);
}

/** CVTSI2SD xmm, m64 (0xF2 0x0F 0x2A): convert signed 64-bit integer in memory to double. */
public final void cvtsi2sdq(Register dst, AMD64Address src) {
    assert dst.getRegisterCategory().equals(AMD64.XMM);
    // The 0xF2 operand-size prefix must precede the REX prefix.
    emitByte(0xF2);
    prefixq(src, dst);
    emitByte(0x0F);
    emitByte(0x2A);
    emitOperandHelper(dst, src);
}

/** CVTSI2SD xmm, r64 (0xF2 0x0F 0x2A, register form). */
public final void cvtsi2sdq(Register dst, Register src) {
    assert dst.getRegisterCategory().equals(AMD64.XMM);
    emitByte(0xF2);
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x0F);
    emitByte(0x2A);
    emitByte(0xC0 | encode);
}

/** CVTSI2SS xmm, m64 (0xF3 0x0F 0x2A): convert signed 64-bit integer in memory to float. */
public final void cvtsi2ssq(Register dst, AMD64Address src) {
    assert dst.getRegisterCategory().equals(AMD64.XMM);
    emitByte(0xF3);
    prefixq(src, dst);
    emitByte(0x0F);
    emitByte(0x2A);
    emitOperandHelper(dst, src);
}

/** CVTSI2SS xmm, r64 (0xF3 0x0F 0x2A, register form). */
public final void cvtsi2ssq(Register dst, Register src) {
    assert dst.getRegisterCategory().equals(AMD64.XMM);
    emitByte(0xF3);
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x0F);
    emitByte(0x2A);
    emitByte(0xC0 | encode);
}

/**
 * CVTTSD2SI r64, m64 (0xF2 0x0F 0x2C): truncating double-to-int64 conversion.
 * NOTE(review): unlike the register variant below, this overload has no XMM assertion
 * (here {@code dst} is a general-purpose register, so none is applicable).
 */
public final void cvttsd2siq(Register dst, AMD64Address src) {
    emitByte(0xF2);
    prefixq(src, dst);
    emitByte(0x0F);
    emitByte(0x2C);
    emitOperandHelper(dst, src);
}

/** CVTTSD2SI r64, xmm (0xF2 0x0F 0x2C, register form). */
public final void cvttsd2siq(Register dst, Register src) {
    assert src.getRegisterCategory().equals(AMD64.XMM);
    emitByte(0xF2);
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x0F);
    emitByte(0x2C);
    emitByte(0xC0 | encode);
}

/** CVTTSS2SI r64, m32 (0xF3 0x0F 0x2C): truncating float-to-int64 conversion. */
public final void cvttss2siq(Register dst, AMD64Address src) {
    emitByte(0xF3);
    prefixq(src, dst);
    emitByte(0x0F);
    emitByte(0x2C);
    emitOperandHelper(dst, src);
}

/** CVTTSS2SI r64, xmm (0xF3 0x0F 0x2C, register form). */
public final void cvttss2siq(Register dst, Register src) {
    assert src.getRegisterCategory().equals(AMD64.XMM);
    emitByte(0xF3);
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x0F);
    emitByte(0x2C);
    emitByte(0xC0 | encode);
}

/** DEC r64 (opcode 0xFF /1). */
protected final void decq(Register dst) {
    // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
    int encode = prefixqAndEncode(dst.encoding);
    emitByte(0xFF);
    emitByte(0xC8 | encode);
}

/** DEC m64 (opcode 0xFF /1 with memory operand). */
protected final void decq(AMD64Address dst) {
    prefixq(dst);
    emitByte(0xFF);
    emitOperandHelper(1, dst);
}

/** DIV r64 (opcode 0xF7 /6): unsigned divide of RDX:RAX by {@code src}. */
public final void divq(Register src) {
    int encode = prefixqAndEncode(6, src.encoding);
    emitByte(0xF7);
    emitByte(0xC0 | encode);
}

/** IDIV r64 (opcode 0xF7 /7): signed divide of RDX:RAX by {@code src}. */
public final void idivq(Register src) {
    int encode = prefixqAndEncode(7, src.encoding);
    emitByte(0xF7);
    emitByte(0xC0 | encode);
}

/** MUL r64 (opcode 0xF7 /4): unsigned multiply of RAX by {@code src} into RDX:RAX. */
public final void mulq(Register src) {
    int encode = prefixqAndEncode(4, src.encoding);
    emitByte(0xF7);
    emitByte(0xC0 | encode);
}

/** MUL m64 (opcode 0xF7 /4 with memory operand). */
public final void mulq(AMD64Address src) {
    prefixq(src);
    emitByte(0xF7);
    emitOperandHelper(4, src);
}

/** IMUL r64 (opcode 0xF7 /5): signed multiply of RAX by {@code src} into RDX:RAX. */
public final void imulq(Register src) {
    int encode = prefixqAndEncode(5, src.encoding);
    emitByte(0xF7);
    emitByte(0xC0 | encode);
}

/** IMUL m64 (opcode 0xF7 /5 with memory operand). */
public final void imulq(AMD64Address src) {
    prefixq(src);
    emitByte(0xF7);
    emitOperandHelper(5, src);
}

/** IMUL r64, r64 (opcode 0x0F 0xAF): two-operand signed multiply. */
public final void imulq(Register dst, Register src) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x0F);
    emitByte(0xAF);
    emitByte(0xC0 | encode);
}

/** IMUL r64, m64 (opcode 0x0F 0xAF with memory operand). */
public final void imulq(Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x0F);
    emitByte(0xAF);
    emitOperandHelper(dst, src);
}

/**
 * IMUL r64, r64, imm: three-operand signed multiply. Uses the short imm8 form
 * (opcode 0x6B) when {@code value} fits in a signed byte, otherwise imm32 (opcode 0x69).
 */
public final void imulq(Register dst, Register src, int value) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    if (isByte(value)) {
        emitByte(0x6B);
        emitByte(0xC0 | encode);
        emitByte(value & 0xFF);
    } else {
        emitByte(0x69);
        emitByte(0xC0 | encode);
        emitInt(value);
    }
}

/** INC r64 (opcode 0xFF /0). */
public final void incq(Register dst) {
    // Don't use it directly. Use Macroincrementq() instead.
    // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
    int encode = prefixqAndEncode(dst.encoding);
    emitByte(0xFF);
    emitByte(0xC0 | encode);
}

/** MOV r64, imm64 (opcode REX.W + 0xB8+r): loads a full 64-bit immediate. */
public final void movq(Register dst, long imm64) {
    int encode = prefixqAndEncode(dst.encoding);
    emitByte(0xB8 | encode);
    emitLong(imm64);
}

/**
 * Despite the name, this encodes MOV r64, imm32 (opcode 0xC7 /0 with REX.W): the 32-bit
 * immediate is sign-extended into the 64-bit register. NOTE(review): the {@code movslq}
 * name is misleading; confirm callers rely only on the sign-extending-move semantics.
 */
public final void movslq(Register dst, int imm32) {
    int encode = prefixqAndEncode(dst.encoding);
    emitByte(0xC7);
    emitByte(0xC0 | encode);
    emitInt(imm32);
}

/**
 * MOVD/MOVQ between a general-purpose and an XMM register (0x66 0x0F 0x6E / 0x7E with
 * REX.W). Exactly one of {@code dst}/{@code src} must be an XMM register.
 */
public final void movdq(Register dst, Register src) {

    // table D-1 says MMX/SSE2
    emitByte(0x66);

    if (dst.getRegisterCategory().equals(AMD64.XMM)) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x6E);
        emitByte(0xC0 | encode);
    } else if (src.getRegisterCategory().equals(AMD64.XMM)) {

        // swap src/dst to get correct prefix
        int encode = prefixqAndEncode(src.encoding, dst.encoding);
        emitByte(0x0F);
        emitByte(0x7E);
        emitByte(0xC0 | encode);
    } else {
        throw new InternalError("should not reach here");
    }
}

/** MOVDQU xmm, m128 (0xF3 0x0F 0x6F): unaligned 128-bit load. */
public final void movdqu(Register dst, AMD64Address src) {
    emitByte(0xF3);
    prefix(src, dst);
    emitByte(0x0F);
    emitByte(0x6F);
    emitOperandHelper(dst, src);
}

/**
 * Encodes MOV m64, imm32 (opcode 0xC7 /0 with REX.W, immediate sign-extended).
 * NOTE(review): as with {@code movslq(Register, int)}, the name is misleading.
 */
public final void movslq(AMD64Address dst, int imm32) {
    prefixq(dst);
    emitByte(0xC7);
    emitOperandHelper(0, dst);
    emitInt(imm32);
}

/** MOVSXD r64, m32 (opcode 0x63): load 32-bit memory operand sign-extended to 64 bits. */
public final void movslq(Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x63);
    emitOperandHelper(dst, src);
}

/** MOVSXD r64, r32 (opcode 0x63, register form). */
public final void movslq(Register dst, Register src) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x63);
    emitByte(0xC0 | encode);
}

/** NEG r64 (opcode 0xF7 /3): two's-complement negation. */
public final void negq(Register dst) {
    int encode = prefixqAndEncode(dst.encoding);
    emitByte(0xF7);
    emitByte(0xD8 | encode);
}

/** NOT r64 (opcode 0xF7 /2): one's-complement negation. */
public final void notq(Register dst) {
    int encode = prefixqAndEncode(dst.encoding);
    emitByte(0xF7);
    emitByte(0xD0 | encode);
}

/** OR r64, imm32 (sign-extended); shared arithmetic-immediate emitter, extension /1. */
public final void orq(Register dst, int imm32) {
    emitArithImm32q(1, dst, imm32);
}

/** OR r64, m64 (opcode 0x0B /r). */
public final void orq(Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x0B);
    emitOperandHelper(dst, src);
}

/** OR r64, r64 (opcode 0x0B, ModRM register form). */
public final void orq(Register dst, Register src) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x0B);
    emitByte(0xC0 | encode);
}

/**
 * SAR r64, imm8 (/7): arithmetic right shift. Uses the shift-by-1 form (0xD1) when
 * possible, otherwise 0xC1 with an immediate count.
 * NOTE(review): the assert checks {@code imm8 >> 1}, presumably so 64-bit shift counts
 * up to 63 pass a 32-bit-oriented {@code isShiftCount} — confirm against its definition.
 */
public final void sarq(Register dst, int imm8) {
    assert isShiftCount(imm8 >> 1) : "illegal shift count";
    int encode = prefixqAndEncode(dst.encoding);
    if (imm8 == 1) {
        emitByte(0xD1);
        emitByte(0xF8 | encode);
    } else {
        emitByte(0xC1);
        emitByte(0xF8 | encode);
        emitByte(imm8);
    }
}

/** SAR r64, CL (opcode 0xD3 /7): arithmetic right shift by CL. */
public final void sarq(Register dst) {
    int encode = prefixqAndEncode(dst.encoding);
    emitByte(0xD3);
    emitByte(0xF8 | encode);
}

/** SHL r64, imm8 (/4): logical left shift; short 0xD1 form when the count is 1. */
public final void shlq(Register dst, int imm8) {
    assert isShiftCount(imm8 >> 1) : "illegal shift count";
    int encode = prefixqAndEncode(dst.encoding);
    if (imm8 == 1) {
        emitByte(0xD1);
        emitByte(0xE0 | encode);
    } else {
        emitByte(0xC1);
        emitByte(0xE0 | encode);
        emitByte(imm8);
    }
}

/** SHL r64, CL (opcode 0xD3 /4). */
public final void shlq(Register dst) {
    int encode = prefixqAndEncode(dst.encoding);
    emitByte(0xD3);
    emitByte(0xE0 | encode);
}

/** SHR r64, imm8 (/5): logical right shift; short 0xD1 form when the count is 1. */
public final void shrq(Register dst, int imm8) {
    assert isShiftCount(imm8 >> 1) : "illegal shift count";
    int encode = prefixqAndEncode(dst.encoding);
    if (imm8 == 1) {
        emitByte(0xD1);
        emitByte(0xE8 | encode);
    } else {
        emitByte(0xC1);
        emitByte(0xE8 | encode);
        emitByte(imm8);
    }
}

/** SHR r64, CL (opcode 0xD3 /5). */
public final void shrq(Register dst) {
    int encode = prefixqAndEncode(dst.encoding);
    emitByte(0xD3);
    emitByte(0xE8 | encode);
}

/** SUB r64, imm32 (sign-extended); may use the short imm8 encoding when it fits. */
public final void subq(Register dst, int imm32) {
    subq(dst, imm32, false);
}

/** Like {@link #subq(Register, int)} but forces the full 32-bit immediate encoding. */
public final void subqWide(Register dst, int imm32) {
    subq(dst, imm32, true);
}

// Shared implementation for subq/subqWide; extension /5 selects SUB.
private void subq(Register dst, int imm32, boolean force32Imm) {
    emitArithImm32q(5, dst, imm32, force32Imm);
}

/** SUB r64, m64 (opcode 0x2B /r). */
public final void subq(Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x2B);
    emitOperandHelper(dst, src);
}

/** SUB r64, r64 (opcode 0x2B, ModRM register form). */
public final void subq(Register dst, Register src) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x2B);
    emitByte(0xC0 | encode);
}

/**
 * TEST r64, imm32. Uses the short RAX-specific form (REX.W + 0xA9) when {@code dst} is
 * rax (encoding 0), otherwise 0xF7 /0.
 */
public final void testq(Register dst, int imm32) {
    // not using emitArith because test
    // doesn't support sign-extension of
    // 8bit operands
    int encode = dst.encoding;
    if (encode == 0) {
        emitByte(Prefix.REXW);
        emitByte(0xA9);
    } else {
        encode = prefixqAndEncode(encode);
        emitByte(0xF7);
        emitByte(0xC0 | encode);
    }
    emitInt(imm32);
}

/** TEST r64, r64 (opcode 0x85, ModRM register form). */
public final void testq(Register dst, Register src) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x85);
    emitByte(0xC0 | encode);
}

/** TEST r64, m64 (opcode 0x85 /r). */
public final void testq(Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x85);
    emitOperandHelper(dst, src);
}

/** TEST m64, imm32 (opcode 0xF7 /0). */
public final void testq(AMD64Address dst, int imm32) {
    prefixq(dst);
    emitByte(0xF7);
    emitOperandHelper(0, dst);
    emitInt(imm32);
}

/** XADD m32, r32 (opcode 0x0F 0xC1): exchange-and-add, 32-bit form. */
public final void xaddl(AMD64Address dst, Register src) {
    prefix(dst, src);
    emitByte(0x0F);
    emitByte(0xC1);
    emitOperandHelper(src, dst);
}

/** XADD m64, r64 (opcode 0x0F 0xC1 with REX.W). */
public final void xaddq(AMD64Address dst, Register src) {
    prefixq(dst, src);
    emitByte(0x0F);
    emitByte(0xC1);
    emitOperandHelper(src, dst);
}

/** XCHG r32, m32 (opcode 0x87); implicitly locked by the processor. */
public final void xchgl(Register dst, AMD64Address src) {
    prefix(src, dst);
    emitByte(0x87);
    emitOperandHelper(dst, src);
}

/** XCHG r64, m64 (opcode 0x87 with REX.W); implicitly locked by the processor. */
public final void xchgq(Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x87);
    emitOperandHelper(dst, src);
}

/** XOR r64, imm32 (sign-extended); shared arithmetic-immediate emitter, extension /6. */
public final void xorq(Register dst, int imm32) {
    emitArithImm32q(6, dst, imm32);
}

/** XOR r64, r64 (opcode 0x33, ModRM register form). */
public final void xorq(Register dst, Register src) {
    int encode = prefixqAndEncode(dst.encoding, src.encoding);
    emitByte(0x33);
    emitByte(0xC0 | encode);
}

/** XOR r64, m64 (opcode 0x33 /r). */
public final void xorq(Register dst, AMD64Address src) {
    prefixq(src, dst);
    emitByte(0x33);
    emitOperandHelper(dst, src);
}

/**
 * Emits a memory barrier for the barrier kinds in {@code barriers} (a mask of
 * {@code MemoryBarriers} constants). On x86 only StoreLoad needs an explicit
 * instruction, and only on multiprocessor targets.
 */
public final void membar(int barriers) {
    if (target.isMP) {
        // We only have to handle StoreLoad
        if ((barriers & STORE_LOAD) != 0) {
            // All usable chips support "locked" instructions which suffice
            // as barriers, and are much faster than the alternative of
            // using cpuid instruction. We use here a locked add [rsp],0.
            // This is conveniently otherwise a no-op except for blowing
            // flags.
            // Any change to this code may need to revisit other places in
            // the code where this idiom is used, in particular the
            // orderAccess code.
            lock();
            addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
        }
    }
}

/**
 * Patches the displacement of a previously emitted branch/call at {@code branch} to
 * target {@code branchTarget}. Handles calls (0xE8), jump-table entries (0x00),
 * near/short jumps and near/short conditional jumps.
 *
 * @param branch position of the branch opcode in the code buffer
 * @param branchTarget absolute code-buffer position the branch should reach
 */
@Override
protected final void patchJumpTarget(int branch, int branchTarget) {
    int op = getByte(branch);
    assert op == 0xE8 // call
            ||
            op == 0x00 // jump table entry
            || op == 0xE9 // jmp
            || op == 0xEB // short jmp
            || (op & 0xF0) == 0x70 // short jcc
            || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
    : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;

    if (op == 0x00) {
        // Jump-table entry: stores the offset from the table base, not from this site.
        int offsetToJumpTableBase = getShort(branch + 1);
        int jumpTableBase = branch - offsetToJumpTableBase;
        int imm32 = branchTarget - jumpTableBase;
        emitInt(imm32, branch);
    } else if (op == 0xEB || (op & 0xF0) == 0x70) {

        // short offset operators (jmp and jcc)
        final int imm8 = branchTarget - (branch + 2);
        /*
         * Since a wrongly patched short branch can potentially lead to working but really bad
         * behaving code we should always fail with an exception instead of having an assert.
         */
        if (!NumUtil.isByte(imm8)) {
            throw new InternalError("branch displacement out of range: " + imm8);
        }
        emitByte(imm8, branch + 1);

    } else {

        // Near jmp/call/jcc: displacement is relative to the end of the instruction,
        // i.e. opcode bytes (1 or 2) plus the 4-byte immediate.
        int off = 1;
        if (op == 0x0F) {
            off = 2;
        }

        int imm32 = branchTarget - (branch + 4 + off);
        emitInt(imm32, branch + off);
    }
}

/** Emits an implicit null check of {@code r} by loading from [r + 0] into rax. */
public void nullCheck(Register r) {
    testl(AMD64.rax, new AMD64Address(r, 0));
}

/** Pads with NOPs until the current position is a multiple of {@code modulus}. */
@Override
public void align(int modulus) {
    if (position() % modulus != 0) {
        nop(modulus - (position() % modulus));
    }
}

/**
 * Emits a direct call instruction. Note that the actual call target is not specified, because
 * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
 * responsible to add the call address to the appropriate patching tables.
 */
public final void call() {
    emitByte(0xE8);
    emitInt(0);
}

/** CALL r64 (opcode 0xFF /2): indirect call through {@code src}. */
public final void call(Register src) {
    int encode = prefixAndEncode(src.encoding);
    emitByte(0xFF);
    emitByte(0xD0 | encode);
}

/** INT3 (opcode 0xCC): software breakpoint. */
public final void int3() {
    emitByte(0xCC);
}

// Emits a two-byte x87 instruction addressing FPU stack slot i (second byte is b2 + i).
private void emitx87(int b1, int b2, int i) {
    assert 0 <= i && i < 8 : "illegal stack offset";
    emitByte(b1);
    emitByte(b2 + i);
}

/** FLD m64fp (opcode 0xDD /0): push a double from memory onto the x87 stack. */
public final void fldd(AMD64Address src) {
    emitByte(0xDD);
    emitOperandHelper(0, src);
}

/** FLD m32fp (opcode 0xD9 /0): push a float from memory onto the x87 stack. */
public final void flds(AMD64Address src) {
    emitByte(0xD9);
    emitOperandHelper(0, src);
}

/** FLDLN2 (0xD9 0xED): push log_e(2). */
public final void fldln2() {
    emitByte(0xD9);
    emitByte(0xED);
}

/** FLDLG2 (0xD9 0xEC): push log_10(2). */
public final void fldlg2() {
    emitByte(0xD9);
    emitByte(0xEC);
}

/** FYL2X (0xD9 0xF1): ST(1) = ST(1) * log_2(ST(0)), then pop. */
public final void fyl2x() {
    emitByte(0xD9);
    emitByte(0xF1);
}

/** FSTP m32fp (opcode 0xD9 /3): store ST(0) as float and pop. */
public final void fstps(AMD64Address src) {
    emitByte(0xD9);
    emitOperandHelper(3, src);
}

/** FSTP m64fp (opcode 0xDD /3): store ST(0) as double and pop. */
public final void fstpd(AMD64Address src) {
    emitByte(0xDD);
    emitOperandHelper(3, src);
}

// Emits a two-byte x87 arithmetic op on FPU register i (second byte is b2 + i).
// NOTE(review): duplicates emitx87 apart from the assertion message.
private void emitFPUArith(int b1, int b2, int i) {
    assert 0 <= i && i < 8 : "illegal FPU register: " + i;
    emitByte(b1);
    emitByte(b2 + i);
}

/** FFREE ST(i) (0xDD 0xC0+i): mark FPU register i as empty. */
public void ffree(int i) {
    emitFPUArith(0xDD, 0xC0, i);
}

/** FINCSTP (0xD9 0xF7): increment the FPU stack-top pointer. */
public void fincstp() {
    emitByte(0xD9);
    emitByte(0xF7);
}

/** FXCH ST(i) (0xD9 0xC8+i): exchange ST(0) with ST(i). */
public void fxch(int i) {
    emitFPUArith(0xD9, 0xC8, i);
}

/** FNSTSW AX (0xDF 0xE0): store the FPU status word into AX without checking exceptions. */
public void fnstswAX() {
    emitByte(0xDF);
    emitByte(0xE0);
}

/** FWAIT (0x9B): wait for pending FPU exceptions. */
public void fwait() {
    emitByte(0x9B);
}

/** FPREM (0xD9 0xF8): partial remainder of ST(0) / ST(1). */
public void fprem() {
    emitByte(0xD9);
    emitByte(0xF8);
}

/** FSIN (0xD9 0xFE): ST(0) = sin(ST(0)). */
public final void fsin() {
    emitByte(0xD9);
    emitByte(0xFE);
}

/** FCOS (0xD9 0xFF): ST(0) = cos(ST(0)). */
public final void fcos() {
    emitByte(0xD9);
    emitByte(0xFF);
}

/** FPTAN (0xD9 0xF2): partial tangent of ST(0). */
public final void fptan() {
    emitByte(0xD9);
    emitByte(0xF2);
}

/** FSTP ST(i) (0xDD 0xD8+i): store ST(0) into ST(i) and pop. */
public final void fstp(int i) {
    emitx87(0xDD, 0xD8, i);
}

/** Creates a base+displacement address; no instruction is emitted. */
@Override
public AMD64Address makeAddress(Register base, int displacement) {
    return new AMD64Address(base, displacement);
}

/** Returns the shared rip-relative {@link #Placeholder} address used for patchable operands. */
@Override
public AMD64Address getPlaceholder() {
    return Placeholder;
}

// Common prefix + escape byte for the prefetch family (0x0F).
private void prefetchPrefix(AMD64Address src) {
    prefix(src);
    emitByte(0x0F);
}

/** PREFETCHNTA m8 (0x0F 0x18 /0): prefetch into non-temporal cache structure. */
public void prefetchnta(AMD64Address src) {
    prefetchPrefix(src);
    emitByte(0x18);
    emitOperandHelper(0, src);
}

/** PREFETCH m8 (0x0F 0x0D /0): 3DNow! prefetch for reading. Package-private. */
void prefetchr(AMD64Address src) {
    assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
    prefetchPrefix(src);
    emitByte(0x0D);
    emitOperandHelper(0, src);
}

/** PREFETCHT0 m8 (0x0F 0x18 /1): prefetch into all cache levels. */
public void prefetcht0(AMD64Address src) {
    assert supports(CPUFeature.SSE);
    prefetchPrefix(src);
    emitByte(0x18);
    emitOperandHelper(1, src);
}

/** PREFETCHT1 m8 (0x0F 0x18 /2): prefetch into L2 and higher. */
public void prefetcht1(AMD64Address src) {
    assert supports(CPUFeature.SSE);
    prefetchPrefix(src);
    emitByte(0x18);
    emitOperandHelper(2, src);
}

/** PREFETCHT2 m8 (0x0F 0x18 /3). NOTE(review): inlines prefetchPrefix, unlike its siblings. */
public void prefetcht2(AMD64Address src) {
    assert supports(CPUFeature.SSE);
    prefix(src);
    emitByte(0x0f);
    emitByte(0x18);
    emitOperandHelper(3, src);
}

/** PREFETCHW m8 (0x0F 0x0D /1): 3DNow! prefetch with intent to write. */
public void prefetchw(AMD64Address src) {
    assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
    prefix(src);
    emitByte(0x0f);
    emitByte(0x0D);
    emitOperandHelper(1, src);
}

}