/*
 * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.MASK;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import java.util.EnumSet;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends AMD64BaseAssembler {

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }
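
    /*
     * Usage sketch (illustrative, not part of the original file): clients obtain a
     * TargetDescription from JVMCI and emit instructions through the opcode objects
     * defined below. `target` is an assumed local here; close(boolean) is inherited
     * from the base Assembler class.
     *
     *   AMD64Assembler asm = new AMD64Assembler(target);
     *   AMD64RMOp.MOV.emit(asm, OperandSize.QWORD, AMD64.rax, AMD64.rbx); // mov rax, rbx
     *   byte[] code = asm.close(true);
     */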

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }
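
    /*
     * Example (sketch): the 4-bit value feeds directly into the Jcc/SETcc/CMOVcc opcode
     * bytes (short jcc is 0x70 | value, near jcc is 0x0F, 0x80 | value), and negate()
     * always flips the lowest encoding bit:
     *
     *   ConditionFlag.Less.getValue()            // 0xc
     *   ConditionFlag.Less.negate()              // GreaterEqual, encoded as 0xd
     *   ConditionFlag.Equal.negate().getValue()  // 0x5, the same encoding as NotZero
     */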

    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        ByteOrLargerAssertion(CPU, CPU, BYTE, WORD, DWORD, QWORD),
        WordOrLargerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        DwordOrLargerAssertion(CPU, CPU, DWORD, QWORD),
        WordOrDwordAssertion(CPU, CPU, WORD, QWORD),
        QwordAssertion(CPU, CPU, QWORD),
        FloatAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatAssertion(XMM, CPU, DWORD, QWORD),
        FloatToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }

    }

    protected static final int P_0F = 0x0F;
    protected static final int P_0F38 = 0x380F;
    protected static final int P_0F3A = 0x3A0F;

    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.getSizePrefix() != 0) {
                asm.emitByte(size.getSizePrefix());
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        public OperandSize[] getAllowedSizes() {
            return assertion.allowedSizes;
        }

        protected final boolean isSSEInstruction() {
            if (feature == null) {
                return false;
            }
            switch (feature) {
                case SSE:
                case SSE2:
                case SSE3:
                case SSSE3:
                case SSE4A:
                case SSE4_1:
                case SSE4_2:
                    return true;
                default:
                    return false;
            }
        }

        public final OpAssertion getAssertion() {
            return assertion;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }
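
    /*
     * Worked example (sketch) of the REX logic in emitOpcode: a QWORD operation sets
     * REX.W (0x08), and rxb contributes the REX.R/X/B bits for extended registers.
     * Encoding `add rax, r8` via the RM form (opcode 0x03, r8 in the ModRM.rm field):
     *
     *   rexPrefix = 0x40 | 0x01 (REX.B for r8) | 0x08 (REX.W for QWORD) = 0x49
     *   resulting bytes: 49 03 C0
     */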

    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            this(opcode, immIsByte, prefix, op, assertion, null);
        }

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, 0, prefix, op, assertion, feature);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.getBytes();
            }
        }
    }

    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF, OpAssertion.ByteOrLargerAssertion);
        public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC);
        public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD);
        // POPCNT, TZCNT, and LZCNT support word-sized operands. However, the legacy operand-size
        // prefix would have to be emitted before the mandatory prefix 0xF3. Since we never emit
        // bit counts for 16-bit operands, we simply use DwordOrLargerAssertion here.
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, OpAssertion.DwordOrLargerAssertion, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, OpAssertion.DwordOrLargerAssertion, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, OpAssertion.DwordOrLargerAssertion, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordAssertion);
        public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B);
        public static final AMD64RMOp CMP = new AMD64RMOp("CMP", 0x3B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefixes.
        public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);

        // TEST is documented as an MR operation, but it is symmetric, and using it as an RM
        // operation is more convenient.
        public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x10:
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }
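
    /*
     * Usage sketch: RM ops read from the rightmost (register or memory) operand, e.g.
     *
     *   AMD64RMOp.MOV.emit(asm, OperandSize.QWORD, AMD64.rax, new AMD64Address(AMD64.rsp, 8));
     *   // mov rax, qword ptr [rsp + 8]
     */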

    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89);

        // MOVD and MOVQ are the same opcode, just with different operand size prefixes.
        // Note that as MR opcodes, they have reverse operand order, so IntToFloatAssertion must
        // be used.
        public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with different operand size prefixes.
        public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x11:
                        if ((size == SS) || (size == SD)) {
                            nds = src;
                        }
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(src, nds, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(src, dst);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, src, null);
            if (isSSEInstruction()) {
                asm.simdPrefix(src, Register.None, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
            }
            asm.emitOperandHelper(src, dst, 0);
        }
    }
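
    /*
     * Usage sketch: MR ops mirror the RM forms with the register-or-memory operand on
     * the left, which for MOV is the store direction:
     *
     *   AMD64MROp.MOV.emit(asm, OperandSize.QWORD, new AMD64Address(AMD64.rsp, 8), AMD64.rax);
     *   // mov qword ptr [rsp + 8], rax
     */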

    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2);
        public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3);
        public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0);
        public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP = new AMD64MOp("POP", 0x8F, 0, OpAssertion.WordOrDwordAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }
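
    /*
     * Usage sketch: M ops encode a single operand plus the opcode extension in the
     * ModRM.reg field, e.g.
     *
     *   AMD64MOp.NEG.emit(asm, OperandSize.DWORD, AMD64.rcx); // neg ecx -> F7 D9
     */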

    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm, boolean annotateImm) {
            assert verify(asm, size, dst, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm, boolean annotateImm) {
            assert verify(asm, size, null, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }
    }
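
    /*
     * Usage sketch: MI ops append the immediate after the ModRM-encoded operand; with
     * annotateImm set, a code patching annotation records where the immediate lives:
     *
     *   AMD64MIOp.MOV.emit(asm, OperandSize.DWORD, AMD64.rax, 42);
     *   // mov eax, 42 -> C7 C0 2A 00 00 00
     */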

    /**
     * Opcodes with operand order of RMI.
     *
     * We only have one form of ROUND: the operation is always treated as having a single variant
     * input, so extending it to 3-address forms would be redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.WordOrLargerAssertion, null);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, immIsByte, prefix, op, assertion, feature);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
            }
            asm.emitOperandHelper(dst, src, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }

    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatAssertion);
        public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51);
        public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatAssertion);
        public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatAssertion);
        public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58);
        public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59);
        public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C);
        public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D);
        public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E);
        public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F);
        // @formatter:on

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }

    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }
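
    /*
     * Usage sketch: a caller typically selects the immediate form based on whether the
     * constant fits into a sign-extended byte (the 0x83 encoding):
     *
     *   AMD64MIOp op = CMP.getMIOpcode(OperandSize.DWORD, NumUtil.isByte(imm));
     *   op.emit(asm, OperandSize.DWORD, AMD64.rsi, imm);
     */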

    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
        }
    }

    private enum VEXOpAssertion {
        AVX1(CPUFeature.AVX, CPUFeature.AVX),
        AVX1_2(CPUFeature.AVX, CPUFeature.AVX2),
        AVX2(CPUFeature.AVX2, CPUFeature.AVX2),
        AVX1_128ONLY(CPUFeature.AVX, null),
        AVX1_256ONLY(null, CPUFeature.AVX),
        AVX2_256ONLY(null, CPUFeature.AVX2),
        XMM_CPU(CPUFeature.AVX, null, XMM, null, CPU, null),
        XMM_XMM_CPU(CPUFeature.AVX, null, XMM, XMM, CPU, null),
        CPU_XMM(CPUFeature.AVX, null, CPU, null, XMM, null),
        AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, CPU, null, XMM, null),
        BMI1(CPUFeature.BMI1, null, CPU, CPU, CPU, null),
        BMI2(CPUFeature.BMI2, null, CPU, CPU, CPU, null);

        private final CPUFeature l128feature;
        private final CPUFeature l256feature;

        private final RegisterCategory rCategory;
        private final RegisterCategory vCategory;
        private final RegisterCategory mCategory;
        private final RegisterCategory imm8Category;

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature) {
            this(l128feature, l256feature, XMM, XMM, XMM, XMM);
        }

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory, RegisterCategory imm8Category) {
            this.l128feature = l128feature;
            this.l256feature = l256feature;
            this.rCategory = rCategory;
            this.vCategory = vCategory;
            this.mCategory = mCategory;
            this.imm8Category = imm8Category;
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
            return check(arch, getLFlag(size), r, v, m, null);
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
            return check(arch, getLFlag(size), r, v, m, imm8);
        }

        public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
            switch (l) {
                case L128:
                    assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128 bit instruction";
                    break;
                case L256:
                    assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256 bit instruction";
                    break;
            }
            if (r != null) {
                assert r.getRegisterCategory().equals(rCategory);
            }
            if (v != null) {
                assert v.getRegisterCategory().equals(vCategory);
            }
            if (m != null) {
                assert m.getRegisterCategory().equals(mCategory);
            }
            if (imm8 != null) {
                assert imm8.getRegisterCategory().equals(imm8Category);
            }
            return true;
        }
        public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
            switch (avxSize) {
                case XMM:
                    return l128feature != null && features.contains(l128feature);
                case YMM:
                    return l256feature != null && features.contains(l256feature);
                default:
                    throw GraalError.shouldNotReachHere();
            }
        }
    }

    /**
     * Base class for VEX-encoded instructions.
     */
    public static class VexOp {
        protected final int pp;
        protected final int mmmmm;
        protected final int w;
        protected final int op;

        private final String opcode;
        protected final VEXOpAssertion assertion;

        protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            this.pp = pp;
            this.mmmmm = mmmmm;
            this.w = w;
            this.op = op;
            this.opcode = opcode;
            this.assertion = assertion;
        }

        public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
            return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size);
        }

        @Override
        public String toString() {
            return opcode;
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
     */
    public static class VexRROp extends VexOp {
        // @formatter:off
        public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
        // @formatter:on

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            assert op != 0x1A && op != 0x5A;
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
        }
    }
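
    /*
     * Usage sketch: VEX ops should be guarded before emission, since the 256-bit form
     * of an instruction may require AVX2 while its 128-bit form only needs AVX.
     * `someVexOp`, `dst` and `src` are placeholders:
     *
     *   if (someVexOp.isSupported(asm, AVXSize.YMM)) {
     *       someVexOp.emit(asm, AVXSize.YMM, dst, src);
     *   }
     */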

    /**
     * VEX-encoded instructions with an operand order of RM.
     */
    public static class VexRMOp extends VexRROp {
        // @formatter:off
        public static final VexRMOp VCVTTSS2SI = new VexRMOp("VCVTTSS2SI", P_F3, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSS2SQ = new VexRMOp("VCVTTSS2SQ", P_F3, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SI = new VexRMOp("VCVTTSD2SI", P_F2, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SQ = new VexRMOp("VCVTTSD2SQ", P_F2, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTPS2PD = new VexRMOp("VCVTPS2PD", P_, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTPD2PS = new VexRMOp("VCVTPD2PS", P_66, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTDQ2PS = new VexRMOp("VCVTDQ2PS", P_, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPS2DQ = new VexRMOp("VCVTTPS2DQ", P_F3, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPD2DQ = new VexRMOp("VCVTTPD2DQ", P_66, M_0F, WIG, 0xE6);
        public static final VexRMOp VCVTDQ2PD = new VexRMOp("VCVTDQ2PD", P_F3, M_0F, WIG, 0xE6);
        public static final VexRMOp VBROADCASTSS = new VexRMOp("VBROADCASTSS", P_66, M_0F38, W0, 0x18);
        public static final VexRMOp VBROADCASTSD = new VexRMOp("VBROADCASTSD", P_66, M_0F38, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VBROADCASTF128 = new VexRMOp("VBROADCASTF128", P_66, M_0F38, W0, 0x1A, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VBROADCASTI128 = new VexRMOp("VBROADCASTI128", P_66, M_0F38, W0, 0x5A, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMOp VPBROADCASTB = new VexRMOp("VPBROADCASTB", P_66, M_0F38, W0, 0x78, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTW = new VexRMOp("VPBROADCASTW", P_66, M_0F38, W0, 0x79, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTD = new VexRMOp("VPBROADCASTD", P_66, M_0F38, W0, 0x58, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTQ = new VexRMOp("VPBROADCASTQ", P_66, M_0F38, W0, 0x59, VEXOpAssertion.AVX2);
        public static final VexRMOp VPMOVMSKB = new VexRMOp("VPMOVMSKB", P_66, M_0F, WIG, 0xD7, VEXOpAssertion.AVX1_2_CPU_XMM);
        public static final VexRMOp VPMOVSXBW = new VexRMOp("VPMOVSXBW", P_66, M_0F38, WIG, 0x20);
        public static final VexRMOp VPMOVSXBD = new VexRMOp("VPMOVSXBD", P_66, M_0F38, WIG, 0x21);
        public static final VexRMOp VPMOVSXBQ = new VexRMOp("VPMOVSXBQ", P_66, M_0F38, WIG, 0x22);
        public static final VexRMOp VPMOVSXWD = new VexRMOp("VPMOVSXWD", P_66, M_0F38, WIG, 0x23);
        public static final VexRMOp VPMOVSXWQ = new VexRMOp("VPMOVSXWQ", P_66, M_0F38, WIG, 0x24);
        public static final VexRMOp VPMOVSXDQ = new VexRMOp("VPMOVSXDQ", P_66, M_0F38, WIG, 0x25);
        public static final VexRMOp VPMOVZXBW = new VexRMOp("VPMOVZXBW", P_66, M_0F38, WIG, 0x30);
        public static final VexRMOp VPMOVZXBD = new VexRMOp("VPMOVZXBD", P_66, M_0F38, WIG, 0x31);
        public static final VexRMOp VPMOVZXBQ = new VexRMOp("VPMOVZXBQ", P_66, M_0F38, WIG, 0x32);
        public static final VexRMOp VPMOVZXWD = new VexRMOp("VPMOVZXWD", P_66, M_0F38, WIG, 0x33);
        public static final VexRMOp VPMOVZXWQ = new VexRMOp("VPMOVZXWQ", P_66, M_0F38, WIG, 0x34);
        public static final VexRMOp VPMOVZXDQ = new VexRMOp("VPMOVZXDQ", P_66, M_0F38, WIG, 0x35);
        public static final VexRMOp VPTEST = new VexRMOp("VPTEST", P_66, M_0F38, WIG, 0x17);
        public static final VexRMOp VSQRTPD = new VexRMOp("VSQRTPD", P_66, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTPS = new VexRMOp("VSQRTPS", P_, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSD = new VexRMOp("VSQRTSD", P_F2, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSS = new VexRMOp("VSQRTSS", P_F3, M_0F, WIG, 0x51);
        public static final VexRMOp VUCOMISS = new VexRMOp("VUCOMISS", P_, M_0F, WIG, 0x2E);
        public static final VexRMOp VUCOMISD = new VexRMOp("VUCOMISD", P_66, M_0F, WIG, 0x2E);
        // @formatter:on

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }

    /**
     * VEX-encoded move instructions.
     * <p>
     * These instructions have two opcodes: op is the forward move instruction with an operand
     * order of RM, and opReverse is the reverse move instruction with an operand order of MR.
     */
    public static final class VexMoveOp extends VexRMOp {
        // @formatter:off
        public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSS = new VexMoveOp("VMOVSS", P_F3, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSD = new VexMoveOp("VMOVSD", P_F2, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVD = new VexMoveOp("VMOVD", P_66, M_0F, W0, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        public static final VexMoveOp VMOVQ = new VexMoveOp("VMOVQ", P_66, M_0F, W1, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private final int opReverse;

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }

        public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
            asm.emitByte(opReverse);
            asm.emitModRM(src, dst);
        }
    }
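
    /*
     * Usage sketch: the Register overloads inherited from VexRMOp use the forward (RM)
     * opcode, while the AMD64Address store overload and emitReverse use opReverse (MR).
     * `mem` is a placeholder AMD64Address:
     *
     *   VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, AMD64.xmm0, mem); // vmovdqu ymm0, [mem] (0x6F)
     *   VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, mem, AMD64.xmm0); // vmovdqu [mem], ymm0 (0x7F)
     */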

    public interface VexRRIOp {
        void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
    }

    /**
     * VEX-encoded instructions with an operand order of RMI.
     */
    public static final class VexRMIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexRMIOp VPERMQ = new VexRMIOp("VPERMQ", P_66, M_0F3A, W1, 0x00, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFD = new VexRMIOp("VPSHUFD", P_66, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 1);
            asm.emitByte(imm8);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of MRI.
     */
    public static final class VexMRIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
        public static final VexMRIOp VPEXTRB = new VexMRIOp("VPEXTRB", P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRW = new VexMRIOp("VPEXTRW", P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRD = new VexMRIOp("VPEXTRD", P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRQ = new VexMRIOp("VPEXTRQ", P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(src, dst);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(src, dst, 1);
            asm.emitByte(imm8);
        }
    }
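
    /*
     * Usage sketch: MRI ops place the register-or-memory operand first, so extracts
     * write into dst; here the YMM value lives in the register file entry xmm1:
     *
     *   VexMRIOp.VEXTRACTI128.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, 1);
     *   // vextracti128 xmm0, ymm1, 1 (copies the upper 128 bits)
     */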

    /**
     * VEX-encoded instructions with an operand order of RVMR.
     */
    public static class VexRVMROp extends VexOp {
        // @formatter:off
        public static final VexRVMROp VPBLENDVB = new VexRVMROp("VPBLENDVB", P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
        public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
        public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
        // @formatter:on

        protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(mask.encoding() << 4);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
            asm.emitByte(mask.encoding() << 4);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RVM.
     */
    public static class VexRVMOp extends VexOp {
        // @formatter:off
        public static final VexRVMOp VANDPS = new VexRVMOp("VANDPS", P_, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDPD = new VexRVMOp("VANDPD", P_66, M_0F, WIG, 0x54);
        public static final VexRVMOp VANDNPS = new VexRVMOp("VANDNPS", P_, M_0F, WIG, 0x55);
        public static final VexRVMOp VANDNPD = new VexRVMOp("VANDNPD", P_66, M_0F, WIG, 0x55);
        public static final VexRVMOp VORPS = new VexRVMOp("VORPS", P_, M_0F, WIG, 0x56);
        public static final VexRVMOp VORPD = new VexRVMOp("VORPD", P_66, M_0F, WIG, 0x56);
        public static final VexRVMOp VXORPS = new VexRVMOp("VXORPS", P_, M_0F, WIG, 0x57);
        public static final VexRVMOp VXORPD = new VexRVMOp("VXORPD", P_66, M_0F, WIG, 0x57);
        public static final VexRVMOp VADDPS = new VexRVMOp("VADDPS", P_, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDPD = new VexRVMOp("VADDPD", P_66, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSS = new VexRVMOp("VADDSS", P_F3, M_0F, WIG, 0x58);
        public static final VexRVMOp VADDSD = new VexRVMOp("VADDSD", P_F2, M_0F, WIG, 0x58);
        public static final VexRVMOp VMULPS = new VexRVMOp("VMULPS", P_, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULPD = new VexRVMOp("VMULPD", P_66, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULSS = new VexRVMOp("VMULSS", P_F3, M_0F, WIG, 0x59);
        public static final VexRVMOp VMULSD = new VexRVMOp("VMULSD", P_F2, M_0F, WIG, 0x59);
        public static final VexRVMOp VSUBPS = new VexRVMOp("VSUBPS", P_, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBPD = new VexRVMOp("VSUBPD", P_66, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBSS = new VexRVMOp("VSUBSS", P_F3, M_0F, WIG, 0x5C);
        public static final VexRVMOp VSUBSD = new VexRVMOp("VSUBSD", P_F2, M_0F, WIG, 0x5C);
        public static final VexRVMOp VMINPS = new VexRVMOp("VMINPS", P_, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINPD = new VexRVMOp("VMINPD", P_66, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINSS = new VexRVMOp("VMINSS", P_F3, M_0F, WIG, 0x5D);
        public static final VexRVMOp VMINSD = new VexRVMOp("VMINSD", P_F2, M_0F, WIG, 0x5D);
        public static final VexRVMOp VDIVPS = new VexRVMOp("VDIVPS", P_, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVPD = new VexRVMOp("VDIVPD", P_66, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVSS = new VexRVMOp("VDIVSS", P_F3, M_0F, WIG, 0x5E);
        public static final VexRVMOp VDIVSD = new VexRVMOp("VDIVSD", P_F2, M_0F, WIG, 0x5E);
        public static final VexRVMOp VMAXPS = new VexRVMOp("VMAXPS", P_, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXPD = new VexRVMOp("VMAXPD", P_66, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXSS = new VexRVMOp("VMAXSS", P_F3, M_0F, WIG, 0x5F);
        public static final VexRVMOp VMAXSD = new VexRVMOp("VMAXSD", P_F2, M_0F, WIG, 0x5F);
        public static final VexRVMOp VADDSUBPS = new VexRVMOp("VADDSUBPS", P_F2, M_0F, WIG, 0xD0);
        public static final VexRVMOp VADDSUBPD = new VexRVMOp("VADDSUBPD", P_66, M_0F, WIG, 0xD0);
        public static final VexRVMOp VPAND = new VexRVMOp("VPAND", P_66, M_0F, WIG, 0xDB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPOR = new VexRVMOp("VPOR", P_66, M_0F, WIG, 0xEB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPXOR = new VexRVMOp("VPXOR", P_66, M_0F, WIG, 0xEF, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDB = new VexRVMOp("VPADDB", P_66, M_0F, WIG, 0xFC, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDW = new VexRVMOp("VPADDW", P_66, M_0F, WIG, 0xFD, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDD = new VexRVMOp("VPADDD", P_66, M_0F, WIG, 0xFE, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDQ = new VexRVMOp("VPADDQ", P_66, M_0F, WIG, 0xD4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHUW = new VexRVMOp("VPMULHUW", P_66, M_0F, WIG, 0xE4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHW = new VexRVMOp("VPMULHW", P_66, M_0F, WIG, 0xE5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLW = new VexRVMOp("VPMULLW", P_66, M_0F, WIG, 0xD5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLD = new VexRVMOp("VPMULLD", P_66, M_0F38, WIG, 0x40, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBB = new VexRVMOp("VPSUBB", P_66, M_0F, WIG, 0xF8, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBW = new VexRVMOp("VPSUBW", P_66, M_0F, WIG, 0xF9, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBD = new VexRVMOp("VPSUBD", P_66, M_0F, WIG, 0xFA, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBQ = new VexRVMOp("VPSUBQ", P_66, M_0F, WIG, 0xFB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSHUFB = new VexRVMOp("VPSHUFB", P_66, M_0F38, WIG, 0x00, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VCVTSD2SS = new VexRVMOp("VCVTSD2SS", P_F2, M_0F, WIG, 0x5A);
        public static final VexRVMOp VCVTSS2SD = new VexRVMOp("VCVTSS2SD", P_F3, M_0F, WIG, 0x5A);
        public static final VexRVMOp VCVTSI2SD = new VexRVMOp("VCVTSI2SD", P_F2, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SD = new VexRVMOp("VCVTSQ2SD", P_F2, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSI2SS = new VexRVMOp("VCVTSI2SS", P_F3, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SS = new VexRVMOp("VCVTSQ2SS", P_F3, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VPCMPEQB = new VexRVMOp("VPCMPEQB", P_66, M_0F, WIG, 0x74, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQW = new VexRVMOp("VPCMPEQW", P_66, M_0F, WIG, 0x75, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
        }
    }

    public static final class VexGeneralPurposeRVMOp extends VexOp {
        // @formatter:off
        public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1);
        public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2);
        // @formatter:on

        private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null);
            assert size == AVXSize.DWORD || size == AVXSize.QWORD;
W0 : W1); 1360 asm.emitByte(op); 1361 asm.emitOperandHelper(dst, src2, 0); 1362 } 1363 } 1364 1365 public static final class VexGeneralPurposeRMVOp extends VexOp { 1366 // @formatter:off 1367 public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1); 1368 public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1369 public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1370 public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1371 public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1372 // @formatter:on 1373 1374 private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1375 super(opcode, pp, mmmmm, w, op, assertion); 1376 } 1377 1378 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1379 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null); 1380 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1381 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1); 1382 asm.emitByte(op); 1383 asm.emitModRM(dst, src1); 1384 } 1385 1386 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) { 1387 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null); 1388 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1389 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1); 1390 asm.emitByte(op); 1391 asm.emitOperandHelper(dst, src1, 0); 1392 } 1393 } 1394 1395 /** 1396 * VEX-encoded shift instructions with an operand order of either RVM or VMI. 
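 * <p>
 * Illustrative use (an editor's sketch, not part of the original file; it assumes an
 * {@code AMD64Assembler asm} whose target supports AVX, with {@code dst}, {@code src} and
 * {@code shift} being XMM registers):
 *
 * <pre>
 * // RVM form: shift each dword left by the count held in the low 64 bits of the shift register
 * VexShiftOp.VPSLLD.emit(asm, AVXSize.XMM, dst, src, shift);
 * // VMI form: shift each dword left by the immediate 4, encoded via the /6 opcode extension
 * VexShiftOp.VPSLLD.emit(asm, AVXSize.XMM, dst, src, 4);
 * </pre>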
1397 */ 1398 public static final class VexShiftOp extends VexRVMOp implements VexRRIOp { 1399 // @formatter:off 1400 public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2); 1401 public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2); 1402 public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2); 1403 public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4); 1404 public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4); 1405 public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6); 1406 public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6); 1407 public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6); 1408 // @formatter:on 1409 1410 private final int immOp; 1411 private final int r; 1412 1413 private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) { 1414 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2); 1415 this.immOp = immOp; 1416 this.r = r; 1417 } 1418 1419 @Override 1420 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { 1421 assert assertion.check((AMD64) asm.target.arch, size, null, dst, src); 1422 asm.vexPrefix(null, dst, src, size, pp, mmmmm, w); 1423 asm.emitByte(immOp); 1424 asm.emitModRM(r, src); 1425 asm.emitByte(imm8); 1426 } 1427 } 1428 1429 public static final class VexMaskMoveOp extends VexOp { 1430 // @formatter:off 1431 public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E); 1432 public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F); 1433 public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1434 public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1435 // @formatter:on 1436 1437 private final int opReverse; 1438 1439 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) { 1440 this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1); 1441 } 1442 1443 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) { 1444 super(opcode, pp, mmmmm, w, op, assertion); 1445 this.opReverse = opReverse; 1446 } 1447 1448 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) { 1449 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null); 1450 asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w); 1451 asm.emitByte(op); 1452 asm.emitOperandHelper(dst, src, 0); 1453 } 1454 1455 public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) { 1456 assert assertion.check((AMD64) asm.target.arch, size, src, mask, null); 1457 asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w); 1458 asm.emitByte(opReverse); 1459 asm.emitOperandHelper(src, dst, 0); 1460 } 1461 } 1462 1463 /** 1464 * VEX-encoded instructions with an operand order of RVMI. 
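 * <p>
 * Illustrative use (an editor's sketch, not part of the original file; it assumes an
 * {@code AMD64Assembler asm} whose target supports AVX, and XMM registers {@code dst} and
 * {@code src}):
 *
 * <pre>
 * // imm8 0x1B selects elements 3,2,1,0, so with both source operands equal this
 * // reverses the four packed floats
 * VexRVMIOp.VSHUFPS.emit(asm, AVXSize.XMM, dst, src, src, 0x1B);
 * </pre>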
1465 */ 1466 public static final class VexRVMIOp extends VexOp { 1467 // @formatter:off 1468 public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6); 1469 public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6); 1470 public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY); 1471 public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY); 1472 // @formatter:on 1473 1474 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) { 1475 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1476 } 1477 1478 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1479 super(opcode, pp, mmmmm, w, op, assertion); 1480 } 1481 1482 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) { 1483 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1484 assert (imm8 & 0xFF) == imm8; 1485 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); 1486 asm.emitByte(op); 1487 asm.emitModRM(dst, src2); 1488 asm.emitByte(imm8); 1489 } 1490 1491 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) { 1492 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1493 assert (imm8 & 0xFF) == imm8; 1494 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); 1495 asm.emitByte(op); 1496 asm.emitOperandHelper(dst, src2, 1); 1497 asm.emitByte(imm8); 1498 } 1499 } 1500 1501 /** 1502 * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a 1503 * comparison operator. 1504 */ 1505 public static final class VexFloatCompareOp extends VexOp { 1506 // @formatter:off 1507 public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2); 1508 public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2); 1509 public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2); 1510 public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2); 1511 // @formatter:on 1512 1513 public enum Predicate { 1514 EQ_OQ(0x00), 1515 LT_OS(0x01), 1516 LE_OS(0x02), 1517 UNORD_Q(0x03), 1518 NEQ_UQ(0x04), 1519 NLT_US(0x05), 1520 NLE_US(0x06), 1521 ORD_Q(0x07), 1522 EQ_UQ(0x08), 1523 NGE_US(0x09), 1524 NGT_US(0x0a), 1525 FALSE_OQ(0x0b), 1526 NEQ_OQ(0x0c), 1527 GE_OS(0x0d), 1528 GT_OS(0x0e), 1529 TRUE_UQ(0x0f), 1530 EQ_OS(0x10), 1531 LT_OQ(0x11), 1532 LE_OQ(0x12), 1533 UNORD_S(0x13), 1534 NEQ_US(0x14), 1535 NLT_UQ(0x15), 1536 NLE_UQ(0x16), 1537 ORD_S(0x17), 1538 EQ_US(0x18), 1539 NGE_UQ(0x19), 1540 NGT_UQ(0x1a), 1541 FALSE_OS(0x1b), 1542 NEQ_OS(0x1c), 1543 GE_OQ(0x1d), 1544 GT_OQ(0x1e), 1545 TRUE_US(0x1f); 1546 1547 private int imm8; 1548 1549 Predicate(int imm8) { 1550 this.imm8 = imm8; 1551 } 1552 1553 public static Predicate getPredicate(Condition condition, boolean unorderedIsTrue) { 1554 if (unorderedIsTrue) { 1555 switch (condition) { 1556 case EQ: 1557 return EQ_UQ; 1558 case NE: 1559 return NEQ_UQ; 1560 case LT: 1561 return NGE_UQ; 1562 case LE: 1563 return NGT_UQ; 1564 case GT: 1565 return NLE_UQ; 1566 case GE: 1567 return NLT_UQ; 1568 default: 1569 throw GraalError.shouldNotReachHere(); 1570 } 1571 } else { 1572 switch (condition) { 1573 case EQ: 1574 return EQ_OQ; 1575 case NE: 1576
return NEQ_OQ; 1577 case LT: 1578 return LT_OQ; 1579 case LE: 1580 return LE_OQ; 1581 case GT: 1582 return GT_OQ; 1583 case GE: 1584 return GE_OQ; 1585 default: 1586 throw GraalError.shouldNotReachHere(); 1587 } 1588 } 1589 } 1590 } 1591 1592 private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) { 1593 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1594 } 1595 1596 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) { 1597 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1598 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); 1599 asm.emitByte(op); 1600 asm.emitModRM(dst, src2); 1601 asm.emitByte(p.imm8); 1602 } 1603 1604 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) { 1605 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1606 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w); 1607 asm.emitByte(op); 1608 asm.emitOperandHelper(dst, src2, 1); 1609 asm.emitByte(p.imm8); 1610 } 1611 } 1612 1613 public final void addl(AMD64Address dst, int imm32) { 1614 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1615 } 1616 1617 public final void addl(Register dst, int imm32) { 1618 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1619 } 1620 1621 public final void addl(Register dst, Register src) { 1622 ADD.rmOp.emit(this, DWORD, dst, src); 1623 } 1624 1625 public final void addpd(Register dst, Register src) { 1626 SSEOp.ADD.emit(this, PD, dst, src); 1627 } 1628 1629 public final void addpd(Register dst, AMD64Address src) { 1630 SSEOp.ADD.emit(this, PD, dst, src); 1631 } 1632 1633 public final void addsd(Register dst, Register src) { 1634 SSEOp.ADD.emit(this, SD, dst, src); 1635 } 1636 1637 public final void addsd(Register dst, AMD64Address src) { 1638 SSEOp.ADD.emit(this, SD, dst, src); 1639 } 1640 1641 private void addrNop4() { 1642 // 4 bytes: NOP DWORD PTR [EAX+0] 1643 emitByte(0x0F); 1644 emitByte(0x1F); 1645 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc); 1646 emitByte(0); // 8-bit offset (1 byte) 1647 } 1648 1649 private void addrNop5() { 1650 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset 1651 emitByte(0x0F); 1652 emitByte(0x1F); 1653 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); 1654 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1655 emitByte(0); // 8-bit offset (1 byte) 1656 } 1657 1658 private void addrNop7() { 1659 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset 1660 emitByte(0x0F); 1661 emitByte(0x1F); 1662 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); 1663 emitInt(0); // 32-bit offset (4 bytes) 1664 } 1665 1666 private void addrNop8() { 1667 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset 1668 emitByte(0x0F); 1669 emitByte(0x1F); 1670 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); 1671 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1672 emitInt(0); // 32-bit offset (4 bytes) 1673 } 1674 1675 public final void andl(Register dst, int imm32) { 1676 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1677 } 1678 1679 public final void andl(Register dst, Register src) { 1680 AND.rmOp.emit(this, DWORD, dst, src); 1681 } 1682 1683 public final void andpd(Register dst, Register src) { 1684 SSEOp.AND.emit(this, PD, dst, src); 1685 } 1686 1687 public final void andpd(Register dst, AMD64Address src) { 1688 SSEOp.AND.emit(this, PD, dst, src); 1689 } 1690 1691 public final void bsfq(Register dst,
Register src) { 1692 prefixq(dst, src); 1693 emitByte(0x0F); 1694 emitByte(0xBC); 1695 emitModRM(dst, src); 1696 } 1697 1698 public final void bsrl(Register dst, Register src) { 1699 prefix(dst, src); 1700 emitByte(0x0F); 1701 emitByte(0xBD); 1702 emitModRM(dst, src); 1703 } 1704 1705 public final void bswapl(Register reg) { 1706 prefix(reg); 1707 emitByte(0x0F); 1708 emitModRM(1, reg); 1709 } 1710 1711 public final void cdql() { 1712 emitByte(0x99); 1713 } 1714 1715 public final void cmovl(ConditionFlag cc, Register dst, Register src) { 1716 prefix(dst, src); 1717 emitByte(0x0F); 1718 emitByte(0x40 | cc.getValue()); 1719 emitModRM(dst, src); 1720 } 1721 1722 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) { 1723 prefix(src, dst); 1724 emitByte(0x0F); 1725 emitByte(0x40 | cc.getValue()); 1726 emitOperandHelper(dst, src, 0); 1727 } 1728 1729 public final void cmpl(Register dst, int imm32) { 1730 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1731 } 1732 1733 public final void cmpl(Register dst, Register src) { 1734 CMP.rmOp.emit(this, DWORD, dst, src); 1735 } 1736 1737 public final void cmpl(Register dst, AMD64Address src) { 1738 CMP.rmOp.emit(this, DWORD, dst, src); 1739 } 1740 1741 public final void cmpl(AMD64Address dst, int imm32) { 1742 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1743 } 1744 1745 /** 1746 * The 8-bit cmpxchg compares the value at adr with the contents of rax; if the values are equal, 1747 * reg is stored at adr, otherwise the value at adr is loaded into rax. The ZF flag is set if the compared 1748 * values were equal, and cleared otherwise. 1749 */ 1750 public final void cmpxchgb(Register reg, AMD64Address adr) { // cmpxchg 1751 prefixb(adr, reg); 1752 emitByte(0x0F); 1753 emitByte(0xB0); 1754 emitOperandHelper(reg, adr, 0); 1755 } 1756 1757 /** 1758 * The 16-bit cmpxchg compares the value at adr with the contents of rax; if the values are equal, 1759 * reg is stored at adr, otherwise the value at adr is loaded into rax. The ZF flag is set if the 1760 * compared values were equal, and cleared otherwise. 1761 */ 1762 public final void cmpxchgw(Register reg, AMD64Address adr) { // cmpxchg 1763 emitByte(0x66); // Switch to 16-bit mode. 1764 prefix(adr, reg); 1765 emitByte(0x0F); 1766 emitByte(0xB1); 1767 emitOperandHelper(reg, adr, 0); 1768 } 1769 1770 /** 1771 * The 32-bit cmpxchg compares the value at adr with the contents of rax; if the values are equal, 1772 * reg is stored at adr, otherwise the value at adr is loaded into rax. The ZF flag is set if the 1773 * compared values were equal, and cleared otherwise.
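 * <p>
 * A typical compare-and-swap retry loop (an editor's sketch, not part of the original file;
 * {@code adr} is the memory operand, {@code newVal} is assumed to hold the proposed value, and
 * the helpers used are declared elsewhere in this class):
 *
 * <pre>
 * Label retry = new Label();
 * bind(retry);
 * movl(AMD64.rax, adr);          // load the expected old value into rax
 * // ... compute the new value in newVal from rax ...
 * lock();
 * cmpxchgl(newVal, adr);         // ZF is set iff the swap succeeded
 * jcc(ConditionFlag.NotZero, retry);
 * </pre>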
1774 */ 1775 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg 1776 prefix(adr, reg); 1777 emitByte(0x0F); 1778 emitByte(0xB1); 1779 emitOperandHelper(reg, adr, 0); 1780 } 1781 1782 public final void cvtsi2sdl(Register dst, Register src) { 1783 SSEOp.CVTSI2SD.emit(this, DWORD, dst, src); 1784 } 1785 1786 public final void cvttsd2sil(Register dst, Register src) { 1787 SSEOp.CVTTSD2SI.emit(this, DWORD, dst, src); 1788 } 1789 1790 public final void decl(AMD64Address dst) { 1791 prefix(dst); 1792 emitByte(0xFF); 1793 emitOperandHelper(1, dst, 0); 1794 } 1795 1796 public final void divsd(Register dst, Register src) { 1797 SSEOp.DIV.emit(this, SD, dst, src); 1798 } 1799 1800 public final void hlt() { 1801 emitByte(0xF4); 1802 } 1803 1804 public final void imull(Register dst, Register src, int value) { 1805 if (isByte(value)) { 1806 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value); 1807 } else { 1808 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value); 1809 } 1810 } 1811 1812 public final void incl(AMD64Address dst) { 1813 prefix(dst); 1814 emitByte(0xFF); 1815 emitOperandHelper(0, dst, 0); 1816 } 1817 1818 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { 1819 int shortSize = 2; 1820 int longSize = 6; 1821 long disp = jumpTarget - position(); 1822 if (!forceDisp32 && isByte(disp - shortSize)) { 1823 // 0111 tttn #8-bit disp 1824 emitByte(0x70 | cc.getValue()); 1825 emitByte((int) ((disp - shortSize) & 0xFF)); 1826 } else { 1827 // 0000 1111 1000 tttn #32-bit disp 1828 assert isInt(disp - longSize) : "must be 32-bit offset (call4)"; 1829 emitByte(0x0F); 1830 emitByte(0x80 | cc.getValue()); 1831 emitInt((int) (disp - longSize)); 1832 } 1833 } 1834 1835 public final void jcc(ConditionFlag cc, Label l) { 1836 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; 1837 if (l.isBound()) { 1838 jcc(cc, l.position(), false); 1839 } else { 1840 // Note: we could eliminate conditional jumps to this jump if the condition 1841 // is the same; however, that seems to be a rather unlikely case. 1842 // Note: use jccb() if the label to be bound is very close, to get 1843 // an 8-bit displacement 1844 l.addPatchAt(position()); 1845 emitByte(0x0F); 1846 emitByte(0x80 | cc.getValue()); 1847 emitInt(0); 1848 } 1849 1850 } 1851 1852 public final void jccb(ConditionFlag cc, Label l) { 1853 if (l.isBound()) { 1854 int shortSize = 2; 1855 int entry = l.position(); 1856 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp"; 1857 long disp = entry - position(); 1858 // 0111 tttn #8-bit disp 1859 emitByte(0x70 | cc.getValue()); 1860 emitByte((int) ((disp - shortSize) & 0xFF)); 1861 } else { 1862 l.addPatchAt(position()); 1863 emitByte(0x70 | cc.getValue()); 1864 emitByte(0); 1865 } 1866 } 1867 1868 public final void jmp(int jumpTarget, boolean forceDisp32) { 1869 int shortSize = 2; 1870 int longSize = 5; 1871 long disp = jumpTarget - position(); 1872 if (!forceDisp32 && isByte(disp - shortSize)) { 1873 emitByte(0xEB); 1874 emitByte((int) ((disp - shortSize) & 0xFF)); 1875 } else { 1876 emitByte(0xE9); 1877 emitInt((int) (disp - longSize)); 1878 } 1879 } 1880 1881 @Override 1882 public final void jmp(Label l) { 1883 if (l.isBound()) { 1884 jmp(l.position(), false); 1885 } else { 1886 // By default, forward jumps are always 32-bit displacements, since 1887 // we can't yet know where the label will be bound. If you're sure that 1888 // the forward jump will not run beyond 127 bytes, use jmpb to 1889 // force an 8-bit displacement.
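            // Illustrative pattern (editor's sketch) for a forward jump through a label:
            //   Label done = new Label();
            //   jmp(done);    // emits E9 00 00 00 00; the zero displacement is fixed up
            //   ...           // when bind(done) triggers patchJumpTarget
            //   bind(done);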
1890 1891 l.addPatchAt(position()); 1892 emitByte(0xE9); 1893 emitInt(0); 1894 } 1895 } 1896 1897 public final void jmp(Register entry) { 1898 prefix(entry); 1899 emitByte(0xFF); 1900 emitModRM(4, entry); 1901 } 1902 1903 public final void jmp(AMD64Address adr) { 1904 prefix(adr); 1905 emitByte(0xFF); 1906 emitOperandHelper(AMD64.rsp, adr, 0); 1907 } 1908 1909 public final void jmpb(Label l) { 1910 if (l.isBound()) { 1911 int shortSize = 2; 1912 int entry = l.position(); 1913 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp"; 1914 long offs = entry - position(); 1915 emitByte(0xEB); 1916 emitByte((int) ((offs - shortSize) & 0xFF)); 1917 } else { 1918 1919 l.addPatchAt(position()); 1920 emitByte(0xEB); 1921 emitByte(0); 1922 } 1923 } 1924 1925 public final void lead(Register dst, AMD64Address src) { 1926 prefix(src, dst); 1927 emitByte(0x8D); 1928 emitOperandHelper(dst, src, 0); 1929 } 1930 1931 public final void leaq(Register dst, AMD64Address src) { 1932 prefixq(src, dst); 1933 emitByte(0x8D); 1934 emitOperandHelper(dst, src, 0); 1935 } 1936 1937 public final void leave() { 1938 emitByte(0xC9); 1939 } 1940 1941 public final void lock() { 1942 emitByte(0xF0); 1943 } 1944 1945 public final void movapd(Register dst, Register src) { 1946 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 1947 simdPrefix(dst, Register.None, src, PD, P_0F, false); 1948 emitByte(0x28); 1949 emitModRM(dst, src); 1950 } 1951 1952 public final void movaps(Register dst, Register src) { 1953 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 1954 simdPrefix(dst, Register.None, src, PS, P_0F, false); 1955 emitByte(0x28); 1956 emitModRM(dst, src); 1957 } 1958 1959 public final void movb(AMD64Address dst, int imm8) { 1960 prefix(dst); 1961 emitByte(0xC6); 1962 emitOperandHelper(0, dst, 1); 1963 emitByte(imm8); 1964 } 1965 1966 public final void movb(AMD64Address dst, Register src) { 1967 assert src.getRegisterCategory().equals(CPU) : "must have byte register"; 1968 prefixb(dst, src); 1969 emitByte(0x88); 1970 emitOperandHelper(src, dst, 0); 1971 } 1972 1973 public final void movl(Register dst, int imm32) { 1974 movl(dst, imm32, false); 1975 } 1976 1977 public final void movl(Register dst, int imm32, boolean annotateImm) { 1978 int insnPos = position(); 1979 prefix(dst); 1980 emitByte(0xB8 + encode(dst)); 1981 int immPos = position(); 1982 emitInt(imm32); 1983 int nextInsnPos = position(); 1984 if (annotateImm && codePatchingAnnotationConsumer != null) { 1985 codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 1986 } 1987 } 1988 1989 public final void movl(Register dst, Register src) { 1990 prefix(dst, src); 1991 emitByte(0x8B); 1992 emitModRM(dst, src); 1993 } 1994 1995 public final void movl(Register dst, AMD64Address src) { 1996 prefix(src, dst); 1997 emitByte(0x8B); 1998 emitOperandHelper(dst, src, 0); 1999 } 2000 2001 /** 2002 * @param wide use the 4-byte encoding for displacements that would normally fit in a byte 2003 */ 2004 public final void movl(Register dst, AMD64Address src, boolean wide) { 2005 prefix(src, dst); 2006 emitByte(0x8B); 2007 emitOperandHelper(dst, src, wide, 0); 2008 } 2009 2010 public final void movl(AMD64Address dst, int imm32) { 2011 prefix(dst); 2012 emitByte(0xC7); 2013 emitOperandHelper(0, dst, 4); 2014 emitInt(imm32); 2015 } 2016 2017 public final void movl(AMD64Address dst, Register src) { 2018 prefix(dst, src);
2019 emitByte(0x89); 2020 emitOperandHelper(src, dst, 0); 2021 } 2022 2023 /** 2024 * New CPUs require use of movsd and movss to avoid partial register stall when loading from 2025 * memory. But for old Opteron use movlpd instead of movsd. The selection is done in 2026 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and 2027 * {@link AMD64MacroAssembler#movflt(Register, Register)}. 2028 */ 2029 public final void movlpd(Register dst, AMD64Address src) { 2030 assert dst.getRegisterCategory().equals(XMM); 2031 simdPrefix(dst, dst, src, PD, P_0F, false); 2032 emitByte(0x12); 2033 emitOperandHelper(dst, src, 0); 2034 } 2035 2036 public final void movlhps(Register dst, Register src) { 2037 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2038 simdPrefix(dst, src, src, PS, P_0F, false); 2039 emitByte(0x16); 2040 emitModRM(dst, src); 2041 } 2042 2043 public final void movq(Register dst, AMD64Address src) { 2044 movq(dst, src, false); 2045 } 2046 2047 public final void movq(Register dst, AMD64Address src, boolean wide) { 2048 if (dst.getRegisterCategory().equals(XMM)) { 2049 simdPrefix(dst, Register.None, src, SS, P_0F, false); 2050 emitByte(0x7E); 2051 emitOperandHelper(dst, src, wide, 0); 2052 } else { 2053 // gpr version of movq 2054 prefixq(src, dst); 2055 emitByte(0x8B); 2056 emitOperandHelper(dst, src, wide, 0); 2057 } 2058 } 2059 2060 public final void movq(Register dst, Register src) { 2061 prefixq(dst, src); 2062 emitByte(0x8B); 2063 emitModRM(dst, src); 2064 } 2065 2066 public final void movq(AMD64Address dst, Register src) { 2067 if (src.getRegisterCategory().equals(XMM)) { 2068 simdPrefix(src, Register.None, dst, PD, P_0F, true); 2069 emitByte(0xD6); 2070 emitOperandHelper(src, dst, 0); 2071 } else { 2072 // gpr version of movq 2073 prefixq(dst, src); 2074 emitByte(0x89); 2075 emitOperandHelper(src, dst, 0); 2076 } 2077 } 2078 2079 public final void movsbl(Register dst, AMD64Address src) { 2080 prefix(src, dst); 2081 emitByte(0x0F); 2082 emitByte(0xBE); 2083 emitOperandHelper(dst, src, 0); 2084 } 2085 2086 public final void movsbl(Register dst, Register src) { 2087 prefix(dst, false, src, true); 2088 emitByte(0x0F); 2089 emitByte(0xBE); 2090 emitModRM(dst, src); 2091 } 2092 2093 public final void movsbq(Register dst, AMD64Address src) { 2094 prefixq(src, dst); 2095 emitByte(0x0F); 2096 emitByte(0xBE); 2097 emitOperandHelper(dst, src, 0); 2098 } 2099 2100 public final void movsbq(Register dst, Register src) { 2101 prefixq(dst, src); 2102 emitByte(0x0F); 2103 emitByte(0xBE); 2104 emitModRM(dst, src); 2105 } 2106 2107 public final void movsd(Register dst, Register src) { 2108 AMD64RMOp.MOVSD.emit(this, SD, dst, src); 2109 } 2110 2111 public final void movsd(Register dst, AMD64Address src) { 2112 AMD64RMOp.MOVSD.emit(this, SD, dst, src); 2113 } 2114 2115 public final void movsd(AMD64Address dst, Register src) { 2116 AMD64MROp.MOVSD.emit(this, SD, dst, src); 2117 } 2118 2119 public final void movss(Register dst, Register src) { 2120 AMD64RMOp.MOVSS.emit(this, SS, dst, src); 2121 } 2122 2123 public final void movss(Register dst, AMD64Address src) { 2124 AMD64RMOp.MOVSS.emit(this, SS, dst, src); 2125 } 2126 2127 public final void movss(AMD64Address dst, Register src) { 2128 AMD64MROp.MOVSS.emit(this, SS, dst, src); 2129 } 2130 2131 public final void mulpd(Register dst, Register src) { 2132 SSEOp.MUL.emit(this, PD, dst, src); 2133 } 2134 2135 public final void mulpd(Register dst, AMD64Address src) { 2136 SSEOp.MUL.emit(this, PD, dst, src); 2137 
} 2138 2139 public final void mulsd(Register dst, Register src) { 2140 SSEOp.MUL.emit(this, SD, dst, src); 2141 } 2142 2143 public final void mulsd(Register dst, AMD64Address src) { 2144 SSEOp.MUL.emit(this, SD, dst, src); 2145 } 2146 2147 public final void mulss(Register dst, Register src) { 2148 SSEOp.MUL.emit(this, SS, dst, src); 2149 } 2150 2151 public final void movswl(Register dst, AMD64Address src) { 2152 prefix(src, dst); 2153 emitByte(0x0F); 2154 emitByte(0xBF); 2155 emitOperandHelper(dst, src, 0); 2156 } 2157 2158 public final void movw(AMD64Address dst, int imm16) { 2159 emitByte(0x66); // switch to 16-bit mode 2160 prefix(dst); 2161 emitByte(0xC7); 2162 emitOperandHelper(0, dst, 2); 2163 emitShort(imm16); 2164 } 2165 2166 public final void movw(AMD64Address dst, Register src) { 2167 emitByte(0x66); 2168 prefix(dst, src); 2169 emitByte(0x89); 2170 emitOperandHelper(src, dst, 0); 2171 } 2172 2173 public final void movzbl(Register dst, AMD64Address src) { 2174 prefix(src, dst); 2175 emitByte(0x0F); 2176 emitByte(0xB6); 2177 emitOperandHelper(dst, src, 0); 2178 } 2179 2180 public final void movzbl(Register dst, Register src) { 2181 AMD64RMOp.MOVZXB.emit(this, DWORD, dst, src); 2182 } 2183 2184 public final void movzbq(Register dst, Register src) { 2185 AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src); 2186 } 2187 2188 public final void movzwl(Register dst, AMD64Address src) { 2189 prefix(src, dst); 2190 emitByte(0x0F); 2191 emitByte(0xB7); 2192 emitOperandHelper(dst, src, 0); 2193 } 2194 2195 public final void negl(Register dst) { 2196 NEG.emit(this, DWORD, dst); 2197 } 2198 2199 public final void notl(Register dst) { 2200 NOT.emit(this, DWORD, dst); 2201 } 2202 2203 public final void notq(Register dst) { 2204 NOT.emit(this, QWORD, dst); 2205 } 2206 2207 @Override 2208 public final void ensureUniquePC() { 2209 nop(); 2210 } 2211 2212 public final void nop() { 2213 nop(1); 2214 } 2215 2216 public void nop(int count) { 2217 int i = count; 2218 if (UseNormalNop) { 2219 assert i > 0 : " "; 2220 // The fancy nops aren't currently recognized by debuggers, making it a 2221 // pain to disassemble code while debugging. If asserts are on, clearly 2222 // speed is not an issue, so simply use the single-byte traditional nop 2223 // to do alignment. 2224 2225 for (; i > 0; i--) { 2226 emitByte(0x90); 2227 } 2228 return; 2229 } 2230 2231 if (UseAddressNop) { 2232 // 2233 // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
2234 // 1: 0x90 2235 // 2: 0x66 0x90 2236 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching-safe padding) 2237 // 4: 0x0F 0x1F 0x40 0x00 2238 // 5: 0x0F 0x1F 0x44 0x00 0x00 2239 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2240 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2241 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2242 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2243 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2244 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2245 2246 // The rest of the encoding is AMD-specific: use consecutive address nops 2247 2248 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2249 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2250 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2251 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2252 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2253 // Size prefixes (0x66) are added for larger sizes 2254 2255 while (i >= 22) { 2256 i -= 11; 2257 emitByte(0x66); // size prefix 2258 emitByte(0x66); // size prefix 2259 emitByte(0x66); // size prefix 2260 addrNop8(); 2261 } 2262 // Generate the first nop for sizes between 12 and 21 2263 switch (i) { 2264 case 21: 2265 i -= 11; 2266 emitByte(0x66); // size prefix 2267 emitByte(0x66); // size prefix 2268 emitByte(0x66); // size prefix 2269 addrNop8(); 2270 break; 2271 case 20: 2272 case 19: 2273 i -= 10; 2274 emitByte(0x66); // size prefix 2275 emitByte(0x66); // size prefix 2276 addrNop8(); 2277 break; 2278 case 18: 2279 case 17: 2280 i -= 9; 2281 emitByte(0x66); // size prefix 2282 addrNop8(); 2283 break; 2284 case 16: 2285 case 15: 2286 i -= 8; 2287 addrNop8(); 2288 break; 2289 case 14: 2290 case 13: 2291 i -= 7; 2292 addrNop7(); 2293 break; 2294 case 12: 2295 i -= 6; 2296 emitByte(0x66); // size prefix 2297 addrNop5(); 2298 break; 2299 default: 2300 assert i < 12; 2301 } 2302 2303 // Generate the second nop for sizes between 1 and 11 2304 switch (i) { 2305 case 11: 2306 emitByte(0x66); // size prefix 2307 emitByte(0x66); // size prefix 2308 emitByte(0x66); // size prefix 2309 addrNop8(); 2310 break; 2311 case 10: 2312 emitByte(0x66); // size prefix 2313 emitByte(0x66); // size prefix 2314 addrNop8(); 2315 break; 2316 case 9: 2317 emitByte(0x66); // size prefix 2318 addrNop8(); 2319 break; 2320 case 8: 2321 addrNop8(); 2322 break; 2323 case 7: 2324 addrNop7(); 2325 break; 2326 case 6: 2327 emitByte(0x66); // size prefix 2328 addrNop5(); 2329 break; 2330 case 5: 2331 addrNop5(); 2332 break; 2333 case 4: 2334 addrNop4(); 2335 break; 2336 case 3: 2337 // Don't use "0x0F 0x1F 0x00" - need patching-safe padding 2338 emitByte(0x66); // size prefix 2339 emitByte(0x66); // size prefix 2340 emitByte(0x90); // nop 2341 break; 2342 case 2: 2343 emitByte(0x66); // size prefix 2344 emitByte(0x90); // nop 2345 break; 2346 case 1: 2347 emitByte(0x90); // nop 2348 break; 2349 default: 2350 assert i == 0; 2351 } 2352 return; 2353 } 2354 2355 // Using nops with size prefixes "0x66 0x90".
2356 // From AMD Optimization Guide: 2357 // 1: 0x90 2358 // 2: 0x66 0x90 2359 // 3: 0x66 0x66 0x90 2360 // 4: 0x66 0x66 0x66 0x90 2361 // 5: 0x66 0x66 0x90 0x66 0x90 2362 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2363 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2364 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2365 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2366 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2367 // 2368 while (i > 12) { 2369 i -= 4; 2370 emitByte(0x66); // size prefix 2371 emitByte(0x66); 2372 emitByte(0x66); 2373 emitByte(0x90); // nop 2374 } 2375 // 1 - 12 nops 2376 if (i > 8) { 2377 if (i > 9) { 2378 i -= 1; 2379 emitByte(0x66); 2380 } 2381 i -= 3; 2382 emitByte(0x66); 2383 emitByte(0x66); 2384 emitByte(0x90); 2385 } 2386 // 1 - 8 nops 2387 if (i > 4) { 2388 if (i > 6) { 2389 i -= 1; 2390 emitByte(0x66); 2391 } 2392 i -= 3; 2393 emitByte(0x66); 2394 emitByte(0x66); 2395 emitByte(0x90); 2396 } 2397 switch (i) { 2398 case 4: 2399 emitByte(0x66); 2400 emitByte(0x66); 2401 emitByte(0x66); 2402 emitByte(0x90); 2403 break; 2404 case 3: 2405 emitByte(0x66); 2406 emitByte(0x66); 2407 emitByte(0x90); 2408 break; 2409 case 2: 2410 emitByte(0x66); 2411 emitByte(0x90); 2412 break; 2413 case 1: 2414 emitByte(0x90); 2415 break; 2416 default: 2417 assert i == 0; 2418 } 2419 } 2420 2421 public final void orl(Register dst, Register src) { 2422 OR.rmOp.emit(this, DWORD, dst, src); 2423 } 2424 2425 public final void orl(Register dst, int imm32) { 2426 OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2427 } 2428 2429 public final void pop(Register dst) { 2430 prefix(dst); 2431 emitByte(0x58 + encode(dst)); 2432 } 2433 2434 public void popfq() { 2435 emitByte(0x9D); 2436 } 2437 2438 public final void ptest(Register dst, Register src) { 2439 assert supports(CPUFeature.SSE4_1); 2440 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2441 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2442 emitByte(0x17); 2443 emitModRM(dst, src); 2444 } 2445 2446 public final void pcmpeqb(Register dst, Register src) { 2447 assert supports(CPUFeature.SSE2); 2448 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2449 simdPrefix(dst, dst, src, PD, P_0F, false); 2450 emitByte(0x74); 2451 emitModRM(dst, src); 2452 } 2453 2454 public final void pcmpeqw(Register dst, Register src) { 2455 assert supports(CPUFeature.SSE2); 2456 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2457 simdPrefix(dst, dst, src, PD, P_0F, false); 2458 emitByte(0x75); 2459 emitModRM(dst, src); 2460 } 2461 2462 public final void pcmpestri(Register dst, AMD64Address src, int imm8) { 2463 assert supports(CPUFeature.SSE4_2); 2464 assert dst.getRegisterCategory().equals(XMM); 2465 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2466 emitByte(0x61); 2467 emitOperandHelper(dst, src, 0); 2468 emitByte(imm8); 2469 } 2470 2471 public final void pcmpestri(Register dst, Register src, int imm8) { 2472 assert supports(CPUFeature.SSE4_2); 2473 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2474 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2475 emitByte(0x61); 2476 emitModRM(dst, src); 2477 emitByte(imm8); 2478 } 2479 2480 public final void pmovmskb(Register dst, Register src) { 2481 assert supports(CPUFeature.SSE2); 2482 assert dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(XMM); 2483 simdPrefix(dst, Register.None, src, PD, 
P_0F, false); 2484 emitByte(0xD7); 2485 emitModRM(dst, src); 2486 } 2487 2488 public final void pmovzxbw(Register dst, AMD64Address src) { 2489 assert supports(CPUFeature.SSE4_2); 2490 assert dst.getRegisterCategory().equals(XMM); 2491 // XXX legacy_mode should be: _legacy_mode_bw 2492 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2493 emitByte(0x30); 2494 emitOperandHelper(dst, src, 0); 2495 } 2496 2497 public final void push(Register src) { 2498 prefix(src); 2499 emitByte(0x50 + encode(src)); 2500 } 2501 2502 public void pushfq() { 2503 emitByte(0x9c); 2504 } 2505 2506 public final void paddd(Register dst, Register src) { 2507 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2508 simdPrefix(dst, dst, src, PD, P_0F, false); 2509 emitByte(0xFE); 2510 emitModRM(dst, src); 2511 } 2512 2513 public final void paddq(Register dst, Register src) { 2514 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2515 simdPrefix(dst, dst, src, PD, P_0F, false); 2516 emitByte(0xD4); 2517 emitModRM(dst, src); 2518 } 2519 2520 public final void pextrw(Register dst, Register src, int imm8) { 2521 assert dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(XMM); 2522 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2523 emitByte(0xC5); 2524 emitModRM(dst, src); 2525 emitByte(imm8); 2526 } 2527 2528 public final void pinsrw(Register dst, Register src, int imm8) { 2529 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU); 2530 simdPrefix(dst, dst, src, PD, P_0F, false); 2531 emitByte(0xC4); 2532 emitModRM(dst, src); 2533 emitByte(imm8); 2534 } 2535 2536 public final void por(Register dst, Register src) { 2537 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2538 simdPrefix(dst, dst, src, PD, P_0F, false); 2539 emitByte(0xEB); 2540 emitModRM(dst, src); 2541 } 2542 2543 public final void pand(Register dst, Register src) { 2544 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2545 simdPrefix(dst, dst, src, PD, P_0F, false); 2546 emitByte(0xDB); 2547 emitModRM(dst, src); 2548 } 2549 2550 public final void pxor(Register dst, Register src) { 2551 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2552 simdPrefix(dst, dst, src, PD, P_0F, false); 2553 emitByte(0xEF); 2554 emitModRM(dst, src); 2555 } 2556 2557 public final void pslld(Register dst, int imm8) { 2558 assert isUByte(imm8) : "invalid value"; 2559 assert dst.getRegisterCategory().equals(XMM); 2560 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 2561 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2562 emitByte(0x72); 2563 emitModRM(6, dst); 2564 emitByte(imm8 & 0xFF); 2565 } 2566 2567 public final void psllq(Register dst, Register shift) { 2568 assert dst.getRegisterCategory().equals(XMM) && shift.getRegisterCategory().equals(XMM); 2569 simdPrefix(dst, dst, shift, PD, P_0F, false); 2570 emitByte(0xF3); 2571 emitModRM(dst, shift); 2572 } 2573 2574 public final void psllq(Register dst, int imm8) { 2575 assert isUByte(imm8) : "invalid value"; 2576 assert dst.getRegisterCategory().equals(XMM); 2577 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 2578 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2579 emitByte(0x73); 2580 emitModRM(6, dst); 2581 emitByte(imm8); 2582 } 2583 2584 public final void psrad(Register dst, int imm8) { 2585 assert isUByte(imm8) : "invalid value"; 2586 assert 
dst.getRegisterCategory().equals(XMM); 2587 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 2588 simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false); 2589 emitByte(0x72); 2590 emitModRM(4, dst); 2591 emitByte(imm8); 2592 } 2593 2594 public final void psrld(Register dst, int imm8) { 2595 assert isUByte(imm8) : "invalid value"; 2596 assert dst.getRegisterCategory().equals(XMM); 2597 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 2598 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2599 emitByte(0x72); 2600 emitModRM(2, dst); 2601 emitByte(imm8); 2602 } 2603 2604 public final void psrlq(Register dst, int imm8) { 2605 assert isUByte(imm8) : "invalid value"; 2606 assert dst.getRegisterCategory().equals(XMM); 2607 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 2608 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2609 emitByte(0x73); 2610 emitModRM(2, dst); 2611 emitByte(imm8); 2612 } 2613 2614 public final void psrldq(Register dst, int imm8) { 2615 assert isUByte(imm8) : "invalid value"; 2616 assert dst.getRegisterCategory().equals(XMM); 2617 simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false); 2618 emitByte(0x73); 2619 emitModRM(3, dst); 2620 emitByte(imm8); 2621 } 2622 2623 public final void pshufb(Register dst, Register src) { 2624 assert supports(CPUFeature.SSSE3); 2625 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2626 simdPrefix(dst, dst, src, PD, P_0F38, false); 2627 emitByte(0x00); 2628 emitModRM(dst, src); 2629 } 2630 2631 public final void pshuflw(Register dst, Register src, int imm8) { 2632 assert supports(CPUFeature.SSE2); 2633 assert isUByte(imm8) : "invalid value"; 2634 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2635 simdPrefix(dst, Register.None, src, SD, P_0F, false); 2636 emitByte(0x70); 2637 emitModRM(dst, src); 2638 emitByte(imm8); 2639 } 2640 2641 public final void pshufd(Register dst, Register src, int imm8) { 2642 assert isUByte(imm8) : "invalid value"; 2643 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2644 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2645 emitByte(0x70); 2646 emitModRM(dst, src); 2647 emitByte(imm8); 2648 } 2649 2650 public final void psubd(Register dst, Register src) { 2651 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2652 simdPrefix(dst, dst, src, PD, P_0F, false); 2653 emitByte(0xFA); 2654 emitModRM(dst, src); 2655 } 2656 2657 public final void rcpps(Register dst, Register src) { 2658 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2659 simdPrefix(dst, Register.None, src, PS, P_0F, false); 2660 emitByte(0x53); 2661 emitModRM(dst, src); 2662 } 2663 2664 public final void ret(int imm16) { 2665 if (imm16 == 0) { 2666 emitByte(0xC3); 2667 } else { 2668 emitByte(0xC2); 2669 emitShort(imm16); 2670 } 2671 } 2672 2673 public final void sarl(Register dst, int imm8) { 2674 prefix(dst); 2675 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2676 if (imm8 == 1) { 2677 emitByte(0xD1); 2678 emitModRM(7, dst); 2679 } else { 2680 emitByte(0xC1); 2681 emitModRM(7, dst); 2682 emitByte(imm8); 2683 } 2684 } 2685 2686 public final void shll(Register dst, int imm8) { 2687 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2688 prefix(dst); 2689 if (imm8 == 1) { 2690 emitByte(0xD1); 2691 emitModRM(4, dst); 2692 } else { 2693 emitByte(0xC1); 2694 emitModRM(4, dst); 2695 emitByte(imm8); 2696 } 2697 } 2698 2699 public final void shll(Register dst) { 2700 // Multiply 
dst by 2, CL times. 2701 prefix(dst); 2702 emitByte(0xD3); 2703 emitModRM(4, dst); 2704 } 2705 2706 public final void shrl(Register dst, int imm8) { 2707 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2708 prefix(dst); 2709 emitByte(0xC1); 2710 emitModRM(5, dst); 2711 emitByte(imm8); 2712 } 2713 2714 public final void shrl(Register dst) { 2715 // Unsigned divide dst by 2, CL times. 2716 prefix(dst); 2717 emitByte(0xD3); 2718 emitModRM(5, dst); 2719 } 2720 2721 public final void subl(AMD64Address dst, int imm32) { 2722 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2723 } 2724 2725 public final void subl(Register dst, int imm32) { 2726 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2727 } 2728 2729 public final void subl(Register dst, Register src) { 2730 SUB.rmOp.emit(this, DWORD, dst, src); 2731 } 2732 2733 public final void subpd(Register dst, Register src) { 2734 SSEOp.SUB.emit(this, PD, dst, src); 2735 } 2736 2737 public final void subsd(Register dst, Register src) { 2738 SSEOp.SUB.emit(this, SD, dst, src); 2739 } 2740 2741 public final void subsd(Register dst, AMD64Address src) { 2742 SSEOp.SUB.emit(this, SD, dst, src); 2743 } 2744 2745 public final void testl(Register dst, int imm32) { 2746 // not using emitArith because test 2747 // doesn't support sign-extension of 2748 // 8-bit operands 2749 if (dst.encoding == 0) { 2750 emitByte(0xA9); 2751 } else { 2752 prefix(dst); 2753 emitByte(0xF7); 2754 emitModRM(0, dst); 2755 } 2756 emitInt(imm32); 2757 } 2758 2759 public final void testl(Register dst, Register src) { 2760 prefix(dst, src); 2761 emitByte(0x85); 2762 emitModRM(dst, src); 2763 } 2764 2765 public final void testl(Register dst, AMD64Address src) { 2766 prefix(src, dst); 2767 emitByte(0x85); 2768 emitOperandHelper(dst, src, 0); 2769 } 2770 2771 public final void unpckhpd(Register dst, Register src) { 2772 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2773 simdPrefix(dst, dst, src, PD, P_0F, false); 2774 emitByte(0x15); 2775 emitModRM(dst, src); 2776 } 2777 2778 public final void unpcklpd(Register dst, Register src) { 2779 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2780 simdPrefix(dst, dst, src, PD, P_0F, false); 2781 emitByte(0x14); 2782 emitModRM(dst, src); 2783 } 2784 2785 public final void xorl(Register dst, Register src) { 2786 XOR.rmOp.emit(this, DWORD, dst, src); 2787 } 2788 2789 public final void xorpd(Register dst, Register src) { 2790 SSEOp.XOR.emit(this, PD, dst, src); 2791 } 2792 2793 public final void xorps(Register dst, Register src) { 2794 SSEOp.XOR.emit(this, PS, dst, src); 2795 } 2796 2797 protected final void decl(Register dst) { 2798 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2799 prefix(dst); 2800 emitByte(0xFF); 2801 emitModRM(1, dst); 2802 } 2803 2804 protected final void incl(Register dst) { 2805 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2806 prefix(dst); 2807 emitByte(0xFF); 2808 emitModRM(0, dst); 2809 } 2810 2811 public final void addq(Register dst, int imm32) { 2812 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2813 } 2814 2815 public final void addq(AMD64Address dst, int imm32) { 2816 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2817 } 2818 2819 public final void addq(Register dst, Register src) { 2820 ADD.rmOp.emit(this, QWORD, dst, src); 2821 } 2822 2823 public final void addq(AMD64Address dst, Register src) { 2824
ADD.mrOp.emit(this, QWORD, dst, src); 2825 } 2826 2827 public final void andq(Register dst, int imm32) { 2828 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2829 } 2830 2831 public final void bsrq(Register dst, Register src) { 2832 prefixq(dst, src); 2833 emitByte(0x0F); 2834 emitByte(0xBD); 2835 emitModRM(dst, src); 2836 } 2837 2838 public final void bswapq(Register reg) { 2839 prefixq(reg); 2840 emitByte(0x0F); 2841 emitByte(0xC8 + encode(reg)); 2842 } 2843 2844 public final void cdqq() { 2845 rexw(); 2846 emitByte(0x99); 2847 } 2848 2849 public final void cmovq(ConditionFlag cc, Register dst, Register src) { 2850 prefixq(dst, src); 2851 emitByte(0x0F); 2852 emitByte(0x40 | cc.getValue()); 2853 emitModRM(dst, src); 2854 } 2855 2856 public final void setb(ConditionFlag cc, Register dst) { 2857 prefix(dst, true); 2858 emitByte(0x0F); 2859 emitByte(0x90 | cc.getValue()); 2860 emitModRM(0, dst); 2861 } 2862 2863 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) { 2864 prefixq(src, dst); 2865 emitByte(0x0F); 2866 emitByte(0x40 | cc.getValue()); 2867 emitOperandHelper(dst, src, 0); 2868 } 2869 2870 public final void cmpq(Register dst, int imm32) { 2871 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2872 } 2873 2874 public final void cmpq(Register dst, Register src) { 2875 CMP.rmOp.emit(this, QWORD, dst, src); 2876 } 2877 2878 public final void cmpq(Register dst, AMD64Address src) { 2879 CMP.rmOp.emit(this, QWORD, dst, src); 2880 } 2881 2882 public final void cmpxchgq(Register reg, AMD64Address adr) { 2883 prefixq(adr, reg); 2884 emitByte(0x0F); 2885 emitByte(0xB1); 2886 emitOperandHelper(reg, adr, 0); 2887 } 2888 2889 public final void cvtdq2pd(Register dst, Register src) { 2890 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2891 simdPrefix(dst, Register.None, src, SS, P_0F, false); 2892 emitByte(0xE6); 2893 emitModRM(dst, src); 2894 } 2895 2896 public final void cvtsi2sdq(Register dst, Register src) { 2897 SSEOp.CVTSI2SD.emit(this, QWORD, dst, src); 2898 } 2899 2900 public final void cvttsd2siq(Register dst, Register src) { 2901 SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src); 2902 } 2903 2904 public final void cvttpd2dq(Register dst, Register src) { 2905 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2906 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2907 emitByte(0xE6); 2908 emitModRM(dst, src); 2909 } 2910 2911 public final void decq(Register dst) { 2912 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2913 prefixq(dst); 2914 emitByte(0xFF); 2915 emitModRM(1, dst); 2916 } 2917 2918 public final void decq(AMD64Address dst) { 2919 DEC.emit(this, QWORD, dst); 2920 } 2921 2922 public final void imulq(Register dst, Register src) { 2923 prefixq(dst, src); 2924 emitByte(0x0F); 2925 emitByte(0xAF); 2926 emitModRM(dst, src); 2927 } 2928 2929 public final void incq(Register dst) { 2930 // Don't use it directly. Use the macro assembler's incrementq() instead.
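            // (Editor's note: in 64-bit mode the legacy one-byte INC/DEC opcodes 0x40-0x4F
            // were repurposed as REX prefixes, so the two-byte FF /0 encoding below is the
            // only available form.)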
2931 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 2932 prefixq(dst); 2933 emitByte(0xFF); 2934 emitModRM(0, dst); 2935 } 2936 2937 public final void incq(AMD64Address dst) { 2938 INC.emit(this, QWORD, dst); 2939 } 2940 2941 public final void movq(Register dst, long imm64) { 2942 movq(dst, imm64, false); 2943 } 2944 2945 public final void movq(Register dst, long imm64, boolean annotateImm) { 2946 int insnPos = position(); 2947 prefixq(dst); 2948 emitByte(0xB8 + encode(dst)); 2949 int immPos = position(); 2950 emitLong(imm64); 2951 int nextInsnPos = position(); 2952 if (annotateImm && codePatchingAnnotationConsumer != null) { 2953 codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 2954 } 2955 } 2956 2957 public final void movslq(Register dst, int imm32) { 2958 prefixq(dst); 2959 emitByte(0xC7); 2960 emitModRM(0, dst); 2961 emitInt(imm32); 2962 } 2963 2964 public final void movdq(Register dst, AMD64Address src) { 2965 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 2966 } 2967 2968 public final void movdq(AMD64Address dst, Register src) { 2969 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 2970 } 2971 2972 public final void movdq(Register dst, Register src) { 2973 if (dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU)) { 2974 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 2975 } else if (src.getRegisterCategory().equals(XMM) && dst.getRegisterCategory().equals(CPU)) { 2976 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 2977 } else { 2978 throw new InternalError("should not reach here"); 2979 } 2980 } 2981 2982 public final void movdl(Register dst, Register src) { 2983 if (dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(CPU)) { 2984 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 2985 } else if (src.getRegisterCategory().equals(XMM) && dst.getRegisterCategory().equals(CPU)) { 2986 AMD64MROp.MOVD.emit(this, DWORD, dst, src); 2987 } else { 2988 throw new InternalError("should not reach here"); 2989 } 2990 } 2991 2992 public final void movdl(Register dst, AMD64Address src) { 2993 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 2994 } 2995 2996 public final void movddup(Register dst, Register src) { 2997 assert supports(CPUFeature.SSE3); 2998 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2999 simdPrefix(dst, Register.None, src, SD, P_0F, false); 3000 emitByte(0x12); 3001 emitModRM(dst, src); 3002 } 3003 3004 public final void movdqu(Register dst, AMD64Address src) { 3005 assert dst.getRegisterCategory().equals(XMM); 3006 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3007 emitByte(0x6F); 3008 emitOperandHelper(dst, src, 0); 3009 } 3010 3011 public final void movdqu(Register dst, Register src) { 3012 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 3013 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3014 emitByte(0x6F); 3015 emitModRM(dst, src); 3016 } 3017 3018 public final void movslq(AMD64Address dst, int imm32) { 3019 prefixq(dst); 3020 emitByte(0xC7); 3021 emitOperandHelper(0, dst, 4); 3022 emitInt(imm32); 3023 } 3024 3025 public final void movslq(Register dst, AMD64Address src) { 3026 prefixq(src, dst); 3027 emitByte(0x63); 3028 emitOperandHelper(dst, src, 0); 3029 } 3030 3031 public final void movslq(Register dst, Register src) { 3032 prefixq(dst, src); 3033 emitByte(0x63); 3034 emitModRM(dst, src); 3035 } 3036 3037 public final void negq(Register dst) { 3038 prefixq(dst);
3039 emitByte(0xF7); 3040 emitModRM(3, dst); 3041 } 3042 3043 public final void orq(Register dst, Register src) { 3044 OR.rmOp.emit(this, QWORD, dst, src); 3045 } 3046 3047 public final void shlq(Register dst, int imm8) { 3048 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3049 prefixq(dst); 3050 if (imm8 == 1) { 3051 emitByte(0xD1); 3052 emitModRM(4, dst); 3053 } else { 3054 emitByte(0xC1); 3055 emitModRM(4, dst); 3056 emitByte(imm8); 3057 } 3058 } 3059 3060 public final void shlq(Register dst) { 3061 // Multiply dst by 2, CL times. 3062 prefixq(dst); 3063 emitByte(0xD3); 3064 emitModRM(4, dst); 3065 } 3066 3067 public final void shrq(Register dst, int imm8) { 3068 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3069 prefixq(dst); 3070 if (imm8 == 1) { 3071 emitByte(0xD1); 3072 emitModRM(5, dst); 3073 } else { 3074 emitByte(0xC1); 3075 emitModRM(5, dst); 3076 emitByte(imm8); 3077 } 3078 } 3079 3080 public final void shrq(Register dst) { 3081 prefixq(dst); 3082 emitByte(0xD3); 3083 // Unsigned divide dst by 2, CL times. 3084 emitModRM(5, dst); 3085 } 3086 3087 public final void sbbq(Register dst, Register src) { 3088 SBB.rmOp.emit(this, QWORD, dst, src); 3089 } 3090 3091 public final void subq(Register dst, int imm32) { 3092 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3093 } 3094 3095 public final void subq(AMD64Address dst, int imm32) { 3096 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3097 } 3098 3099 public final void subqWide(Register dst, int imm32) { 3100 // don't use the sign-extending version, forcing a 32-bit immediate 3101 SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32); 3102 } 3103 3104 public final void subq(Register dst, Register src) { 3105 SUB.rmOp.emit(this, QWORD, dst, src); 3106 } 3107 3108 public final void testq(Register dst, Register src) { 3109 prefixq(dst, src); 3110 emitByte(0x85); 3111 emitModRM(dst, src); 3112 } 3113 3114 public final void btrq(Register src, int imm8) { 3115 prefixq(src); 3116 emitByte(0x0F); 3117 emitByte(0xBA); 3118 emitModRM(6, src); 3119 emitByte(imm8); 3120 } 3121 3122 public final void xaddb(AMD64Address dst, Register src) { 3123 prefixb(dst, src); 3124 emitByte(0x0F); 3125 emitByte(0xC0); 3126 emitOperandHelper(src, dst, 0); 3127 } 3128 3129 public final void xaddw(AMD64Address dst, Register src) { 3130 emitByte(0x66); // Switch to 16-bit mode. 
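            // (Editor's note, illustrative: with the 0x66 operand-size override,
            // "xadd word ptr [rax], cx" encodes as 66 0F C1 08; the same opcode bytes
            // without the prefix would operate on 32-bit operands.)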
3131 prefix(dst, src); 3132 emitByte(0x0F); 3133 emitByte(0xC1); 3134 emitOperandHelper(src, dst, 0); 3135 } 3136 3137 public final void xaddl(AMD64Address dst, Register src) { 3138 prefix(dst, src); 3139 emitByte(0x0F); 3140 emitByte(0xC1); 3141 emitOperandHelper(src, dst, 0); 3142 } 3143 3144 public final void xaddq(AMD64Address dst, Register src) { 3145 prefixq(dst, src); 3146 emitByte(0x0F); 3147 emitByte(0xC1); 3148 emitOperandHelper(src, dst, 0); 3149 } 3150 3151 public final void xchgb(Register dst, AMD64Address src) { 3152 prefixb(src, dst); 3153 emitByte(0x86); 3154 emitOperandHelper(dst, src, 0); 3155 } 3156 3157 public final void xchgw(Register dst, AMD64Address src) { 3158 emitByte(0x66); 3159 prefix(src, dst); 3160 emitByte(0x87); 3161 emitOperandHelper(dst, src, 0); 3162 } 3163 3164 public final void xchgl(Register dst, AMD64Address src) { 3165 prefix(src, dst); 3166 emitByte(0x87); 3167 emitOperandHelper(dst, src, 0); 3168 } 3169 3170 public final void xchgq(Register dst, AMD64Address src) { 3171 prefixq(src, dst); 3172 emitByte(0x87); 3173 emitOperandHelper(dst, src, 0); 3174 } 3175 3176 public final void membar(int barriers) { 3177 if (target.isMP) { 3178 // We only have to handle StoreLoad 3179 if ((barriers & STORE_LOAD) != 0) { 3180 // All usable chips support "locked" instructions which suffice 3181 // as barriers, and are much faster than the alternative of 3182 // using the cpuid instruction. Here we use a locked add of [rsp], 0, 3183 // which is conveniently otherwise a no-op except for clobbering 3184 // the flags. 3185 // Any change to this code may need to revisit other places in 3186 // the code where this idiom is used, in particular the 3187 // orderAccess code. 3188 lock(); 3189 addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here 3190 } 3191 } 3192 } 3193 3194 @Override 3195 protected final void patchJumpTarget(int branch, int branchTarget) { 3196 int op = getByte(branch); 3197 assert op == 0xE8 // call 3198 || 3199 op == 0x00 // jump table entry 3200 || op == 0xE9 // jmp 3201 || op == 0xEB // short jmp 3202 || (op & 0xF0) == 0x70 // short jcc 3203 || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc 3204 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op; 3205 3206 if (op == 0x00) { 3207 int offsetToJumpTableBase = getShort(branch + 1); 3208 int jumpTableBase = branch - offsetToJumpTableBase; 3209 int imm32 = branchTarget - jumpTableBase; 3210 emitInt(imm32, branch); 3211 } else if (op == 0xEB || (op & 0xF0) == 0x70) { 3212 3213 // short offset operators (jmp and jcc) 3214 final int imm8 = branchTarget - (branch + 2); 3215 /* 3216 * Since a wrongly patched short branch can potentially lead to working but really badly 3217 * behaving code, we should always fail with an exception instead of having an assert. 3218 */ 3219 if (!NumUtil.isByte(imm8)) { 3220 throw new InternalError("branch displacement out of range: " + imm8); 3221 } 3222 emitByte(imm8, branch + 1); 3223 3224 } else { 3225 3226 int off = 1; 3227 if (op == 0x0F) { 3228 off = 2; 3229 } 3230 3231 int imm32 = branchTarget - (branch + 4 + off); 3232 emitInt(imm32, branch + off); 3233 } 3234 } 3235 3236 public void nullCheck(AMD64Address address) { 3237 testl(AMD64.rax, address); 3238 } 3239 3240 @Override 3241 public void align(int modulus) { 3242 if (position() % modulus != 0) { 3243 nop(modulus - (position() % modulus)); 3244 } 3245 } 3246 3247 /** 3248 * Emits a direct call instruction.
    public void nullCheck(AMD64Address address) {
        testl(AMD64.rax, address);
    }

    @Override
    public void align(int modulus) {
        if (position() % modulus != 0) {
            nop(modulus - (position() % modulus));
        }
    }

    /**
     * Emits a direct call instruction. Note that the actual call target is not specified, because
     * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
     * responsible for adding the call address to the appropriate patching tables.
     */
    public final void call() {
        annotatePatchingImmediate(1, 4);
        emitByte(0xE8);
        emitInt(0);
    }

    public final void call(Register src) {
        prefix(src);
        emitByte(0xFF);
        emitModRM(2, src);
    }

    public final void int3() {
        emitByte(0xCC);
    }

    public final void pause() {
        emitByte(0xF3);
        emitByte(0x90);
    }

    private void emitx87(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal stack offset";
        emitByte(b1);
        emitByte(b2 + i);
    }

    public final void fldd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(0, src, 0);
    }

    public final void flds(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(0, src, 0);
    }

    public final void fldln2() {
        emitByte(0xD9);
        emitByte(0xED);
    }

    public final void fldlg2() {
        emitByte(0xD9);
        emitByte(0xEC);
    }

    public final void fyl2x() {
        emitByte(0xD9);
        emitByte(0xF1);
    }

    public final void fstps(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(3, src, 0);
    }

    public final void fstpd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(3, src, 0);
    }

    private void emitFPUArith(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
        emitByte(b1);
        emitByte(b2 + i);
    }

    public void ffree(int i) {
        emitFPUArith(0xDD, 0xC0, i);
    }

    public void fincstp() {
        emitByte(0xD9);
        emitByte(0xF7);
    }

    public void fxch(int i) {
        emitFPUArith(0xD9, 0xC8, i);
    }

    public void fnstswAX() {
        emitByte(0xDF);
        emitByte(0xE0);
    }

    public void fwait() {
        emitByte(0x9B);
    }

    public void fprem() {
        emitByte(0xD9);
        emitByte(0xF8);
    }

    public final void fsin() {
        emitByte(0xD9);
        emitByte(0xFE);
    }

    public final void fcos() {
        emitByte(0xD9);
        emitByte(0xFF);
    }

    public final void fptan() {
        emitByte(0xD9);
        emitByte(0xF2);
    }

    public final void fstp(int i) {
        emitx87(0xDD, 0xD8, i);
    }

    @Override
    public AMD64Address makeAddress(Register base, int displacement) {
        return new AMD64Address(base, displacement);
    }

    @Override
    public AMD64Address getPlaceholder(int instructionStartPosition) {
        return new AMD64Address(AMD64.rip, Register.None, Scale.Times1, 0, instructionStartPosition);
    }

    private void prefetchPrefix(AMD64Address src) {
        prefix(src);
        emitByte(0x0F);
    }
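    // Encoding note (informational): the SSE prefetch hints below share opcode
    // 0x0F 0x18 and are distinguished by the ModRM reg field: /0 = prefetchnta,
    // /1 = prefetcht0, /2 = prefetcht1, /3 = prefetcht2. The 3DNow! variants use
    // opcode 0x0F 0x0D, with /0 = prefetchr and /1 = prefetchw.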
    public void prefetchnta(AMD64Address src) {
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(0, src, 0);
    }

    void prefetchr(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(0, src, 0);
    }

    public void prefetcht0(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(1, src, 0);
    }

    public void prefetcht1(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(2, src, 0);
    }

    public void prefetcht2(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(3, src, 0);
    }

    public void prefetchw(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(1, src, 0);
    }

    public void rdtsc() {
        emitByte(0x0F);
        emitByte(0x31);
    }

    /**
     * Emits an instruction which is considered to be illegal. This is used if we deliberately want
     * to crash the program (debugging etc.).
     */
    public void illegal() {
        emitByte(0x0F);
        emitByte(0x0B);
    }

    public void lfence() {
        emitByte(0x0F);
        emitByte(0xAE);
        emitByte(0xE8);
    }

    public final void vptest(Register dst, Register src) {
        VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpxor(Register dst, Register nds, Register src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vpxor(Register dst, Register nds, AMD64Address src) {
        VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
    }

    public final void vmovdqu(Register dst, AMD64Address src) {
        VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vpmovzxbw(Register dst, AMD64Address src) {
        VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src);
    }

    public final void vzeroupper() {
        emitVEX(L128, P_, M_0F, W0, 0, 0);
        emitByte(0x77);
    }

    // Sets ZF if the OR of src1 and src2 is all zeros, and CF if it is all ones.
    public final void kortestq(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        assert src1.getRegisterCategory().equals(MASK) && src2.getRegisterCategory().equals(MASK);
        vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1);
        emitByte(0x98);
        emitModRM(src1, src2);
    }

    public final void kmovq(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        assert dst.getRegisterCategory().equals(MASK) || dst.getRegisterCategory().equals(CPU);
        assert src.getRegisterCategory().equals(MASK) || src.getRegisterCategory().equals(CPU);
        assert !(dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(CPU));

        if (dst.getRegisterCategory().equals(MASK)) {
            if (src.getRegisterCategory().equals(MASK)) {
                // kmovq(KRegister dst, KRegister src)
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1);
                emitByte(0x90);
                emitModRM(dst, src);
            } else {
                // kmovq(KRegister dst, Register src)
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1);
                emitByte(0x92);
                emitModRM(dst, src);
            }
        } else {
            if (src.getRegisterCategory().equals(MASK)) {
                // kmovq(Register dst, KRegister src)
                vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1);
                emitByte(0x93);
                emitModRM(dst, src);
            } else {
                throw GraalError.shouldNotReachHere();
            }
        }
    }
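    // Encoding note (informational): the EVEX-encoded instructions below use
    // compressed displacement (disp8*N) addressing, where an 8-bit displacement is
    // implicitly scaled by a tuple-dependent factor N. That factor is passed to
    // emitEVEXOperandHelper via EVEXTuple.getDisp8ScalingFactor; for a full
    // 512-bit vector memory operand (FVM) N is 64, for a half vector (HVM) it is 32.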
    public final void evmovdqu64(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512F);
        assert dst.getRegisterCategory().equals(XMM);
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0);
        emitByte(0x6F);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    public final void evpmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert dst.getRegisterCategory().equals(XMM);
        evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0);
        emitByte(0x30);
        emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }

    public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) {
        assert supports(CPUFeature.AVX512BW);
        assert kdst.getRegisterCategory().equals(MASK) && nds.getRegisterCategory().equals(XMM);
        evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0);
        emitByte(0x74);
        emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM));
    }
}