/*
 * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.MASK;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512BW;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512CD;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512DQ;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512F;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX512VL;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIntelNops;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L512;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import java.util.EnumSet;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends AMD64BaseAssembler {

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }

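    /*
     * Usage sketch (editor's illustration, not part of the original file): negate() is what
     * branch inversion uses. The x86 encoding pairs each condition with its opposite in the
     * low bit, so negation always flips bit 0 of the encoded value:
     *
     *   ConditionFlag cond = ConditionFlag.Less;          // encoding 0xc
     *   ConditionFlag inv = cond.negate();                // GreaterEqual, encoding 0xd
     *   assert inv.getValue() == (cond.getValue() ^ 1);   // holds for every pair above
     */
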
    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        ByteOrLargerAssertion(CPU, CPU, BYTE, WORD, DWORD, QWORD),
        WordOrLargerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        DwordOrLargerAssertion(CPU, CPU, DWORD, QWORD),
        WordOrDwordAssertion(CPU, CPU, WORD, QWORD),
        QwordAssertion(CPU, CPU, QWORD),
        FloatAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatAssertion(XMM, CPU, DWORD, QWORD),
        FloatToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }
    }

    protected static final int P_0F = 0x0F;
    protected static final int P_0F38 = 0x380F;
    protected static final int P_0F3A = 0x3A0F;

    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.getSizePrefix() != 0) {
                asm.emitByte(size.getSizePrefix());
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        public OperandSize[] getAllowedSizes() {
            return assertion.allowedSizes;
        }

        protected final boolean isSSEInstruction() {
            if (feature == null) {
                return false;
            }
            switch (feature) {
                case SSE:
                case SSE2:
                case SSE3:
                case SSSE3:
                case SSE4A:
                case SSE4_1:
                case SSE4_2:
                    return true;
                default:
                    return false;
            }
        }

        public final OpAssertion getAssertion() {
            return assertion;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }

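    /*
     * Encoding sketch (editor's illustration): emitOpcode derives the REX prefix from the
     * operand size and the rxb bits, so a QWORD register-register MOV into r8 gets
     * REX = 0x40 | W (0x08, QWORD) | R (0x04, r8 in the reg field) = 0x4C. Assuming an
     * AMD64Assembler `asm` in scope and AMD64RMOp.MOV from further down in this file:
     *
     *   AMD64RMOp.MOV.emit(asm, QWORD, AMD64.r8, AMD64.rax);   // 4C 8B C0 = mov r8, rax
     */
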
    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            this(opcode, immIsByte, prefix, op, assertion, null);
        }

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, 0, prefix, op, assertion, feature);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.getBytes();
            }
        }
    }

    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF, OpAssertion.ByteOrLargerAssertion);
        public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC);
        public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD);
        // POPCNT, TZCNT, and LZCNT also support the WORD operand size, but the legacy size
        // prefix would have to be emitted before the mandatory 0xF3 prefix. Since we never
        // emit bit counts for 16-bit operands, we simply use DwordOrLargerAssertion.
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, OpAssertion.DwordOrLargerAssertion, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, OpAssertion.DwordOrLargerAssertion, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, OpAssertion.DwordOrLargerAssertion, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordAssertion);
        public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B);
        public static final AMD64RMOp CMP = new AMD64RMOp("CMP", 0x3B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcodes, just with different operand size prefixes
        public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);

        // TEST is documented as an MR operation, but it is symmetric, and using it as an RM operation is more convenient.
        public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x10:
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }

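    /*
     * Usage sketch (editor's illustration, assuming an AMD64Assembler `asm` in scope): RM
     * ops put the destination in the ModRM reg field and the source in rm, so
     *
     *   AMD64RMOp.MOV.emit(asm, DWORD, AMD64.rax, AMD64.rbx);              // 8B C3 = mov eax, ebx
     *   AMD64RMOp.MOV.emit(asm, DWORD, AMD64.rax,
     *                   new AMD64Address(AMD64.rsp, 8));                   // 8B 44 24 08 = mov eax, [rsp + 8]
     */
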
    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89);

        // MOVD and MOVQ are the same opcode, just with a different operand size prefix.
        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatAssertion must be used.
        public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with a different operand size prefix
        public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x11:
                        if ((size == SS) || (size == SD)) {
                            nds = src;
                        }
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(src, nds, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(src, dst);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, src, null);
            if (isSSEInstruction()) {
                asm.simdPrefix(src, Register.None, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
            }
            asm.emitOperandHelper(src, dst, 0);
        }
    }

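    /*
     * Usage sketch (editor's illustration): MR ops swap the roles, which is what stores need:
     *
     *   AMD64MROp.MOV.emit(asm, DWORD, new AMD64Address(AMD64.rsp, 8), AMD64.rax);
     *   // 89 44 24 08 = mov [rsp + 8], eax
     */
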
    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2);
        public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3);
        public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0);
        public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP = new AMD64MOp("POP", 0x8F, 0, OpAssertion.WordOrDwordAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }

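    /*
     * Usage sketch (editor's illustration): M ops encode the operation in the ModRM reg
     * field (`ext`), so the single operand goes in rm:
     *
     *   AMD64MOp.NEG.emit(asm, DWORD, AMD64.rax);   // F7 D8 = neg eax (ext 3 -> reg field 011)
     */
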
    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm, boolean annotateImm) {
            assert verify(asm, size, dst, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm, boolean annotateImm) {
            assert verify(asm, size, null, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }
    }

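    /*
     * Usage sketch (editor's illustration): MI ops take the immediate last; passing
     * annotateImm = true additionally records where the immediate lives so it can be
     * patched later:
     *
     *   AMD64MIOp.MOV.emit(asm, DWORD, AMD64.rax, 0x1234);   // C7 C0 34 12 00 00 = mov eax, 0x1234
     */
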
    /**
     * Opcodes with operand order of RMI.
     *
     * We only have one form of round, as the operation is always treated with a
     * single-variant input, making its extension to 3-address forms redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.WordOrLargerAssertion, null);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, immIsByte, prefix, op, assertion, feature);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
            }
            asm.emitOperandHelper(dst, src, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }

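    /*
     * Usage sketch (editor's illustration): IMUL_SX trades immediate range for size, so a
     * three-operand multiply by a small constant saves three bytes:
     *
     *   AMD64RMIOp.IMUL_SX.emit(asm, DWORD, AMD64.rax, AMD64.rbx, 10);   // 6B C3 0A       = imul eax, ebx, 10
     *   AMD64RMIOp.IMUL.emit(asm, DWORD, AMD64.rax, AMD64.rbx, 1000);    // 69 C3 E8 03 00 00
     */
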
    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatAssertion);
        public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51);
        public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatAssertion);
        public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatAssertion);
        public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58);
        public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59);
        public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C);
        public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D);
        public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E);
        public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F);
        // @formatter:on

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }

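    /*
     * Usage sketch (editor's illustration): the OperandSize selects the scalar or packed
     * variant by picking the mandatory prefix, so one opcode object covers
     * ADDSS/ADDSD/ADDPS/ADDPD:
     *
     *   SSEOp.ADD.emit(asm, SS, AMD64.xmm0, AMD64.xmm1);   // F3 0F 58 C1 = addss xmm0, xmm1
     *   SSEOp.ADD.emit(asm, SD, AMD64.xmm0, AMD64.xmm1);   // F2 0F 58 C1 = addsd xmm0, xmm1
     */
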
    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }

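    /*
     * Usage sketch (editor's illustration): callers pick the short sign-extended immediate
     * form when the constant fits in a byte:
     *
     *   int imm = 16;
     *   AMD64MIOp op = CMP.getMIOpcode(DWORD, isByte(imm));   // 0x83 /7 instead of 0x81 /7
     *   op.emit(asm, DWORD, AMD64.rsp, imm);                  // 83 FC 10 = cmp esp, 16
     */
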
    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
        }
    }

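    /*
     * Usage sketch (editor's illustration): the three fields cover shift-by-1, shift-by-CL
     * and shift-by-immediate, all sharing the same reg-field extension:
     *
     *   AMD64Shift.SHL.miOp.emit(asm, DWORD, AMD64.rax, 3);   // C1 E0 03 = shl eax, 3
     *   AMD64Shift.SHL.mcOp.emit(asm, DWORD, AMD64.rax);      // D3 E0    = shl eax, cl
     */
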
    private enum EVEXFeatureAssertion {
        AVX512F_ALL(EnumSet.of(AVX512F), EnumSet.of(AVX512F), EnumSet.of(AVX512F)),
        AVX512F_128ONLY(EnumSet.of(AVX512F), null, null),
        AVX512F_VL(EnumSet.of(AVX512F, AVX512VL), EnumSet.of(AVX512F, AVX512VL), EnumSet.of(AVX512F)),
        AVX512CD_VL(EnumSet.of(AVX512F, AVX512CD, AVX512VL), EnumSet.of(AVX512F, AVX512CD, AVX512VL), EnumSet.of(AVX512F, AVX512CD)),
        AVX512DQ_VL(EnumSet.of(AVX512F, AVX512DQ, AVX512VL), EnumSet.of(AVX512F, AVX512DQ, AVX512VL), EnumSet.of(AVX512F, AVX512DQ)),
        AVX512BW_VL(EnumSet.of(AVX512F, AVX512BW, AVX512VL), EnumSet.of(AVX512F, AVX512BW, AVX512VL), EnumSet.of(AVX512F, AVX512BW));

        private final EnumSet<CPUFeature> l128features;
        private final EnumSet<CPUFeature> l256features;
        private final EnumSet<CPUFeature> l512features;

        EVEXFeatureAssertion(EnumSet<CPUFeature> l128features, EnumSet<CPUFeature> l256features, EnumSet<CPUFeature> l512features) {
            this.l128features = l128features;
            this.l256features = l256features;
            this.l512features = l512features;
        }

        public boolean check(AMD64 arch, int l) {
            switch (l) {
                case L128:
                    assert l128features != null && arch.getFeatures().containsAll(l128features) : "emitting illegal 128 bit instruction";
                    break;
                case L256:
                    assert l256features != null && arch.getFeatures().containsAll(l256features) : "emitting illegal 256 bit instruction";
                    break;
                case L512:
                    assert l512features != null && arch.getFeatures().containsAll(l512features) : "emitting illegal 512 bit instruction";
                    break;
            }
            return true;
        }

        public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
            switch (avxSize) {
                case XMM:
                    return l128features != null && features.containsAll(l128features);
                case YMM:
                    return l256features != null && features.containsAll(l256features);
                case ZMM:
                    return l512features != null && features.containsAll(l512features);
                default:
                    throw GraalError.shouldNotReachHere();
            }
        }
    }

    private enum VEXOpAssertion {
        AVX1(CPUFeature.AVX, CPUFeature.AVX, null),
        AVX1_2(CPUFeature.AVX, CPUFeature.AVX2, null),
        AVX2(CPUFeature.AVX2, CPUFeature.AVX2, null),
        AVX1_128ONLY(CPUFeature.AVX, null, null),
        AVX1_256ONLY(null, CPUFeature.AVX, null),
        AVX2_256ONLY(null, CPUFeature.AVX2, null),
        XMM_CPU(CPUFeature.AVX, null, null, XMM, null, CPU, null),
        XMM_XMM_CPU(CPUFeature.AVX, null, null, XMM, XMM, CPU, null),
        CPU_XMM(CPUFeature.AVX, null, null, CPU, null, XMM, null),
        AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, null, CPU, null, XMM, null),
        BMI1(CPUFeature.BMI1, null, null, CPU, CPU, CPU, null),
        BMI2(CPUFeature.BMI2, null, null, CPU, CPU, CPU, null),
        FMA(CPUFeature.FMA, null, null, XMM, XMM, XMM, null),

        XMM_CPU_AVX512F_128ONLY(CPUFeature.AVX, null, EVEXFeatureAssertion.AVX512F_128ONLY, XMM, null, CPU, null),
        AVX1_AVX512F_ALL(CPUFeature.AVX, CPUFeature.AVX, EVEXFeatureAssertion.AVX512F_ALL),
        AVX1_AVX512F_VL(CPUFeature.AVX, CPUFeature.AVX, EVEXFeatureAssertion.AVX512F_VL);

        private final CPUFeature l128feature;
        private final CPUFeature l256feature;
        private final EVEXFeatureAssertion l512features;

        private final RegisterCategory rCategory;
        private final RegisterCategory vCategory;
        private final RegisterCategory mCategory;
        private final RegisterCategory imm8Category;

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, EVEXFeatureAssertion l512features) {
            this(l128feature, l256feature, l512features, XMM, XMM, XMM, XMM);
        }

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, EVEXFeatureAssertion l512features, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory,
                        RegisterCategory imm8Category) {
            this.l128feature = l128feature;
            this.l256feature = l256feature;
            this.l512features = l512features;
            this.rCategory = rCategory;
            this.vCategory = vCategory;
            this.mCategory = mCategory;
            this.imm8Category = imm8Category;
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
            return check(arch, getLFlag(size), r, v, m, null);
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
            return check(arch, getLFlag(size), r, v, m, imm8);
        }

        public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
            if (isAVX512Register(r) || isAVX512Register(v) || isAVX512Register(m) || l == L512) {
                assert l512features != null && l512features.check(arch, l);
            } else if (l == L128) {
                assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128 bit instruction";
            } else if (l == L256) {
                assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256 bit instruction";
            }
            if (r != null) {
                assert r.getRegisterCategory().equals(rCategory);
            }
            if (v != null) {
                assert v.getRegisterCategory().equals(vCategory);
            }
            if (m != null) {
                assert m.getRegisterCategory().equals(mCategory);
            }
            if (imm8 != null) {
                assert imm8.getRegisterCategory().equals(imm8Category);
            }
            return true;
        }

        public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize, boolean useZMMRegisters) {
            if (useZMMRegisters || avxSize == AVXSize.ZMM) {
                return l512features != null && l512features.supports(features, avxSize);
            } else if (avxSize == AVXSize.XMM) {
                return l128feature != null && features.contains(l128feature);
            } else if (avxSize == AVXSize.YMM) {
                return l256feature != null && features.contains(l256feature);
            }
            throw GraalError.shouldNotReachHere();
        }
    }

    /**
     * Base class for VEX-encoded instructions.
     */
    public static class VexOp {
        protected final int pp;
        protected final int mmmmm;
        protected final int w;
        protected final int op;

        private final String opcode;
        protected final VEXOpAssertion assertion;

        protected final EVEXTuple evexTuple;
        protected final int wEvex;

        protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
            this.pp = pp;
            this.mmmmm = mmmmm;
            this.w = w;
            this.op = op;
            this.opcode = opcode;
            this.assertion = assertion;
            this.evexTuple = evexTuple;
            this.wEvex = wEvex;
        }

        protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            this(opcode, pp, mmmmm, w, op, assertion, EVEXTuple.INVALID, WIG);
        }

        public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
            return isSupported(vasm, size, false);
        }

        public final boolean isSupported(AMD64Assembler vasm, AVXSize size, boolean useZMMRegisters) {
            return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size, useZMMRegisters);
        }

        @Override
        public String toString() {
            return opcode;
        }

        protected final int getDisp8Scale(boolean useEvex, AVXSize size) {
            return useEvex ? evexTuple.getDisp8ScalingFactor(size) : DEFAULT_DISP8_SCALE;
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
     */
    public static class VexRROp extends VexOp {
        // @formatter:off
        public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY, EVEXTuple.INVALID, WIG);
        // @formatter:on

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
            super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            // 0x1A (VBROADCASTF128) and 0x5A (VPBROADCASTI128) only take memory source
            // operands, so they must not be emitted in this register-register form.
            assert op != 0x1A && op != 0x5A;
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RM.
     */
    public static class VexRMOp extends VexRROp {
        // @formatter:off
        public static final VexRMOp VCVTTSS2SI = new VexRMOp("VCVTTSS2SI", P_F3, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSS2SQ = new VexRMOp("VCVTTSS2SQ", P_F3, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SI = new VexRMOp("VCVTTSD2SI", P_F2, M_0F, W0, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SQ = new VexRMOp("VCVTTSD2SQ", P_F2, M_0F, W1, 0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTPS2PD = new VexRMOp("VCVTPS2PD", P_, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTPD2PS = new VexRMOp("VCVTPD2PS", P_66, M_0F, WIG, 0x5A);
        public static final VexRMOp VCVTDQ2PS = new VexRMOp("VCVTDQ2PS", P_, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPS2DQ = new VexRMOp("VCVTTPS2DQ", P_F3, M_0F, WIG, 0x5B);
        public static final VexRMOp VCVTTPD2DQ = new VexRMOp("VCVTTPD2DQ", P_66, M_0F, WIG, 0xE6);
        public static final VexRMOp VCVTDQ2PD = new VexRMOp("VCVTDQ2PD", P_F3, M_0F, WIG, 0xE6);
        public static final VexRMOp VBROADCASTSS = new VexRMOp("VBROADCASTSS", P_66, M_0F38, W0, 0x18);
        public static final VexRMOp VBROADCASTSD = new VexRMOp("VBROADCASTSD", P_66, M_0F38, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VBROADCASTF128 = new VexRMOp("VBROADCASTF128", P_66, M_0F38, W0, 0x1A, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VPBROADCASTI128 = new VexRMOp("VPBROADCASTI128", P_66, M_0F38, W0, 0x5A, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMOp VPBROADCASTB = new VexRMOp("VPBROADCASTB", P_66, M_0F38, W0, 0x78, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTW = new VexRMOp("VPBROADCASTW", P_66, M_0F38, W0, 0x79, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTD = new VexRMOp("VPBROADCASTD", P_66, M_0F38, W0, 0x58, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTQ = new VexRMOp("VPBROADCASTQ", P_66, M_0F38, W0, 0x59, VEXOpAssertion.AVX2);
        public static final VexRMOp VPMOVMSKB = new VexRMOp("VPMOVMSKB", P_66, M_0F, WIG, 0xD7, VEXOpAssertion.AVX1_2_CPU_XMM);
        public static final VexRMOp VPMOVSXBW = new VexRMOp("VPMOVSXBW", P_66, M_0F38, WIG, 0x20);
        public static final VexRMOp VPMOVSXBD = new VexRMOp("VPMOVSXBD", P_66, M_0F38, WIG, 0x21);
        public static final VexRMOp VPMOVSXBQ = new VexRMOp("VPMOVSXBQ", P_66, M_0F38, WIG, 0x22);
        public static final VexRMOp VPMOVSXWD = new VexRMOp("VPMOVSXWD", P_66, M_0F38, WIG, 0x23);
        public static final VexRMOp VPMOVSXWQ = new VexRMOp("VPMOVSXWQ", P_66, M_0F38, WIG, 0x24);
        public static final VexRMOp VPMOVSXDQ = new VexRMOp("VPMOVSXDQ", P_66, M_0F38, WIG, 0x25);
        public static final VexRMOp VPMOVZXBW = new VexRMOp("VPMOVZXBW", P_66, M_0F38, WIG, 0x30);
        public static final VexRMOp VPMOVZXBD = new VexRMOp("VPMOVZXBD", P_66, M_0F38, WIG, 0x31);
        public static final VexRMOp VPMOVZXBQ = new VexRMOp("VPMOVZXBQ", P_66, M_0F38, WIG, 0x32);
        public static final VexRMOp VPMOVZXWD = new VexRMOp("VPMOVZXWD", P_66, M_0F38, WIG, 0x33);
        public static final VexRMOp VPMOVZXWQ = new VexRMOp("VPMOVZXWQ", P_66, M_0F38, WIG, 0x34);
        public static final VexRMOp VPMOVZXDQ = new VexRMOp("VPMOVZXDQ", P_66, M_0F38, WIG, 0x35);
        public static final VexRMOp VPTEST = new VexRMOp("VPTEST", P_66, M_0F38, WIG, 0x17);
        public static final VexRMOp VSQRTPD = new VexRMOp("VSQRTPD", P_66, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTPS = new VexRMOp("VSQRTPS", P_, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSD = new VexRMOp("VSQRTSD", P_F2, M_0F, WIG, 0x51);
        public static final VexRMOp VSQRTSS = new VexRMOp("VSQRTSS", P_F3, M_0F, WIG, 0x51);
        public static final VexRMOp VUCOMISS = new VexRMOp("VUCOMISS", P_, M_0F, WIG, 0x2E);
        public static final VexRMOp VUCOMISD = new VexRMOp("VUCOMISD", P_66, M_0F, WIG, 0x2E);
        // @formatter:on

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1, EVEXTuple.INVALID, WIG);
        }

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            this(opcode, pp, mmmmm, w, op, assertion, EVEXTuple.INVALID, WIG);
        }

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
            super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            boolean useEvex = asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0, getDisp8Scale(useEvex, size));
        }
    }

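    /*
     * Usage sketch (editor's illustration): VEX ops are guarded by isSupported rather than
     * by assertions alone, e.g.
     *
     *   if (VexRMOp.VPTEST.isSupported(asm, AVXSize.YMM)) {
     *       VexRMOp.VPTEST.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1);   // vptest ymm0, ymm1
     *   }
     */
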
    /**
     * VEX-encoded move instructions.
     * <p>
     * These instructions have two opcodes: op is the forward move instruction with an operand
     * order of RM, and opReverse is the reverse move instruction with an operand order of MR.
     */
    public static final class VexMoveOp extends VexRMOp {
        // @formatter:off
        public static final VexMoveOp VMOVDQA32 = new VexMoveOp("VMOVDQA32", P_66, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W0);
        public static final VexMoveOp VMOVDQA64 = new VexMoveOp("VMOVDQA64", P_66, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W1);
        public static final VexMoveOp VMOVDQU32 = new VexMoveOp("VMOVDQU32", P_F3, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W0);
        public static final VexMoveOp VMOVDQU64 = new VexMoveOp("VMOVDQU64", P_F3, M_0F, WIG, 0x6F, 0x7F, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W1);
        public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_, M_0F, WIG, 0x28, 0x29, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W0);
        public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W1);
        public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W0);
        public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, W1);
        public static final VexMoveOp VMOVSS = new VexMoveOp("VMOVSS", P_F3, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_ALL, EVEXTuple.T1S_32BIT, W0);
        public static final VexMoveOp VMOVSD = new VexMoveOp("VMOVSD", P_F2, M_0F, WIG, 0x10, 0x11, VEXOpAssertion.AVX1_AVX512F_ALL, EVEXTuple.T1S_64BIT, W1);
        public static final VexMoveOp VMOVD = new VexMoveOp("VMOVD", P_66, M_0F, W0, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU_AVX512F_128ONLY, EVEXTuple.T1F_32BIT, W0);
        public static final VexMoveOp VMOVQ = new VexMoveOp("VMOVQ", P_66, M_0F, W1, 0x6E, 0x7E, VEXOpAssertion.XMM_CPU_AVX512F_128ONLY, EVEXTuple.T1S_64BIT, W1);
        // @formatter:on

        private final int opReverse;

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1, EVEXTuple.INVALID, WIG);
        }

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            this(opcode, pp, mmmmm, w, op, opReverse, assertion, EVEXTuple.INVALID, WIG);
        }

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
            super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            boolean useEvex = asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0, getDisp8Scale(useEvex, size));
        }

        public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
            asm.emitByte(opReverse);
            asm.emitModRM(src, dst);
        }
    }

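    /*
     * Usage sketch (editor's illustration, assuming an AMD64Address `addr` in scope): the
     * forward opcode loads, the reverse opcode stores, and emitReverse gives register
     * moves the MR encoding:
     *
     *   VexMoveOp.VMOVDQU32.emit(asm, AVXSize.YMM, AMD64.xmm0, addr);   // load:  ymm0 <- [addr]
     *   VexMoveOp.VMOVDQU32.emit(asm, AVXSize.YMM, addr, AMD64.xmm0);   // store: [addr] <- ymm0
     */
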
    public interface VexRRIOp {
        void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
    }

    /**
     * VEX-encoded instructions with an operand order of RMI.
     */
    public static final class VexRMIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexRMIOp VPERMQ = new VexRMIOp("VPERMQ", P_66, M_0F3A, W1, 0x00, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFD = new VexRMIOp("VPSHUFD", P_66, M_0F, WIG, 0x70, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            boolean useEvex = asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, wEvex, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 1, getDisp8Scale(useEvex, size));
            asm.emitByte(imm8);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of MRI.
     */
    public static final class VexMRIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
        public static final VexMRIOp VPEXTRB = new VexMRIOp("VPEXTRB", P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRW = new VexMRIOp("VPEXTRW", P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRD = new VexMRIOp("VPEXTRD", P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRQ = new VexMRIOp("VPEXTRQ", P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
            asm.emitByte(op);
            asm.emitModRM(src, dst);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            boolean useEvex = asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, wEvex, false);
            asm.emitByte(op);
            asm.emitOperandHelper(src, dst, 1, getDisp8Scale(useEvex, size));
            asm.emitByte(imm8);
        }
    }

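    /*
     * Usage sketch (editor's illustration): for MRI ops the first register at the call
     * site is the ModRM rm operand, so extraction reads "destination first":
     *
     *   VexMRIOp.VPEXTRD.emit(asm, AVXSize.XMM, AMD64.rax, AMD64.xmm0, 1);   // vpextrd eax, xmm0, 1
     */
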
    /**
     * VEX-encoded instructions with an operand order of RVMR.
     */
    public static class VexRVMROp extends VexOp {
        // @formatter:off
        public static final VexRVMROp VPBLENDVB = new VexRVMROp("VPBLENDVB", P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
        public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
        public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
        // @formatter:on

        protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(mask.encoding() << 4);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
            boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0, getDisp8Scale(useEvex, size));
            asm.emitByte(mask.encoding() << 4);
        }
    }

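    /*
     * Encoding note (editor's illustration): the fourth (mask) register travels in the high
     * nibble of the trailing imm8 (the /is4 field), which is why emit ends with
     * asm.emitByte(mask.encoding() << 4):
     *
     *   VexRVMROp.VPBLENDVB.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm3, AMD64.xmm1, AMD64.xmm2);
     *   // vpblendvb ymm0, ymm1, ymm2, ymm3
     */
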
1329 */
1330 public static class VexRVMOp extends VexOp {
1331 // @formatter:off
1332 public static final VexRVMOp VANDPS = new VexRVMOp("VANDPS", P_, M_0F, WIG, 0x54);
1333 public static final VexRVMOp VANDPD = new VexRVMOp("VANDPD", P_66, M_0F, WIG, 0x54);
1334 public static final VexRVMOp VANDNPS = new VexRVMOp("VANDNPS", P_, M_0F, WIG, 0x55);
1335 public static final VexRVMOp VANDNPD = new VexRVMOp("VANDNPD", P_66, M_0F, WIG, 0x55);
1336 public static final VexRVMOp VORPS = new VexRVMOp("VORPS", P_, M_0F, WIG, 0x56);
1337 public static final VexRVMOp VORPD = new VexRVMOp("VORPD", P_66, M_0F, WIG, 0x56);
1338 public static final VexRVMOp VXORPS = new VexRVMOp("VXORPS", P_, M_0F, WIG, 0x57);
1339 public static final VexRVMOp VXORPD = new VexRVMOp("VXORPD", P_66, M_0F, WIG, 0x57);
1340 public static final VexRVMOp VADDPS = new VexRVMOp("VADDPS", P_, M_0F, WIG, 0x58);
1341 public static final VexRVMOp VADDPD = new VexRVMOp("VADDPD", P_66, M_0F, WIG, 0x58);
1342 public static final VexRVMOp VADDSS = new VexRVMOp("VADDSS", P_F3, M_0F, WIG, 0x58);
1343 public static final VexRVMOp VADDSD = new VexRVMOp("VADDSD", P_F2, M_0F, WIG, 0x58);
1344 public static final VexRVMOp VMULPS = new VexRVMOp("VMULPS", P_, M_0F, WIG, 0x59);
1345 public static final VexRVMOp VMULPD = new VexRVMOp("VMULPD", P_66, M_0F, WIG, 0x59);
1346 public static final VexRVMOp VMULSS = new VexRVMOp("VMULSS", P_F3, M_0F, WIG, 0x59);
1347 public static final VexRVMOp VMULSD = new VexRVMOp("VMULSD", P_F2, M_0F, WIG, 0x59);
1348 public static final VexRVMOp VSUBPS = new VexRVMOp("VSUBPS", P_, M_0F, WIG, 0x5C);
1349 public static final VexRVMOp VSUBPD = new VexRVMOp("VSUBPD", P_66, M_0F, WIG, 0x5C);
1350 public static final VexRVMOp VSUBSS = new VexRVMOp("VSUBSS", P_F3, M_0F, WIG, 0x5C);
1351 public static final VexRVMOp VSUBSD = new VexRVMOp("VSUBSD", P_F2, M_0F, WIG, 0x5C);
1352 public static final VexRVMOp VMINPS = new VexRVMOp("VMINPS", P_, M_0F, WIG, 0x5D);
1353 public static final VexRVMOp VMINPD = new VexRVMOp("VMINPD", P_66, M_0F, WIG, 0x5D);
1354 public static final VexRVMOp VMINSS = new VexRVMOp("VMINSS", P_F3, M_0F, WIG, 0x5D);
1355 public static final VexRVMOp VMINSD = new VexRVMOp("VMINSD", P_F2, M_0F, WIG, 0x5D);
1356 public static final VexRVMOp VDIVPS = new VexRVMOp("VDIVPS", P_, M_0F, WIG, 0x5E);
1357 public static final VexRVMOp VDIVPD = new VexRVMOp("VDIVPD", P_66, M_0F, WIG, 0x5E);
1358 public static final VexRVMOp VDIVSS = new VexRVMOp("VDIVSS", P_F3, M_0F, WIG, 0x5E);
1359 public static final VexRVMOp VDIVSD = new VexRVMOp("VDIVSD", P_F2, M_0F, WIG, 0x5E);
1360 public static final VexRVMOp VMAXPS = new VexRVMOp("VMAXPS", P_, M_0F, WIG, 0x5F);
1361 public static final VexRVMOp VMAXPD = new VexRVMOp("VMAXPD", P_66, M_0F, WIG, 0x5F);
1362 public static final VexRVMOp VMAXSS = new VexRVMOp("VMAXSS", P_F3, M_0F, WIG, 0x5F);
1363 public static final VexRVMOp VMAXSD = new VexRVMOp("VMAXSD", P_F2, M_0F, WIG, 0x5F);
1364 public static final VexRVMOp VADDSUBPS = new VexRVMOp("VADDSUBPS", P_F2, M_0F, WIG, 0xD0);
1365 public static final VexRVMOp VADDSUBPD = new VexRVMOp("VADDSUBPD", P_66, M_0F, WIG, 0xD0);
1366 public static final VexRVMOp VPAND = new VexRVMOp("VPAND", P_66, M_0F, WIG, 0xDB, VEXOpAssertion.AVX1_2);
1367 public static final VexRVMOp VPOR = new VexRVMOp("VPOR", P_66, M_0F, WIG, 0xEB, VEXOpAssertion.AVX1_2);
1368 public static final VexRVMOp VPXOR = new VexRVMOp("VPXOR", P_66, M_0F, WIG, 0xEF, VEXOpAssertion.AVX1_2);
1369 public static final VexRVMOp VPADDB = new VexRVMOp("VPADDB", P_66, M_0F, WIG, 0xFC,
VEXOpAssertion.AVX1_2); 1370 public static final VexRVMOp VPADDW = new VexRVMOp("VPADDW", P_66, M_0F, WIG, 0xFD, VEXOpAssertion.AVX1_2); 1371 public static final VexRVMOp VPADDD = new VexRVMOp("VPADDD", P_66, M_0F, WIG, 0xFE, VEXOpAssertion.AVX1_2); 1372 public static final VexRVMOp VPADDQ = new VexRVMOp("VPADDQ", P_66, M_0F, WIG, 0xD4, VEXOpAssertion.AVX1_2); 1373 public static final VexRVMOp VPMULHUW = new VexRVMOp("VPMULHUW", P_66, M_0F, WIG, 0xE4, VEXOpAssertion.AVX1_2); 1374 public static final VexRVMOp VPMULHW = new VexRVMOp("VPMULHW", P_66, M_0F, WIG, 0xE5, VEXOpAssertion.AVX1_2); 1375 public static final VexRVMOp VPMULLW = new VexRVMOp("VPMULLW", P_66, M_0F, WIG, 0xD5, VEXOpAssertion.AVX1_2); 1376 public static final VexRVMOp VPMULLD = new VexRVMOp("VPMULLD", P_66, M_0F38, WIG, 0x40, VEXOpAssertion.AVX1_2); 1377 public static final VexRVMOp VPSUBB = new VexRVMOp("VPSUBB", P_66, M_0F, WIG, 0xF8, VEXOpAssertion.AVX1_2); 1378 public static final VexRVMOp VPSUBW = new VexRVMOp("VPSUBW", P_66, M_0F, WIG, 0xF9, VEXOpAssertion.AVX1_2); 1379 public static final VexRVMOp VPSUBD = new VexRVMOp("VPSUBD", P_66, M_0F, WIG, 0xFA, VEXOpAssertion.AVX1_2); 1380 public static final VexRVMOp VPSUBQ = new VexRVMOp("VPSUBQ", P_66, M_0F, WIG, 0xFB, VEXOpAssertion.AVX1_2); 1381 public static final VexRVMOp VPSHUFB = new VexRVMOp("VPSHUFB", P_66, M_0F38, WIG, 0x00, VEXOpAssertion.AVX1_2); 1382 public static final VexRVMOp VCVTSD2SS = new VexRVMOp("VCVTSD2SS", P_F2, M_0F, WIG, 0x5A); 1383 public static final VexRVMOp VCVTSS2SD = new VexRVMOp("VCVTSS2SD", P_F3, M_0F, WIG, 0x5A); 1384 public static final VexRVMOp VCVTSI2SD = new VexRVMOp("VCVTSI2SD", P_F2, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU); 1385 public static final VexRVMOp VCVTSQ2SD = new VexRVMOp("VCVTSQ2SD", P_F2, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU); 1386 public static final VexRVMOp VCVTSI2SS = new VexRVMOp("VCVTSI2SS", P_F3, M_0F, W0, 0x2A, VEXOpAssertion.XMM_XMM_CPU); 1387 public static final VexRVMOp VCVTSQ2SS = new VexRVMOp("VCVTSQ2SS", P_F3, M_0F, W1, 0x2A, VEXOpAssertion.XMM_XMM_CPU); 1388 public static final VexRVMOp VPCMPEQB = new VexRVMOp("VPCMPEQB", P_66, M_0F, WIG, 0x74, VEXOpAssertion.AVX1_2); 1389 public static final VexRVMOp VPCMPEQW = new VexRVMOp("VPCMPEQW", P_66, M_0F, WIG, 0x75, VEXOpAssertion.AVX1_2); 1390 public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2); 1391 public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2); 1392 public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2); 1393 public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2); 1394 public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2); 1395 public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2); 1396 public static final VexRVMOp VFMADD231SS = new VexRVMOp("VFMADD231SS", P_66, M_0F38, W0, 0xB9, VEXOpAssertion.FMA); 1397 public static final VexRVMOp VFMADD231SD = new VexRVMOp("VFMADD231SD", P_66, M_0F38, W1, 0xB9, VEXOpAssertion.FMA); 1398 // @formatter:on 1399 1400 private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) { 1401 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1402 } 1403 1404 protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1405 super(opcode, 
pp, mmmmm, w, op, assertion); 1406 } 1407 1408 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1409 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1410 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false); 1411 asm.emitByte(op); 1412 asm.emitModRM(dst, src2); 1413 } 1414 1415 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1416 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1417 boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false); 1418 asm.emitByte(op); 1419 asm.emitOperandHelper(dst, src2, 0, getDisp8Scale(useEvex, size)); 1420 } 1421 } 1422 1423 public static final class VexGeneralPurposeRVMOp extends VexRVMOp { 1424 // @formatter:off 1425 public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1); 1426 public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2); 1427 public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1428 public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1429 // @formatter:on 1430 1431 private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1432 super(opcode, pp, mmmmm, w, op, assertion); 1433 } 1434 1435 @Override 1436 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1437 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null); 1438 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1439 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false); 1440 asm.emitByte(op); 1441 asm.emitModRM(dst, src2); 1442 } 1443 1444 @Override 1445 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1446 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null); 1447 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1448 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? 
W0 : W1, wEvex, false); 1449 asm.emitByte(op); 1450 asm.emitOperandHelper(dst, src2, 0); 1451 } 1452 } 1453 1454 public static final class VexGeneralPurposeRMVOp extends VexOp { 1455 // @formatter:off 1456 public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1); 1457 public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1458 public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1459 public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1460 public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1461 // @formatter:on 1462 1463 private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1464 super(opcode, pp, mmmmm, w, op, assertion); 1465 } 1466 1467 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1468 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null); 1469 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1470 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false); 1471 asm.emitByte(op); 1472 asm.emitModRM(dst, src1); 1473 } 1474 1475 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) { 1476 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null); 1477 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1478 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false); 1479 asm.emitByte(op); 1480 asm.emitOperandHelper(dst, src1, 0); 1481 } 1482 } 1483 1484 public static final class VexGeneralPurposeRMOp extends VexRMOp { 1485 // @formatter:off 1486 public static final VexGeneralPurposeRMOp BLSI = new VexGeneralPurposeRMOp("BLSI", P_, M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1); 1487 public static final VexGeneralPurposeRMOp BLSMSK = new VexGeneralPurposeRMOp("BLSMSK", P_, M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1); 1488 public static final VexGeneralPurposeRMOp BLSR = new VexGeneralPurposeRMOp("BLSR", P_, M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1); 1489 // @formatter:on 1490 private final int ext; 1491 1492 private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) { 1493 super(opcode, pp, mmmmm, w, op, assertion); 1494 this.ext = ext; 1495 } 1496 1497 @Override 1498 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) { 1499 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); 1500 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false); 1501 asm.emitByte(op); 1502 asm.emitModRM(ext, src); 1503 } 1504 1505 @Override 1506 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) { 1507 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); 1508 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, wEvex, false); 1509 asm.emitByte(op); 1510 asm.emitOperandHelper(ext, src, 0); 1511 } 1512 } 1513 1514 /** 1515 * VEX-encoded shift instructions with an operand order of either RVM or VMI. 
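 * <p>
 * A usage sketch (illustrative registers): the immediate form
 * {@code VexShiftOp.VPSLLD.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1, 4)} uses the VMI
 * encoding (opcode 0x72 with /6 and an imm8) to encode "vpslld xmm0, xmm1, 4", while the
 * three-register overload inherited from {@link VexRVMOp} uses the RVM form with the shift
 * amount taken from an XMM register.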
1516 */ 1517 public static final class VexShiftOp extends VexRVMOp implements VexRRIOp { 1518 // @formatter:off 1519 public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2); 1520 public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2); 1521 public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2); 1522 public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4); 1523 public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4); 1524 public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6); 1525 public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6); 1526 public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6); 1527 // @formatter:on 1528 1529 private final int immOp; 1530 private final int r; 1531 1532 private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) { 1533 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2); 1534 this.immOp = immOp; 1535 this.r = r; 1536 } 1537 1538 @Override 1539 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { 1540 assert assertion.check((AMD64) asm.target.arch, size, null, dst, src); 1541 asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, wEvex, false); 1542 asm.emitByte(immOp); 1543 asm.emitModRM(r, src); 1544 asm.emitByte(imm8); 1545 } 1546 } 1547 1548 public static final class VexMaskMoveOp extends VexOp { 1549 // @formatter:off 1550 public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E); 1551 public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F); 1552 public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1553 public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1554 // @formatter:on 1555 1556 private final int opReverse; 1557 1558 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) { 1559 this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1); 1560 } 1561 1562 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) { 1563 super(opcode, pp, mmmmm, w, op, assertion); 1564 this.opReverse = opReverse; 1565 } 1566 1567 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) { 1568 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null); 1569 asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, wEvex, false); 1570 asm.emitByte(op); 1571 asm.emitOperandHelper(dst, src, 0); 1572 } 1573 1574 public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register mask, Register src) { 1575 assert assertion.check((AMD64) asm.target.arch, size, src, mask, null); 1576 boolean useEvex = asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, wEvex, false); 1577 asm.emitByte(opReverse); 1578 asm.emitOperandHelper(src, dst, 0, getDisp8Scale(useEvex, size)); 1579 } 1580 } 1581 1582 /** 1583 * VEX-encoded instructions with an operand order of RVMI. 
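 * <p>
 * A usage sketch (illustrative registers, assuming AVX support):
 * {@code VexRVMIOp.VINSERTF128.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2, 1)}
 * encodes "vinsertf128 ymm0, ymm1, xmm2, 1", writing xmm2 into the upper lane of ymm0 and
 * copying the lower lane from ymm1.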
1584 */ 1585 public static final class VexRVMIOp extends VexOp { 1586 // @formatter:off 1587 public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6); 1588 public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6); 1589 public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY); 1590 public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY); 1591 // @formatter:on 1592 1593 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) { 1594 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1595 } 1596 1597 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1598 super(opcode, pp, mmmmm, w, op, assertion); 1599 } 1600 1601 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) { 1602 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1603 assert (imm8 & 0xFF) == imm8; 1604 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false); 1605 asm.emitByte(op); 1606 asm.emitModRM(dst, src2); 1607 asm.emitByte(imm8); 1608 } 1609 1610 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) { 1611 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1612 assert (imm8 & 0xFF) == imm8; 1613 boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false); 1614 asm.emitByte(op); 1615 asm.emitOperandHelper(dst, src2, 1, getDisp8Scale(useEvex, size)); 1616 asm.emitByte(imm8); 1617 } 1618 } 1619 1620 /** 1621 * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a 1622 * comparison operator. 
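 * <p>
 * A usage sketch (illustrative registers): an ordered not-equal comparison of two doubles,
 * {@code VexFloatCompareOp.VCMPPD.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2,
 * Predicate.NEQ_OQ)}, encodes "vcmppd xmm0, xmm1, xmm2, 0x0C". {@link Predicate#getPredicate}
 * maps a {@link Condition} and the desired unordered semantics to the matching immediate.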
1623 */
1624 public static final class VexFloatCompareOp extends VexOp {
1625 // @formatter:off
1626 public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2);
1627 public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
1628 public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2);
1629 public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
1630 // @formatter:on
1631
1632 public enum Predicate {
1633 EQ_OQ(0x00),
1634 LT_OS(0x01),
1635 LE_OS(0x02),
1636 UNORD_Q(0x03),
1637 NEQ_UQ(0x04),
1638 NLT_US(0x05),
1639 NLE_US(0x06),
1640 ORD_Q(0x07),
1641 EQ_UQ(0x08),
1642 NGE_US(0x09),
1643 NGT_US(0x0a),
1644 FALSE_OQ(0x0b),
1645 NEQ_OQ(0x0c),
1646 GE_OS(0x0d),
1647 GT_OS(0x0e),
1648 TRUE_UQ(0x0f),
1649 EQ_OS(0x10),
1650 LT_OQ(0x11),
1651 LE_OQ(0x12),
1652 UNORD_S(0x13),
1653 NEQ_US(0x14),
1654 NLT_UQ(0x15),
1655 NLE_UQ(0x16),
1656 ORD_S(0x17),
1657 EQ_US(0x18),
1658 NGE_UQ(0x19),
1659 NGT_UQ(0x1a),
1660 FALSE_OS(0x1b),
1661 NEQ_OS(0x1c),
1662 GE_OQ(0x1d),
1663 GT_OQ(0x1e),
1664 TRUE_US(0x1f);
1665
1666 private int imm8;
1667
1668 Predicate(int imm8) {
1669 this.imm8 = imm8;
1670 }
1671
1672 public static Predicate getPredicate(Condition condition, boolean unorderedIsTrue) {
1673 if (unorderedIsTrue) {
1674 switch (condition) {
1675 case EQ:
1676 return EQ_UQ;
1677 case NE:
1678 return NEQ_UQ;
1679 case LT:
1680 return NGE_UQ;
1681 case LE:
1682 return NGT_UQ;
1683 case GT:
1684 return NLE_UQ;
1685 case GE:
1686 return NLT_UQ;
1687 default:
1688 throw GraalError.shouldNotReachHere();
1689 }
1690 } else {
1691 switch (condition) {
1692 case EQ:
1693 return EQ_OQ;
1694 case NE:
1695 return NEQ_OQ;
1696 case LT:
1697 return LT_OQ;
1698 case LE:
1699 return LE_OQ;
1700 case GT:
1701 return GT_OQ;
1702 case GE:
1703 return GE_OQ;
1704 default:
1705 throw GraalError.shouldNotReachHere();
1706 }
1707 }
1708 }
1709 }
1710
1711 private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) {
1712 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
1713 }
1714
1715 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) {
1716 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
1717 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1718 asm.emitByte(op);
1719 asm.emitModRM(dst, src2);
1720 asm.emitByte(p.imm8);
1721 }
1722
1723 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) {
1724 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
1725 boolean useEvex = asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, wEvex, false);
1726 asm.emitByte(op);
1727 asm.emitOperandHelper(dst, src2, 1, getDisp8Scale(useEvex, size));
1728 asm.emitByte(p.imm8);
1729 }
1730 }
1731
1732 public final void addl(AMD64Address dst, int imm32) {
1733 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1734 }
1735
1736 public final void addl(Register dst, int imm32) {
1737 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1738 }
1739
1740 public final void addl(Register dst, Register src) {
1741 ADD.rmOp.emit(this, DWORD, dst, src);
1742 }
1743
1744 public final void addpd(Register dst, Register src) {
1745 SSEOp.ADD.emit(this, PD, dst, src);
1746 }
1747
1748 public final void addpd(Register dst, AMD64Address src) {
1749
SSEOp.ADD.emit(this, PD, dst, src);
1750 }
1751
1752 public final void addsd(Register dst, Register src) {
1753 SSEOp.ADD.emit(this, SD, dst, src);
1754 }
1755
1756 public final void addsd(Register dst, AMD64Address src) {
1757 SSEOp.ADD.emit(this, SD, dst, src);
1758 }
1759
1760 private void addrNop4() {
1761 // 4 bytes: NOP DWORD PTR [EAX+0]
1762 emitByte(0x0F);
1763 emitByte(0x1F);
1764 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
1765 emitByte(0); // 8-bit offset (1 byte)
1766 }
1767
1768 private void addrNop5() {
1769 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset
1770 emitByte(0x0F);
1771 emitByte(0x1F);
1772 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
1773 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1774 emitByte(0); // 8-bit offset (1 byte)
1775 }
1776
1777 private void addrNop7() {
1778 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset
1779 emitByte(0x0F);
1780 emitByte(0x1F);
1781 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
1782 emitInt(0); // 32-bit offset (4 bytes)
1783 }
1784
1785 private void addrNop8() {
1786 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset
1787 emitByte(0x0F);
1788 emitByte(0x1F);
1789 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
1790 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1791 emitInt(0); // 32-bit offset (4 bytes)
1792 }
1793
1794 public final void andl(Register dst, int imm32) {
1795 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1796 }
1797
1798 public final void andl(Register dst, Register src) {
1799 AND.rmOp.emit(this, DWORD, dst, src);
1800 }
1801
1802 public final void andpd(Register dst, Register src) {
1803 SSEOp.AND.emit(this, PD, dst, src);
1804 }
1805
1806 public final void andpd(Register dst, AMD64Address src) {
1807 SSEOp.AND.emit(this, PD, dst, src);
1808 }
1809
1810 public final void bsfq(Register dst, Register src) {
1811 prefixq(dst, src);
1812 emitByte(0x0F);
1813 emitByte(0xBC);
1814 emitModRM(dst, src);
1815 }
1816
1817 public final void bsrl(Register dst, Register src) {
1818 prefix(dst, src);
1819 emitByte(0x0F);
1820 emitByte(0xBD);
1821 emitModRM(dst, src);
1822 }
1823
1824 public final void bswapl(Register reg) {
1825 prefix(reg);
1826 emitByte(0x0F);
1827 emitModRM(1, reg);
1828 }
1829
1830 public final void cdql() {
1831 emitByte(0x99);
1832 }
1833
1834 public final void cmovl(ConditionFlag cc, Register dst, Register src) {
1835 prefix(dst, src);
1836 emitByte(0x0F);
1837 emitByte(0x40 | cc.getValue());
1838 emitModRM(dst, src);
1839 }
1840
1841 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
1842 prefix(src, dst);
1843 emitByte(0x0F);
1844 emitByte(0x40 | cc.getValue());
1845 emitOperandHelper(dst, src, 0);
1846 }
1847
1848 public final void cmpb(Register dst, Register src) {
1849 CMP.byteRmOp.emit(this, BYTE, dst, src);
1850 }
1851
1852 public final void cmpw(Register dst, Register src) {
1853 CMP.rmOp.emit(this, WORD, dst, src);
1854 }
1855
1856 public final void cmpl(Register dst, int imm32) {
1857 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1858 }
1859
1860 public final void cmpl(Register dst, Register src) {
1861 CMP.rmOp.emit(this, DWORD, dst, src);
1862 }
1863
1864 public final void cmpl(Register dst, AMD64Address src) {
1865 CMP.rmOp.emit(this, DWORD, dst, src);
1866 }
1867
1868 public final void cmpl(AMD64Address dst, int imm32) {
1869 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1870 }
1871
1872 /**
1873 * The 8-bit cmpxchg compares the value at
adr with the contents of rax: if the two values are equal, reg is stored into
1874 * adr; otherwise, the value at adr is loaded into rax. The ZF is set if the compared
1875 * values were equal, and cleared otherwise.
1876 */
1877 public final void cmpxchgb(Register reg, AMD64Address adr) { // cmpxchg
1878 prefixb(adr, reg);
1879 emitByte(0x0F);
1880 emitByte(0xB0);
1881 emitOperandHelper(reg, adr, 0);
1882 }
1883
1884 /**
1885 * The 16-bit cmpxchg compares the value at adr with the contents of rax: if the two values
1886 * are equal, reg is stored into adr; otherwise, the value at adr is loaded into rax. The ZF
1887 * is set if the compared values were equal, and cleared otherwise.
1888 */
1889 public final void cmpxchgw(Register reg, AMD64Address adr) { // cmpxchg
1890 emitByte(0x66); // Switch to 16-bit mode.
1891 prefix(adr, reg);
1892 emitByte(0x0F);
1893 emitByte(0xB1);
1894 emitOperandHelper(reg, adr, 0);
1895 }
1896
1897 /**
1898 * The 32-bit cmpxchg compares the value at adr with the contents of rax: if the two values
1899 * are equal, reg is stored into adr; otherwise, the value at adr is loaded into rax. The ZF
1900 * is set if the compared values were equal, and cleared otherwise.
1901 */
1902 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
1903 prefix(adr, reg);
1904 emitByte(0x0F);
1905 emitByte(0xB1);
1906 emitOperandHelper(reg, adr, 0);
1907 }
1908
1909 public final void cvtsi2sdl(Register dst, Register src) {
1910 SSEOp.CVTSI2SD.emit(this, DWORD, dst, src);
1911 }
1912
1913 public final void cvttsd2sil(Register dst, Register src) {
1914 SSEOp.CVTTSD2SI.emit(this, DWORD, dst, src);
1915 }
1916
1917 public final void decl(AMD64Address dst) {
1918 prefix(dst);
1919 emitByte(0xFF);
1920 emitOperandHelper(1, dst, 0);
1921 }
1922
1923 public final void divsd(Register dst, Register src) {
1924 SSEOp.DIV.emit(this, SD, dst, src);
1925 }
1926
1927 public final void hlt() {
1928 emitByte(0xF4);
1929 }
1930
1931 public final void imull(Register dst, Register src, int value) {
1932 if (isByte(value)) {
1933 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
1934 } else {
1935 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
1936 }
1937 }
1938
1939 public final void incl(AMD64Address dst) {
1940 prefix(dst);
1941 emitByte(0xFF);
1942 emitOperandHelper(0, dst, 0);
1943 }
1944
1945 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
1946 int shortSize = 2;
1947 int longSize = 6;
1948 long disp = jumpTarget - position();
1949 if (!forceDisp32 && isByte(disp - shortSize)) {
1950 // 0111 tttn #8-bit disp
1951 emitByte(0x70 | cc.getValue());
1952 emitByte((int) ((disp - shortSize) & 0xFF));
1953 } else {
1954 // 0000 1111 1000 tttn #32-bit disp
1955 assert isInt(disp - longSize) : "must be 32-bit offset (call4)";
1956 emitByte(0x0F);
1957 emitByte(0x80 | cc.getValue());
1958 emitInt((int) (disp - longSize));
1959 }
1960 }
1961
1962 public final void jcc(ConditionFlag cc, Label l) {
1963 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
1964 if (l.isBound()) {
1965 jcc(cc, l.position(), false);
1966 } else {
1967 // Note: we could eliminate conditional jumps to this jump if the condition
1968 // is the same; however, that seems to be a rather unlikely case.
1969 // Note: use jccb() if the label to be bound is very close, to get
1970 // an 8-bit displacement
1971 l.addPatchAt(position(), this);
1972 emitByte(0x0F);
1973 emitByte(0x80 | cc.getValue());
1974 emitInt(0);
1975 }
1976
1977 }
1978
1979 public final void jccb(ConditionFlag cc, Label l) {
1980 if (l.isBound()) {
1981 int shortSize = 2;
1982 int entry = l.position();
1983 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
1984 long disp = entry - position();
1985 // 0111 tttn #8-bit disp
1986 emitByte(0x70 | cc.getValue());
1987 emitByte((int) ((disp - shortSize) & 0xFF));
1988 } else {
1989 l.addPatchAt(position(), this);
1990 emitByte(0x70 | cc.getValue());
1991 emitByte(0);
1992 }
1993 }
1994
1995 public final void jmp(int jumpTarget, boolean forceDisp32) {
1996 int shortSize = 2;
1997 int longSize = 5;
1998 long disp = jumpTarget - position();
1999 if (!forceDisp32 && isByte(disp - shortSize)) {
2000 emitByte(0xEB);
2001 emitByte((int) ((disp - shortSize) & 0xFF));
2002 } else {
2003 emitByte(0xE9);
2004 emitInt((int) (disp - longSize));
2005 }
2006 }
2007
2008 @Override
2009 public final void jmp(Label l) {
2010 if (l.isBound()) {
2011 jmp(l.position(), false);
2012 } else {
2013 // By default, forward jumps are always 32-bit displacements, since
2014 // we can't yet know where the label will be bound. If you're sure that
2015 // the forward jump will not run beyond 256 bytes, use jmpb to
2016 // force an 8-bit displacement.
2017
2018 l.addPatchAt(position(), this);
2019 emitByte(0xE9);
2020 emitInt(0);
2021 }
2022 }
2023
2024 public final void jmp(Register entry) {
2025 prefix(entry);
2026 emitByte(0xFF);
2027 emitModRM(4, entry);
2028 }
2029
2030 public final void jmp(AMD64Address adr) {
2031 prefix(adr);
2032 emitByte(0xFF);
2033 emitOperandHelper(AMD64.rsp, adr, 0);
2034 }
2035
2036 public final void jmpb(Label l) {
2037 if (l.isBound()) {
2038 int shortSize = 2;
2039 // Displacement is relative to the byte just after the jmpb instruction
2040 int displacement = l.position() - position() - shortSize;
2041 GraalError.guarantee(isByte(displacement), "Displacement too large to be encoded as a byte: %d", displacement);
2042 emitByte(0xEB);
2043 emitByte(displacement & 0xFF);
2044 } else {
2045 l.addPatchAt(position(), this);
2046 emitByte(0xEB);
2047 emitByte(0);
2048 }
2049 }
2050
2051 public final void lead(Register dst, AMD64Address src) {
2052 prefix(src, dst);
2053 emitByte(0x8D);
2054 emitOperandHelper(dst, src, 0);
2055 }
2056
2057 public final void leaq(Register dst, AMD64Address src) {
2058 prefixq(src, dst);
2059 emitByte(0x8D);
2060 emitOperandHelper(dst, src, 0);
2061 }
2062
2063 public final void leave() {
2064 emitByte(0xC9);
2065 }
2066
2067 public final void lock() {
2068 emitByte(0xF0);
2069 }
2070
2071 public final void movapd(Register dst, Register src) {
2072 assert inRC(XMM, dst) && inRC(XMM, src);
2073 simdPrefix(dst, Register.None, src, PD, P_0F, false);
2074 emitByte(0x28);
2075 emitModRM(dst, src);
2076 }
2077
2078 public final void movaps(Register dst, Register src) {
2079 assert inRC(XMM, dst) && inRC(XMM, src);
2080 simdPrefix(dst, Register.None, src, PS, P_0F, false);
2081 emitByte(0x28);
2082 emitModRM(dst, src);
2083 }
2084
2085 public final void movb(AMD64Address dst, int imm8) {
2086 prefix(dst);
2087 emitByte(0xC6);
2088 emitOperandHelper(0, dst, 1);
2089 emitByte(imm8);
2090 }
2091
2092 public final void movb(AMD64Address dst, Register src) {
2093 assert inRC(CPU, src) : "must have byte register";
2094 prefixb(dst,
src);
2095 emitByte(0x88);
2096 emitOperandHelper(src, dst, 0);
2097 }
2098
2099 public final void movl(Register dst, int imm32) {
2100 movl(dst, imm32, false);
2101 }
2102
2103 public final void movl(Register dst, int imm32, boolean annotateImm) {
2104 int insnPos = position();
2105 prefix(dst);
2106 emitByte(0xB8 + encode(dst));
2107 int immPos = position();
2108 emitInt(imm32);
2109 int nextInsnPos = position();
2110 if (annotateImm && codePatchingAnnotationConsumer != null) {
2111 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
2112 }
2113 }
2114
2115 public final void movl(Register dst, Register src) {
2116 prefix(dst, src);
2117 emitByte(0x8B);
2118 emitModRM(dst, src);
2119 }
2120
2121 public final void movl(Register dst, AMD64Address src) {
2122 prefix(src, dst);
2123 emitByte(0x8B);
2124 emitOperandHelper(dst, src, 0);
2125 }
2126
2127 /**
2128 * @param wide use 4 byte encoding for displacements that would normally fit in a byte
2129 */
2130 public final void movl(Register dst, AMD64Address src, boolean wide) {
2131 prefix(src, dst);
2132 emitByte(0x8B);
2133 emitOperandHelper(dst, src, wide, 0);
2134 }
2135
2136 public final void movl(AMD64Address dst, int imm32) {
2137 prefix(dst);
2138 emitByte(0xC7);
2139 emitOperandHelper(0, dst, 4);
2140 emitInt(imm32);
2141 }
2142
2143 public final void movl(AMD64Address dst, Register src) {
2144 prefix(dst, src);
2145 emitByte(0x89);
2146 emitOperandHelper(src, dst, 0);
2147 }
2148
2149 /**
2150 * Newer CPUs require the use of movsd and movss to avoid a partial register stall when loading
2151 * from memory. But for old Opterons, use movlpd instead of movsd. The selection is done in
2152 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
2153 * {@link AMD64MacroAssembler#movflt(Register, Register)}.
2154 */
2155 public final void movlpd(Register dst, AMD64Address src) {
2156 assert inRC(XMM, dst);
2157 simdPrefix(dst, dst, src, PD, P_0F, false);
2158 emitByte(0x12);
2159 emitOperandHelper(dst, src, 0);
2160 }
2161
2162 public final void movlhps(Register dst, Register src) {
2163 assert inRC(XMM, dst) && inRC(XMM, src);
2164 simdPrefix(dst, src, src, PS, P_0F, false);
2165 emitByte(0x16);
2166 emitModRM(dst, src);
2167 }
2168
2169 public final void movq(Register dst, AMD64Address src) {
2170 movq(dst, src, false);
2171 }
2172
2173 public final void movq(Register dst, AMD64Address src, boolean force4BytesDisplacement) {
2174 if (inRC(XMM, dst)) {
2175 // Insn: MOVQ xmm, r/m64
2176 // Code: F3 0F 7E /r
2177 // An alternative instruction would be 66 REX.W 0F 6E /r. We prefer the REX.W-free
2178 // format because it allows us to emit the two-byte-prefixed VEX encoding of the
2179 // instruction when applicable.
2180 simdPrefix(dst, Register.None, src, SS, P_0F, false);
2181 emitByte(0x7E);
2182 emitOperandHelper(dst, src, force4BytesDisplacement, 0);
2183 } else {
2184 // gpr version of movq
2185 prefixq(src, dst);
2186 emitByte(0x8B);
2187 emitOperandHelper(dst, src, force4BytesDisplacement, 0);
2188 }
2189 }
2190
2191 public final void movq(Register dst, Register src) {
2192 assert inRC(CPU, dst) && inRC(CPU, src);
2193 prefixq(dst, src);
2194 emitByte(0x8B);
2195 emitModRM(dst, src);
2196 }
2197
2198 public final void movq(AMD64Address dst, Register src) {
2199 if (inRC(XMM, src)) {
2200 // Insn: MOVQ r/m64, xmm
2201 // Code: 66 0F D6 /r
2202 // An alternative instruction would be 66 REX.W 0F 7E /r. We prefer the REX.W-free
2203 // format because it allows us to emit the two-byte-prefixed VEX encoding of the
2204 // instruction when applicable.
2205 simdPrefix(src, Register.None, dst, PD, P_0F, false);
2206 emitByte(0xD6);
2207 emitOperandHelper(src, dst, 0);
2208 } else {
2209 // gpr version of movq
2210 prefixq(dst, src);
2211 emitByte(0x89);
2212 emitOperandHelper(src, dst, 0);
2213 }
2214 }
2215
2216 public final void movsbl(Register dst, AMD64Address src) {
2217 prefix(src, dst);
2218 emitByte(0x0F);
2219 emitByte(0xBE);
2220 emitOperandHelper(dst, src, 0);
2221 }
2222
2223 public final void movsbl(Register dst, Register src) {
2224 prefix(dst, false, src, true);
2225 emitByte(0x0F);
2226 emitByte(0xBE);
2227 emitModRM(dst, src);
2228 }
2229
2230 public final void movsbq(Register dst, AMD64Address src) {
2231 prefixq(src, dst);
2232 emitByte(0x0F);
2233 emitByte(0xBE);
2234 emitOperandHelper(dst, src, 0);
2235 }
2236
2237 public final void movsbq(Register dst, Register src) {
2238 prefixq(dst, src);
2239 emitByte(0x0F);
2240 emitByte(0xBE);
2241 emitModRM(dst, src);
2242 }
2243
2244 public final void movsd(Register dst, Register src) {
2245 AMD64RMOp.MOVSD.emit(this, SD, dst, src);
2246 }
2247
2248 public final void movsd(Register dst, AMD64Address src) {
2249 AMD64RMOp.MOVSD.emit(this, SD, dst, src);
2250 }
2251
2252 public final void movsd(AMD64Address dst, Register src) {
2253 AMD64MROp.MOVSD.emit(this, SD, dst, src);
2254 }
2255
2256 public final void movss(Register dst, Register src) {
2257 AMD64RMOp.MOVSS.emit(this, SS, dst, src);
2258 }
2259
2260 public final void movss(Register dst, AMD64Address src) {
2261 AMD64RMOp.MOVSS.emit(this, SS, dst, src);
2262 }
2263
2264 public final void movss(AMD64Address dst, Register src) {
2265 AMD64MROp.MOVSS.emit(this, SS, dst, src);
2266 }
2267
2268 public final void mulpd(Register dst, Register src) {
2269 SSEOp.MUL.emit(this, PD, dst, src);
2270 }
2271
2272 public final void mulpd(Register dst, AMD64Address src) {
2273 SSEOp.MUL.emit(this, PD, dst, src);
2274 }
2275
2276 public final void mulsd(Register dst, Register src) {
2277 SSEOp.MUL.emit(this, SD, dst, src);
2278 }
2279
2280 public final void mulsd(Register dst, AMD64Address src) {
2281 SSEOp.MUL.emit(this, SD, dst, src);
2282 }
2283
2284 public final void mulss(Register dst, Register src) {
2285 SSEOp.MUL.emit(this, SS, dst, src);
2286 }
2287
2288 public final void movswl(Register dst, AMD64Address src) {
2289 AMD64RMOp.MOVSX.emit(this, DWORD, dst, src);
2290 }
2291
2292 public final void movswq(Register dst, AMD64Address src) {
2293 AMD64RMOp.MOVSX.emit(this, QWORD, dst, src);
2294 }
2295
2296 public final void movw(AMD64Address dst, int imm16) {
2297 emitByte(0x66); // switch to 16-bit mode
2298 prefix(dst);
2299 emitByte(0xC7);
2300 emitOperandHelper(0, dst, 2);
2301 emitShort(imm16);
2302 }
2303
2304 public final void movw(AMD64Address dst, Register src) {
2305 emitByte(0x66);
2306 prefix(dst, src);
2307 emitByte(0x89);
2308 emitOperandHelper(src, dst, 0);
2309 }
2310
2311 public final void movw(Register dst, AMD64Address src) {
2312 emitByte(0x66);
2313 prefix(src, dst);
2314 emitByte(0x8B);
2315 emitOperandHelper(dst, src, 0);
2316 }
2317
2318 public final void movzbl(Register dst, AMD64Address src) {
2319 prefix(src, dst);
2320 emitByte(0x0F);
2321 emitByte(0xB6);
2322 emitOperandHelper(dst, src, 0);
2323 }
2324
2325 public final void movzbl(Register dst, Register src) {
2326 AMD64RMOp.MOVZXB.emit(this, DWORD, dst, src);
2327 }
2328
2329 public final void movzbq(Register dst,
Register src) {
2330 AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src);
2331 }
2332
2333 public final void movzbq(Register dst, AMD64Address src) {
2334 AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src);
2335 }
2336
2337 public final void movzwl(Register dst, AMD64Address src) {
2338 AMD64RMOp.MOVZX.emit(this, DWORD, dst, src);
2339 }
2340
2341 public final void movzwq(Register dst, AMD64Address src) {
2342 AMD64RMOp.MOVZX.emit(this, QWORD, dst, src);
2343 }
2344
2345 public final void negl(Register dst) {
2346 NEG.emit(this, DWORD, dst);
2347 }
2348
2349 public final void notl(Register dst) {
2350 NOT.emit(this, DWORD, dst);
2351 }
2352
2353 public final void notq(Register dst) {
2354 NOT.emit(this, QWORD, dst);
2355 }
2356
2357 @Override
2358 public final void ensureUniquePC() {
2359 nop();
2360 }
2361
2362 public final void nop() {
2363 nop(1);
2364 }
2365
2366 public void nop(int count) {
2367 int i = count;
2368 if (UseNormalNop) {
2369 assert i > 0 : " ";
2370 // The fancy nops aren't currently recognized by debuggers, making it a
2371 // pain to disassemble code while debugging. If asserts are on, speed is
2372 // clearly not an issue, so simply use the single-byte traditional nop
2373 // for alignment.
2374
2375 for (; i > 0; i--) {
2376 emitByte(0x90);
2377 }
2378 return;
2379 }
2380
2381 if (UseAddressNop) {
2382 if (UseIntelNops) {
2383 intelNops(i);
2384 } else {
2385 amdNops(i);
2386 }
2387 return;
2388 }
2389
2390 // Using nops with size prefixes "0x66 0x90".
2391 // From AMD Optimization Guide:
2392 // 1: 0x90
2393 // 2: 0x66 0x90
2394 // 3: 0x66 0x66 0x90
2395 // 4: 0x66 0x66 0x66 0x90
2396 // 5: 0x66 0x66 0x90 0x66 0x90
2397 // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2398 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2399 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2400 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2401 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2402 //
2403 while (i > 12) {
2404 i -= 4;
2405 emitByte(0x66); // size prefix
2406 emitByte(0x66);
2407 emitByte(0x66);
2408 emitByte(0x90); // nop
2409 }
2410 // 1 - 12 nops
2411 if (i > 8) {
2412 if (i > 9) {
2413 i -= 1;
2414 emitByte(0x66);
2415 }
2416 i -= 3;
2417 emitByte(0x66);
2418 emitByte(0x66);
2419 emitByte(0x90);
2420 }
2421 // 1 - 8 nops
2422 if (i > 4) {
2423 if (i > 6) {
2424 i -= 1;
2425 emitByte(0x66);
2426 }
2427 i -= 3;
2428 emitByte(0x66);
2429 emitByte(0x66);
2430 emitByte(0x90);
2431 }
2432 switch (i) {
2433 case 4:
2434 emitByte(0x66);
2435 emitByte(0x66);
2436 emitByte(0x66);
2437 emitByte(0x90);
2438 break;
2439 case 3:
2440 emitByte(0x66);
2441 emitByte(0x66);
2442 emitByte(0x90);
2443 break;
2444 case 2:
2445 emitByte(0x66);
2446 emitByte(0x90);
2447 break;
2448 case 1:
2449 emitByte(0x90);
2450 break;
2451 default:
2452 assert i == 0;
2453 }
2454 }
2455
2456 private void amdNops(int count) {
2457 int i = count;
2458 //
2459 // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
2460 // 1: 0x90
2461 // 2: 0x66 0x90
2462 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2463 // 4: 0x0F 0x1F 0x40 0x00
2464 // 5: 0x0F 0x1F 0x44 0x00 0x00
2465 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2466 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2467 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2468 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2469 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2470 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2471
2472 // The remaining encoding is AMD-specific: use consecutive address nops
2473
2474 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2475 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2476 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2477 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2478 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2479 // Size prefixes (0x66) are added for larger sizes
2480
2481 while (i >= 22) {
2482 i -= 11;
2483 emitByte(0x66); // size prefix
2484 emitByte(0x66); // size prefix
2485 emitByte(0x66); // size prefix
2486 addrNop8();
2487 }
2488 // Generate the first nop for sizes between 21 and 12
2489 switch (i) {
2490 case 21:
2491 i -= 11;
2492 emitByte(0x66); // size prefix
2493 emitByte(0x66); // size prefix
2494 emitByte(0x66); // size prefix
2495 addrNop8();
2496 break;
2497 case 20:
2498 case 19:
2499 i -= 10;
2500 emitByte(0x66); // size prefix
2501 emitByte(0x66); // size prefix
2502 addrNop8();
2503 break;
2504 case 18:
2505 case 17:
2506 i -= 9;
2507 emitByte(0x66); // size prefix
2508 addrNop8();
2509 break;
2510 case 16:
2511 case 15:
2512 i -= 8;
2513 addrNop8();
2514 break;
2515 case 14:
2516 case 13:
2517 i -= 7;
2518 addrNop7();
2519 break;
2520 case 12:
2521 i -= 6;
2522 emitByte(0x66); // size prefix
2523 addrNop5();
2524 break;
2525 default:
2526 assert i < 12;
2527 }
2528
2529 // Generate the second nop for sizes between 11 and 1
2530 switch (i) {
2531 case 11:
2532 emitByte(0x66); // size prefix
2533 emitByte(0x66); // size prefix
2534 emitByte(0x66); // size prefix
2535 addrNop8();
2536 break;
2537 case 10:
2538 emitByte(0x66); // size prefix
2539 emitByte(0x66); // size prefix
2540 addrNop8();
2541 break;
2542 case 9:
2543 emitByte(0x66); // size prefix
2544 addrNop8();
2545 break;
2546 case 8:
2547 addrNop8();
2548 break;
2549 case 7:
2550 addrNop7();
2551 break;
2552 case 6:
2553 emitByte(0x66); // size prefix
2554 addrNop5();
2555 break;
2556 case 5:
2557 addrNop5();
2558 break;
2559 case 4:
2560 addrNop4();
2561 break;
2562 case 3:
2563 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2564 emitByte(0x66); // size prefix
2565 emitByte(0x66); // size prefix
2566 emitByte(0x90); // nop
2567 break;
2568 case 2:
2569 emitByte(0x66); // size prefix
2570 emitByte(0x90); // nop
2571 break;
2572 case 1:
2573 emitByte(0x90); // nop
2574 break;
2575 default:
2576 assert i == 0;
2577 }
2578 }
2579
2580 @SuppressWarnings("fallthrough")
2581 private void intelNops(int count) {
2582 //
2583 // Using multi-byte nops "0x0F 0x1F [address]" for Intel
2584 // 1: 0x90
2585 // 2: 0x66 0x90
2586 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2587 // 4: 0x0F 0x1F 0x40 0x00
2588 // 5: 0x0F 0x1F 0x44 0x00 0x00
2589 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2590 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2591 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2592 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00
0x00 0x00 0x00
2593 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2594 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2595
2596 // The remaining encoding is Intel-specific: don't use consecutive address nops
2597
2598 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2599 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2600 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2601 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2602
2603 int i = count;
2604 while (i >= 15) {
2605 // For Intel, don't generate consecutive address nops (mix with regular nops)
2606 i -= 15;
2607 emitByte(0x66); // size prefix
2608 emitByte(0x66); // size prefix
2609 emitByte(0x66); // size prefix
2610 addrNop8();
2611 emitByte(0x66); // size prefix
2612 emitByte(0x66); // size prefix
2613 emitByte(0x66); // size prefix
2614 emitByte(0x90);
2615 // nop
2616 }
2617 switch (i) {
2618 case 14:
2619 emitByte(0x66); // size prefix
2620 // fall through
2621 case 13:
2622 emitByte(0x66); // size prefix
2623 // fall through
2624 case 12:
2625 addrNop8();
2626 emitByte(0x66); // size prefix
2627 emitByte(0x66); // size prefix
2628 emitByte(0x66); // size prefix
2629 emitByte(0x90);
2630 // nop
2631 break;
2632 case 11:
2633 emitByte(0x66); // size prefix
2634 // fall through
2635 case 10:
2636 emitByte(0x66); // size prefix
2637 // fall through
2638 case 9:
2639 emitByte(0x66); // size prefix
2640 // fall through
2641 case 8:
2642 addrNop8();
2643 break;
2644 case 7:
2645 addrNop7();
2646 break;
2647 case 6:
2648 emitByte(0x66); // size prefix
2649 // fall through
2650 case 5:
2651 addrNop5();
2652 break;
2653 case 4:
2654 addrNop4();
2655 break;
2656 case 3:
2657 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2658 emitByte(0x66); // size prefix
2659 // fall through
2660 case 2:
2661 emitByte(0x66); // size prefix
2662 // fall through
2663 case 1:
2664 emitByte(0x90);
2665 // nop
2666 break;
2667 default:
2668 assert i == 0;
2669 }
2670 }
2671
2672 public final void orl(Register dst, Register src) {
2673 OR.rmOp.emit(this, DWORD, dst, src);
2674 }
2675
2676 public final void orl(Register dst, int imm32) {
2677 OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2678 }
2679
2680 // Insn: VPACKUSWB xmm1, xmm2, xmm3/m128
2681 // -----
2682 // Insn: VPACKUSWB xmm1, xmm1, xmm2
2683
2684 public final void packuswb(Register dst, Register src) {
2685 assert inRC(XMM, dst) && inRC(XMM, src);
2686 // Code: VEX.NDS.128.66.0F.WIG 67 /r
2687 simdPrefix(dst, dst, src, PD, P_0F, false);
2688 emitByte(0x67);
2689 emitModRM(dst, src);
2690 }
2691
2692 public final void pop(Register dst) {
2693 prefix(dst);
2694 emitByte(0x58 + encode(dst));
2695 }
2696
2697 public void popfq() {
2698 emitByte(0x9D);
2699 }
2700
2701 public final void ptest(Register dst, Register src) {
2702 assert supports(CPUFeature.SSE4_1);
2703 assert inRC(XMM, dst) && inRC(XMM, src);
2704 simdPrefix(dst, Register.None, src, PD, P_0F38, false);
2705 emitByte(0x17);
2706 emitModRM(dst, src);
2707 }
2708
2709 public final void pcmpeqb(Register dst, Register src) {
2710 assert supports(CPUFeature.SSE2);
2711 assert inRC(XMM, dst) && inRC(XMM, src);
2712 simdPrefix(dst, dst, src, PD, P_0F, false);
2713 emitByte(0x74);
2714 emitModRM(dst, src);
2715 }
2716
2717 public final void pcmpeqw(Register dst, Register src) {
2718 assert supports(CPUFeature.SSE2);
2719 assert inRC(XMM, dst) && inRC(XMM, src);
2720 simdPrefix(dst, dst, src,
PD, P_0F, false); 2721 emitByte(0x75); 2722 emitModRM(dst, src); 2723 } 2724 2725 public final void pcmpeqd(Register dst, Register src) { 2726 assert supports(CPUFeature.SSE2); 2727 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2728 simdPrefix(dst, dst, src, PD, P_0F, false); 2729 emitByte(0x76); 2730 emitModRM(dst, src); 2731 } 2732 2733 public final void pcmpestri(Register dst, AMD64Address src, int imm8) { 2734 assert supports(CPUFeature.SSE4_2); 2735 assert inRC(XMM, dst); 2736 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2737 emitByte(0x61); 2738 emitOperandHelper(dst, src, 0); 2739 emitByte(imm8); 2740 } 2741 2742 public final void pcmpestri(Register dst, Register src, int imm8) { 2743 assert supports(CPUFeature.SSE4_2); 2744 assert inRC(XMM, dst) && inRC(XMM, src); 2745 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2746 emitByte(0x61); 2747 emitModRM(dst, src); 2748 emitByte(imm8); 2749 } 2750 2751 public final void pmovmskb(Register dst, Register src) { 2752 assert supports(CPUFeature.SSE2); 2753 assert inRC(CPU, dst) && inRC(XMM, src); 2754 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2755 emitByte(0xD7); 2756 emitModRM(dst, src); 2757 } 2758 2759 private void pmovSZx(Register dst, AMD64Address src, int op) { 2760 assert supports(CPUFeature.SSE4_1); 2761 assert inRC(XMM, dst); 2762 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2763 emitByte(op); 2764 emitOperandHelper(dst, src, 0); 2765 } 2766 2767 public final void pmovsxbw(Register dst, AMD64Address src) { 2768 pmovSZx(dst, src, 0x20); 2769 } 2770 2771 public final void pmovsxbd(Register dst, AMD64Address src) { 2772 pmovSZx(dst, src, 0x21); 2773 } 2774 2775 public final void pmovsxbq(Register dst, AMD64Address src) { 2776 pmovSZx(dst, src, 0x22); 2777 } 2778 2779 public final void pmovsxwd(Register dst, AMD64Address src) { 2780 pmovSZx(dst, src, 0x23); 2781 } 2782 2783 public final void pmovsxwq(Register dst, AMD64Address src) { 2784 pmovSZx(dst, src, 0x24); 2785 } 2786 2787 public final void pmovsxdq(Register dst, AMD64Address src) { 2788 pmovSZx(dst, src, 0x25); 2789 } 2790 2791 // Insn: VPMOVZXBW xmm1, xmm2/m64 2792 public final void pmovzxbw(Register dst, AMD64Address src) { 2793 pmovSZx(dst, src, 0x30); 2794 } 2795 2796 public final void pmovzxbd(Register dst, AMD64Address src) { 2797 pmovSZx(dst, src, 0x31); 2798 } 2799 2800 public final void pmovzxbq(Register dst, AMD64Address src) { 2801 pmovSZx(dst, src, 0x32); 2802 } 2803 2804 public final void pmovzxwd(Register dst, AMD64Address src) { 2805 pmovSZx(dst, src, 0x33); 2806 } 2807 2808 public final void pmovzxwq(Register dst, AMD64Address src) { 2809 pmovSZx(dst, src, 0x34); 2810 } 2811 2812 public final void pmovzxdq(Register dst, AMD64Address src) { 2813 pmovSZx(dst, src, 0x35); 2814 } 2815 2816 public final void pmovzxbw(Register dst, Register src) { 2817 assert supports(CPUFeature.SSE4_1); 2818 assert inRC(XMM, dst) && inRC(XMM, src); 2819 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2820 emitByte(0x30); 2821 emitModRM(dst, src); 2822 } 2823 2824 public final void push(Register src) { 2825 prefix(src); 2826 emitByte(0x50 + encode(src)); 2827 } 2828 2829 public void pushfq() { 2830 emitByte(0x9c); 2831 } 2832 2833 public final void paddd(Register dst, Register src) { 2834 assert inRC(XMM, dst) && inRC(XMM, src); 2835 simdPrefix(dst, dst, src, PD, P_0F, false); 2836 emitByte(0xFE); 2837 emitModRM(dst, src); 2838 } 2839 2840 public final void paddq(Register dst, Register src) { 
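// Insn: PADDQ xmm1, xmm2/m128
// Code: 66 0F D4 /r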
2841 assert inRC(XMM, dst) && inRC(XMM, src); 2842 simdPrefix(dst, dst, src, PD, P_0F, false); 2843 emitByte(0xD4); 2844 emitModRM(dst, src); 2845 } 2846 2847 public final void pextrw(Register dst, Register src, int imm8) { 2848 assert inRC(CPU, dst) && inRC(XMM, src); 2849 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2850 emitByte(0xC5); 2851 emitModRM(dst, src); 2852 emitByte(imm8); 2853 } 2854 2855 public final void pinsrw(Register dst, Register src, int imm8) { 2856 assert inRC(XMM, dst) && inRC(CPU, src); 2857 simdPrefix(dst, dst, src, PD, P_0F, false); 2858 emitByte(0xC4); 2859 emitModRM(dst, src); 2860 emitByte(imm8); 2861 } 2862 2863 public final void por(Register dst, Register src) { 2864 assert inRC(XMM, dst) && inRC(XMM, src); 2865 simdPrefix(dst, dst, src, PD, P_0F, false); 2866 emitByte(0xEB); 2867 emitModRM(dst, src); 2868 } 2869 2870 public final void pand(Register dst, Register src) { 2871 assert inRC(XMM, dst) && inRC(XMM, src); 2872 simdPrefix(dst, dst, src, PD, P_0F, false); 2873 emitByte(0xDB); 2874 emitModRM(dst, src); 2875 } 2876 2877 public final void pxor(Register dst, Register src) { 2878 assert inRC(XMM, dst) && inRC(XMM, src); 2879 simdPrefix(dst, dst, src, PD, P_0F, false); 2880 emitByte(0xEF); 2881 emitModRM(dst, src); 2882 } 2883 2884 public final void pslld(Register dst, int imm8) { 2885 assert isUByte(imm8) : "invalid value"; 2886 assert inRC(XMM, dst); 2887 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 2888 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2889 emitByte(0x72); 2890 emitModRM(6, dst); 2891 emitByte(imm8 & 0xFF); 2892 } 2893 2894 public final void psllq(Register dst, Register shift) { 2895 assert inRC(XMM, dst) && inRC(XMM, shift); 2896 simdPrefix(dst, dst, shift, PD, P_0F, false); 2897 emitByte(0xF3); 2898 emitModRM(dst, shift); 2899 } 2900 2901 public final void psllq(Register dst, int imm8) { 2902 assert isUByte(imm8) : "invalid value"; 2903 assert inRC(XMM, dst); 2904 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 2905 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2906 emitByte(0x73); 2907 emitModRM(6, dst); 2908 emitByte(imm8); 2909 } 2910 2911 public final void psrad(Register dst, int imm8) { 2912 assert isUByte(imm8) : "invalid value"; 2913 assert inRC(XMM, dst); 2914 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 2915 simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false); 2916 emitByte(0x72); 2917 emitModRM(4, dst); 2918 emitByte(imm8); 2919 } 2920 2921 public final void psrld(Register dst, int imm8) { 2922 assert isUByte(imm8) : "invalid value"; 2923 assert inRC(XMM, dst); 2924 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 2925 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2926 emitByte(0x72); 2927 emitModRM(2, dst); 2928 emitByte(imm8); 2929 } 2930 2931 public final void psrlq(Register dst, int imm8) { 2932 assert isUByte(imm8) : "invalid value"; 2933 assert inRC(XMM, dst); 2934 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 2935 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2936 emitByte(0x73); 2937 emitModRM(2, dst); 2938 emitByte(imm8); 2939 } 2940 2941 public final void psrldq(Register dst, int imm8) { 2942 assert isUByte(imm8) : "invalid value"; 2943 assert inRC(XMM, dst); 2944 simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false); 2945 emitByte(0x73); 2946 emitModRM(3, dst); 2947 emitByte(imm8); 2948 } 2949 2950 public final void pshufb(Register dst, Register src) { 2951 assert supports(CPUFeature.SSSE3); 2952 assert inRC(XMM, dst) && inRC(XMM, src); 2953 simdPrefix(dst, dst, src, PD, P_0F38, false); 2954 emitByte(0x00); 2955 
emitModRM(dst, src); 2956 } 2957 2958 public final void pshuflw(Register dst, Register src, int imm8) { 2959 assert supports(CPUFeature.SSE2); 2960 assert isUByte(imm8) : "invalid value"; 2961 assert inRC(XMM, dst) && inRC(XMM, src); 2962 simdPrefix(dst, Register.None, src, SD, P_0F, false); 2963 emitByte(0x70); 2964 emitModRM(dst, src); 2965 emitByte(imm8); 2966 } 2967 2968 public final void pshufd(Register dst, Register src, int imm8) { 2969 assert isUByte(imm8) : "invalid value"; 2970 assert inRC(XMM, dst) && inRC(XMM, src); 2971 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2972 emitByte(0x70); 2973 emitModRM(dst, src); 2974 emitByte(imm8); 2975 } 2976 2977 public final void psubd(Register dst, Register src) { 2978 assert inRC(XMM, dst) && inRC(XMM, src); 2979 simdPrefix(dst, dst, src, PD, P_0F, false); 2980 emitByte(0xFA); 2981 emitModRM(dst, src); 2982 } 2983 2984 public final void punpcklbw(Register dst, Register src) { 2985 assert supports(CPUFeature.SSE2); 2986 assert inRC(XMM, dst) && inRC(XMM, src); 2987 simdPrefix(dst, dst, src, PD, P_0F, false); 2988 emitByte(0x60); 2989 emitModRM(dst, src); 2990 } 2991 2992 public final void rcpps(Register dst, Register src) { 2993 assert inRC(XMM, dst) && inRC(XMM, src); 2994 simdPrefix(dst, Register.None, src, PS, P_0F, false); 2995 emitByte(0x53); 2996 emitModRM(dst, src); 2997 } 2998 2999 public final void ret(int imm16) { 3000 if (imm16 == 0) { 3001 emitByte(0xC3); 3002 } else { 3003 emitByte(0xC2); 3004 emitShort(imm16); 3005 } 3006 } 3007 3008 public final void sarl(Register dst, int imm8) { 3009 prefix(dst); 3010 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3011 if (imm8 == 1) { 3012 emitByte(0xD1); 3013 emitModRM(7, dst); 3014 } else { 3015 emitByte(0xC1); 3016 emitModRM(7, dst); 3017 emitByte(imm8); 3018 } 3019 } 3020 3021 public final void shll(Register dst, int imm8) { 3022 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3023 prefix(dst); 3024 if (imm8 == 1) { 3025 emitByte(0xD1); 3026 emitModRM(4, dst); 3027 } else { 3028 emitByte(0xC1); 3029 emitModRM(4, dst); 3030 emitByte(imm8); 3031 } 3032 } 3033 3034 public final void shll(Register dst) { 3035 // Multiply dst by 2, CL times. 3036 prefix(dst); 3037 emitByte(0xD3); 3038 emitModRM(4, dst); 3039 } 3040 3041 // Insn: SHLX r32a, r/m32, r32b 3042 3043 public final void shlxl(Register dst, Register src1, Register src2) { 3044 VexGeneralPurposeRMVOp.SHLX.emit(this, AVXSize.DWORD, dst, src1, src2); 3045 } 3046 3047 public final void shrl(Register dst, int imm8) { 3048 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3049 prefix(dst); 3050 emitByte(0xC1); 3051 emitModRM(5, dst); 3052 emitByte(imm8); 3053 } 3054 3055 public final void shrl(Register dst) { 3056 // Unsigned divide dst by 2, CL times. 
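// Code: D3 /5 (SHR r/m32, CL)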
3057 prefix(dst); 3058 emitByte(0xD3); 3059 emitModRM(5, dst); 3060 } 3061 3062 public final void subl(AMD64Address dst, int imm32) { 3063 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 3064 } 3065 3066 public final void subl(Register dst, int imm32) { 3067 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 3068 } 3069 3070 public final void subl(Register dst, Register src) { 3071 SUB.rmOp.emit(this, DWORD, dst, src); 3072 } 3073 3074 public final void subpd(Register dst, Register src) { 3075 SSEOp.SUB.emit(this, PD, dst, src); 3076 } 3077 3078 public final void subsd(Register dst, Register src) { 3079 SSEOp.SUB.emit(this, SD, dst, src); 3080 } 3081 3082 public final void subsd(Register dst, AMD64Address src) { 3083 SSEOp.SUB.emit(this, SD, dst, src); 3084 } 3085 3086 public final void testl(Register dst, int imm32) { 3087 // not using emitArith because test 3088 // doesn't support sign-extension of 3089 // 8-bit operands 3090 if (dst.encoding == 0) { 3091 emitByte(0xA9); 3092 } else { 3093 prefix(dst); 3094 emitByte(0xF7); 3095 emitModRM(0, dst); 3096 } 3097 emitInt(imm32); 3098 } 3099 3100 public final void testl(Register dst, Register src) { 3101 prefix(dst, src); 3102 emitByte(0x85); 3103 emitModRM(dst, src); 3104 } 3105 3106 public final void testl(Register dst, AMD64Address src) { 3107 prefix(src, dst); 3108 emitByte(0x85); 3109 emitOperandHelper(dst, src, 0); 3110 } 3111 3112 public final void unpckhpd(Register dst, Register src) { 3113 assert inRC(XMM, dst) && inRC(XMM, src); 3114 simdPrefix(dst, dst, src, PD, P_0F, false); 3115 emitByte(0x15); 3116 emitModRM(dst, src); 3117 } 3118 3119 public final void unpcklpd(Register dst, Register src) { 3120 assert inRC(XMM, dst) && inRC(XMM, src); 3121 simdPrefix(dst, dst, src, PD, P_0F, false); 3122 emitByte(0x14); 3123 emitModRM(dst, src); 3124 } 3125 3126 public final void xorl(Register dst, Register src) { 3127 XOR.rmOp.emit(this, DWORD, dst, src); 3128 } 3129 3130 public final void xorq(Register dst, Register src) { 3131 XOR.rmOp.emit(this, QWORD, dst, src); 3132 } 3133 3134 public final void xorpd(Register dst, Register src) { 3135 SSEOp.XOR.emit(this, PD, dst, src); 3136 } 3137 3138 public final void xorps(Register dst, Register src) { 3139 SSEOp.XOR.emit(this, PS, dst, src); 3140 } 3141 3142 protected final void decl(Register dst) { 3143 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3144 prefix(dst); 3145 emitByte(0xFF); 3146 emitModRM(1, dst); 3147 } 3148 3149 protected final void incl(Register dst) { 3150 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3151 prefix(dst); 3152 emitByte(0xFF); 3153 emitModRM(0, dst); 3154 } 3155 3156 public final void addq(Register dst, int imm32) { 3157 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3158 } 3159 3160 public final void addq(AMD64Address dst, int imm32) { 3161 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3162 } 3163 3164 public final void addq(Register dst, Register src) { 3165 ADD.rmOp.emit(this, QWORD, dst, src); 3166 } 3167 3168 public final void addq(AMD64Address dst, Register src) { 3169 ADD.mrOp.emit(this, QWORD, dst, src); 3170 } 3171 3172 public final void andq(Register dst, int imm32) { 3173 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3174 } 3175 3176 public final void bsrq(Register dst, Register src) { 3177 prefixq(dst, src); 3178 emitByte(0x0F); 3179 emitByte(0xBD); 3180 emitModRM(dst, src); 3181 } 3182 3183 public final void
bswapq(Register reg) { 3184 prefixq(reg); 3185 emitByte(0x0F); 3186 emitByte(0xC8 + encode(reg)); 3187 } 3188 3189 public final void cdqq() { 3190 rexw(); 3191 emitByte(0x99); 3192 } 3193 3194 public final void repStosb() { 3195 emitByte(0xf3); 3196 rexw(); 3197 emitByte(0xaa); 3198 } 3199 3200 public final void repStosq() { 3201 emitByte(0xf3); 3202 rexw(); 3203 emitByte(0xab); 3204 } 3205 3206 public final void cmovq(ConditionFlag cc, Register dst, Register src) { 3207 prefixq(dst, src); 3208 emitByte(0x0F); 3209 emitByte(0x40 | cc.getValue()); 3210 emitModRM(dst, src); 3211 } 3212 3213 public final void setb(ConditionFlag cc, Register dst) { 3214 prefix(dst, true); 3215 emitByte(0x0F); 3216 emitByte(0x90 | cc.getValue()); 3217 emitModRM(0, dst); 3218 } 3219 3220 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) { 3221 prefixq(src, dst); 3222 emitByte(0x0F); 3223 emitByte(0x40 | cc.getValue()); 3224 emitOperandHelper(dst, src, 0); 3225 } 3226 3227 public final void cmpq(Register dst, int imm32) { 3228 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3229 } 3230 3231 public final void cmpq(Register dst, Register src) { 3232 CMP.rmOp.emit(this, QWORD, dst, src); 3233 } 3234 3235 public final void cmpq(Register dst, AMD64Address src) { 3236 CMP.rmOp.emit(this, QWORD, dst, src); 3237 } 3238 3239 public final void cmpxchgq(Register reg, AMD64Address adr) { 3240 prefixq(adr, reg); 3241 emitByte(0x0F); 3242 emitByte(0xB1); 3243 emitOperandHelper(reg, adr, 0); 3244 } 3245 3246 public final void cvtdq2pd(Register dst, Register src) { 3247 assert inRC(XMM, dst) && inRC(XMM, src); 3248 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3249 emitByte(0xE6); 3250 emitModRM(dst, src); 3251 } 3252 3253 public final void cvtsi2sdq(Register dst, Register src) { 3254 SSEOp.CVTSI2SD.emit(this, QWORD, dst, src); 3255 } 3256 3257 public final void cvttsd2siq(Register dst, Register src) { 3258 SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src); 3259 } 3260 3261 public final void cvttpd2dq(Register dst, Register src) { 3262 assert inRC(XMM, dst) && inRC(XMM, src); 3263 simdPrefix(dst, Register.None, src, PD, P_0F, false); 3264 emitByte(0xE6); 3265 emitModRM(dst, src); 3266 } 3267 3268 public final void decq(Register dst) { 3269 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3270 prefixq(dst); 3271 emitByte(0xFF); 3272 emitModRM(1, dst); 3273 } 3274 3275 public final void decq(AMD64Address dst) { 3276 DEC.emit(this, QWORD, dst); 3277 } 3278 3279 public final void imulq(Register dst, Register src) { 3280 prefixq(dst, src); 3281 emitByte(0x0F); 3282 emitByte(0xAF); 3283 emitModRM(dst, src); 3284 } 3285 3286 public final void incq(Register dst) { 3287 // Don't use this directly; use the macro assembler's incrementq() instead.
3288 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3289 prefixq(dst); 3290 emitByte(0xFF); 3291 emitModRM(0, dst); 3292 } 3293 3294 public final void incq(AMD64Address dst) { 3295 INC.emit(this, QWORD, dst); 3296 } 3297 3298 public final void movq(Register dst, long imm64) { 3299 movq(dst, imm64, false); 3300 } 3301 3302 public final void movq(Register dst, long imm64, boolean annotateImm) { 3303 int insnPos = position(); 3304 prefixq(dst); 3305 emitByte(0xB8 + encode(dst)); 3306 int immPos = position(); 3307 emitLong(imm64); 3308 int nextInsnPos = position(); 3309 if (annotateImm && codePatchingAnnotationConsumer != null) { 3310 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 3311 } 3312 } 3313 3314 public final void movslq(Register dst, int imm32) { // Note: emits C7 /0 (MOV r/m64, imm32); the 32-bit immediate is sign-extended to 64 bits. 3315 prefixq(dst); 3316 emitByte(0xC7); 3317 emitModRM(0, dst); 3318 emitInt(imm32); 3319 } 3320 3321 public final void movdq(Register dst, AMD64Address src) { 3322 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 3323 } 3324 3325 public final void movdq(AMD64Address dst, Register src) { 3326 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 3327 } 3328 3329 public final void movdq(Register dst, Register src) { 3330 if (inRC(XMM, dst) && inRC(CPU, src)) { 3331 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 3332 } else if (inRC(XMM, src) && inRC(CPU, dst)) { 3333 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 3334 } else { 3335 throw new InternalError("should not reach here"); 3336 } 3337 } 3338 3339 public final void movdl(Register dst, Register src) { 3340 if (inRC(XMM, dst) && inRC(CPU, src)) { 3341 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 3342 } else if (inRC(XMM, src) && inRC(CPU, dst)) { 3343 AMD64MROp.MOVD.emit(this, DWORD, dst, src); 3344 } else { 3345 throw new InternalError("should not reach here"); 3346 } 3347 } 3348 3349 public final void movdl(Register dst, AMD64Address src) { 3350 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 3351 } 3352 3353 public final void movddup(Register dst, Register src) { 3354 assert supports(CPUFeature.SSE3); 3355 assert inRC(XMM, dst) && inRC(XMM, src); 3356 simdPrefix(dst, Register.None, src, SD, P_0F, false); 3357 emitByte(0x12); 3358 emitModRM(dst, src); 3359 } 3360 3361 public final void movdqu(Register dst, AMD64Address src) { 3362 assert inRC(XMM, dst); 3363 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3364 emitByte(0x6F); 3365 emitOperandHelper(dst, src, 0); 3366 } 3367 3368 public final void movdqu(Register dst, Register src) { 3369 assert inRC(XMM, dst) && inRC(XMM, src); 3370 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3371 emitByte(0x6F); 3372 emitModRM(dst, src); 3373 } 3374 3375 // Insn: VMOVDQU xmm2/m128, xmm1 3376 3377 public final void movdqu(AMD64Address dst, Register src) { 3378 assert inRC(XMM, src); 3379 // Code: VEX.128.F3.0F.WIG 7F /r 3380 simdPrefix(src, Register.None, dst, SS, P_0F, false); 3381 emitByte(0x7F); 3382 emitOperandHelper(src, dst, 0); 3383 } 3384 3385 public final void movslq(AMD64Address dst, int imm32) { 3386 prefixq(dst); 3387 emitByte(0xC7); 3388 emitOperandHelper(0, dst, 4); 3389 emitInt(imm32); 3390 } 3391 3392 public final void movslq(Register dst, AMD64Address src) { 3393 prefixq(src, dst); 3394 emitByte(0x63); 3395 emitOperandHelper(dst, src, 0); 3396 } 3397 3398 public final void movslq(Register dst, Register src) { 3399 prefixq(dst, src); 3400 emitByte(0x63); 3401 emitModRM(dst, src); 3402 } 3403 3404 public final void negq(Register dst) { 3405 prefixq(dst);
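// Encoding: REX.W F7 /3 (NEG r/m64).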
3406 emitByte(0xF7); 3407 emitModRM(3, dst); 3408 } 3409 3410 public final void orq(Register dst, Register src) { 3411 OR.rmOp.emit(this, QWORD, dst, src); 3412 } 3413 3414 public final void shlq(Register dst, int imm8) { 3415 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3416 prefixq(dst); 3417 if (imm8 == 1) { 3418 emitByte(0xD1); 3419 emitModRM(4, dst); 3420 } else { 3421 emitByte(0xC1); 3422 emitModRM(4, dst); 3423 emitByte(imm8); 3424 } 3425 } 3426 3427 public final void shlq(Register dst) { 3428 // Multiply dst by 2, CL times. 3429 prefixq(dst); 3430 emitByte(0xD3); 3431 emitModRM(4, dst); 3432 } 3433 3434 public final void shrq(Register dst, int imm8) { 3435 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3436 prefixq(dst); 3437 if (imm8 == 1) { 3438 emitByte(0xD1); 3439 emitModRM(5, dst); 3440 } else { 3441 emitByte(0xC1); 3442 emitModRM(5, dst); 3443 emitByte(imm8); 3444 } 3445 } 3446 3447 public final void shrq(Register dst) { 3448 // Unsigned divide dst by 2, CL times. 3449 prefixq(dst); 3450 emitByte(0xD3); 3451 emitModRM(5, dst); 3452 } 3453 3454 public final void sarq(Register dst, int imm8) { 3455 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3456 prefixq(dst); 3457 if (imm8 == 1) { 3458 emitByte(0xD1); 3459 emitModRM(7, dst); 3460 } else { 3461 emitByte(0xC1); 3462 emitModRM(7, dst); 3463 emitByte(imm8); 3464 } 3465 } 3466 3467 public final void sbbq(Register dst, Register src) { 3468 SBB.rmOp.emit(this, QWORD, dst, src); 3469 } 3470 3471 public final void subq(Register dst, int imm32) { 3472 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3473 } 3474 3475 public final void subq(AMD64Address dst, int imm32) { 3476 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3477 } 3478 3479 public final void subqWide(Register dst, int imm32) { 3480 // Don't use the sign-extending version; force a full 32-bit immediate. 3481 SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32); 3482 } 3483 3484 public final void subq(Register dst, Register src) { 3485 SUB.rmOp.emit(this, QWORD, dst, src); 3486 } 3487 3488 public final void testq(Register dst, Register src) { 3489 prefixq(dst, src); 3490 emitByte(0x85); 3491 emitModRM(dst, src); 3492 } 3493 3494 public final void btrq(Register src, int imm8) { 3495 prefixq(src); 3496 emitByte(0x0F); 3497 emitByte(0xBA); 3498 emitModRM(6, src); 3499 emitByte(imm8); 3500 } 3501 3502 public final void xaddb(AMD64Address dst, Register src) { 3503 prefixb(dst, src); 3504 emitByte(0x0F); 3505 emitByte(0xC0); 3506 emitOperandHelper(src, dst, 0); 3507 } 3508 3509 public final void xaddw(AMD64Address dst, Register src) { 3510 emitByte(0x66); // Switch to 16-bit mode.
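// The 0x66 operand-size prefix selects the 16-bit form; the opcode bytes (0F C1) are shared with xaddl and xaddq.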
3511 prefix(dst, src); 3512 emitByte(0x0F); 3513 emitByte(0xC1); 3514 emitOperandHelper(src, dst, 0); 3515 } 3516 3517 public final void xaddl(AMD64Address dst, Register src) { 3518 prefix(dst, src); 3519 emitByte(0x0F); 3520 emitByte(0xC1); 3521 emitOperandHelper(src, dst, 0); 3522 } 3523 3524 public final void xaddq(AMD64Address dst, Register src) { 3525 prefixq(dst, src); 3526 emitByte(0x0F); 3527 emitByte(0xC1); 3528 emitOperandHelper(src, dst, 0); 3529 } 3530 3531 public final void xchgb(Register dst, AMD64Address src) { 3532 prefixb(src, dst); 3533 emitByte(0x86); 3534 emitOperandHelper(dst, src, 0); 3535 } 3536 3537 public final void xchgw(Register dst, AMD64Address src) { 3538 emitByte(0x66); 3539 prefix(src, dst); 3540 emitByte(0x87); 3541 emitOperandHelper(dst, src, 0); 3542 } 3543 3544 public final void xchgl(Register dst, AMD64Address src) { 3545 prefix(src, dst); 3546 emitByte(0x87); 3547 emitOperandHelper(dst, src, 0); 3548 } 3549 3550 public final void xchgq(Register dst, AMD64Address src) { 3551 prefixq(src, dst); 3552 emitByte(0x87); 3553 emitOperandHelper(dst, src, 0); 3554 } 3555 3556 public final void membar(int barriers) { 3557 if (target.isMP) { 3558 // We only have to handle StoreLoad 3559 if ((barriers & STORE_LOAD) != 0) { 3560 // All usable chips support "locked" instructions, which suffice as 3561 // barriers and are much faster than the alternative of using the 3562 // cpuid instruction. Here we emit a locked "add [rsp], 0", which is 3563 // conveniently otherwise a no-op except for clobbering the flags. 3564 // 3565 // Any change to this code may require revisiting other places in 3566 // the code where this idiom is used, in particular the 3567 // orderAccess code. 3568 lock(); 3569 addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here 3570 } 3571 } 3572 } 3573 3574 @Override 3575 protected final void patchJumpTarget(int branch, int branchTarget) { 3576 int op = getByte(branch); 3577 assert op == 0xE8 // call 3578 || op == 0x00 // jump table entry 3579 || op == 0xE9 // jmp 3580 || op == 0xEB // short jmp 3581 || (op & 0xF0) == 0x70 // short jcc 3582 || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc 3583 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op; 3584 3585 if (op == 0x00) { 3586 int offsetToJumpTableBase = getShort(branch + 1); 3587 int jumpTableBase = branch - offsetToJumpTableBase; 3588 int imm32 = branchTarget - jumpTableBase; 3589 emitInt(imm32, branch); 3590 } else if (op == 0xEB || (op & 0xF0) == 0x70) { 3591 3592 // short offset operators (jmp and jcc) 3593 final int imm8 = branchTarget - (branch + 2); 3594 /* 3595 * Since a wrongly patched short branch can lead to code that appears to work but 3596 * misbehaves badly, we always fail with an exception here instead of relying on an assert. 3597 */ 3598 GraalError.guarantee(isByte(imm8), "Displacement too large to be encoded as a byte: %d", imm8); 3599 emitByte(imm8, branch + 1); 3600 3601 } else { 3602 3603 int off = 1; 3604 if (op == 0x0F) { 3605 off = 2; 3606 } 3607 3608 int imm32 = branchTarget - (branch + 4 + off); 3609 emitInt(imm32, branch + off); 3610 } 3611 } 3612 3613 public void nullCheck(AMD64Address address) { 3614 testl(AMD64.rax, address); 3615 } 3616 3617 @Override 3618 public void align(int modulus) { 3619 if (position() % modulus != 0) { 3620 nop(modulus - (position() % modulus)); 3621 } 3622 } 3623 3624 /** 3625 * Emits a direct call instruction.
Note that the actual call target is not specified, because 3626 * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is 3627 * responsible for adding the call address to the appropriate patching tables. 3628 */ 3629 public final void call() { 3630 annotatePatchingImmediate(1, 4); 3631 emitByte(0xE8); 3632 emitInt(0); 3633 } 3634 3635 public final void call(Register src) { 3636 prefix(src); 3637 emitByte(0xFF); 3638 emitModRM(2, src); 3639 } 3640 3641 public final void int3() { 3642 emitByte(0xCC); 3643 } 3644 3645 public final void pause() { 3646 emitByte(0xF3); 3647 emitByte(0x90); 3648 } 3649 3650 private void emitx87(int b1, int b2, int i) { 3651 assert 0 <= i && i < 8 : "illegal stack offset"; 3652 emitByte(b1); 3653 emitByte(b2 + i); 3654 } 3655 3656 public final void fldd(AMD64Address src) { 3657 emitByte(0xDD); 3658 emitOperandHelper(0, src, 0); 3659 } 3660 3661 public final void flds(AMD64Address src) { 3662 emitByte(0xD9); 3663 emitOperandHelper(0, src, 0); 3664 } 3665 3666 public final void fldln2() { 3667 emitByte(0xD9); 3668 emitByte(0xED); 3669 } 3670 3671 public final void fldlg2() { 3672 emitByte(0xD9); 3673 emitByte(0xEC); 3674 } 3675 3676 public final void fyl2x() { 3677 emitByte(0xD9); 3678 emitByte(0xF1); 3679 } 3680 3681 public final void fstps(AMD64Address src) { 3682 emitByte(0xD9); 3683 emitOperandHelper(3, src, 0); 3684 } 3685 3686 public final void fstpd(AMD64Address src) { 3687 emitByte(0xDD); 3688 emitOperandHelper(3, src, 0); 3689 } 3690 3691 private void emitFPUArith(int b1, int b2, int i) { 3692 assert 0 <= i && i < 8 : "illegal FPU register: " + i; 3693 emitByte(b1); 3694 emitByte(b2 + i); 3695 } 3696 3697 public void ffree(int i) { 3698 emitFPUArith(0xDD, 0xC0, i); 3699 } 3700 3701 public void fincstp() { 3702 emitByte(0xD9); 3703 emitByte(0xF7); 3704 } 3705 3706 public void fxch(int i) { 3707 emitFPUArith(0xD9, 0xC8, i); 3708 } 3709 3710 public void fnstswAX() { 3711 emitByte(0xDF); 3712 emitByte(0xE0); 3713 } 3714 3715 public void fwait() { 3716 emitByte(0x9B); 3717 } 3718 3719 public void fprem() { 3720 emitByte(0xD9); 3721 emitByte(0xF8); 3722 } 3723 3724 public final void fsin() { 3725 emitByte(0xD9); 3726 emitByte(0xFE); 3727 } 3728 3729 public final void fcos() { 3730 emitByte(0xD9); 3731 emitByte(0xFF); 3732 } 3733 3734 public final void fptan() { 3735 emitByte(0xD9); 3736 emitByte(0xF2); 3737 } 3738 3739 public final void fstp(int i) { 3740 emitx87(0xDD, 0xD8, i); 3741 } 3742 3743 @Override 3744 public AMD64Address makeAddress(Register base, int displacement) { 3745 return new AMD64Address(base, displacement); 3746 } 3747 3748 @Override 3749 public AMD64Address getPlaceholder(int instructionStartPosition) { 3750 return new AMD64Address(AMD64.rip, Register.None, Scale.Times1, 0, instructionStartPosition); 3751 } 3752 3753 private void prefetchPrefix(AMD64Address src) { 3754 prefix(src); 3755 emitByte(0x0F); 3756 } 3757 3758 public void prefetchnta(AMD64Address src) { 3759 prefetchPrefix(src); 3760 emitByte(0x18); 3761 emitOperandHelper(0, src, 0); 3762 } 3763 3764 void prefetchr(AMD64Address src) { 3765 assert supports(CPUFeature.AMD_3DNOW_PREFETCH); 3766 prefetchPrefix(src); 3767 emitByte(0x0D); 3768 emitOperandHelper(0, src, 0); 3769 } 3770 3771 public void prefetcht0(AMD64Address src) { 3772 assert supports(CPUFeature.SSE); 3773 prefetchPrefix(src); 3774 emitByte(0x18); 3775 emitOperandHelper(1, src, 0); 3776 } 3777 3778 public void prefetcht1(AMD64Address src) { 3779 assert supports(CPUFeature.SSE); 3780
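// Encoding: 0F 18 /2 (PREFETCHT1).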
prefetchPrefix(src); 3781 emitByte(0x18); 3782 emitOperandHelper(2, src, 0); 3783 } 3784 3785 public void prefetcht2(AMD64Address src) { 3786 assert supports(CPUFeature.SSE); 3787 prefix(src); 3788 emitByte(0x0f); 3789 emitByte(0x18); 3790 emitOperandHelper(3, src, 0); 3791 } 3792 3793 public void prefetchw(AMD64Address src) { 3794 assert supports(CPUFeature.AMD_3DNOW_PREFETCH); 3795 prefix(src); 3796 emitByte(0x0f); 3797 emitByte(0x0D); 3798 emitOperandHelper(1, src, 0); 3799 } 3800 3801 public void rdtsc() { 3802 emitByte(0x0F); 3803 emitByte(0x31); 3804 } 3805 3806 /** 3807 * Emits an instruction which is considered to be illegal. This is used if we deliberately want 3808 * to crash the program (debugging etc.). 3809 */ 3810 public void illegal() { 3811 emitByte(0x0f); 3812 emitByte(0x0b); 3813 } 3814 3815 public void lfence() { 3816 emitByte(0x0f); 3817 emitByte(0xae); 3818 emitByte(0xe8); 3819 } 3820 3821 public final void vptest(Register dst, Register src) { 3822 VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src); 3823 } 3824 3825 public final void vpxor(Register dst, Register nds, Register src) { 3826 VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src); 3827 } 3828 3829 public final void vpxor(Register dst, Register nds, AMD64Address src) { 3830 VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src); 3831 } 3832 3833 public final void vmovdqu(Register dst, AMD64Address src) { 3834 VexMoveOp.VMOVDQU32.emit(this, AVXSize.YMM, dst, src); 3835 } 3836 3837 public final void vmovdqu(AMD64Address dst, Register src) { 3838 assert inRC(XMM, src); 3839 VexMoveOp.VMOVDQU32.emit(this, AVXSize.YMM, dst, src); 3840 } 3841 3842 public final void vpmovzxbw(Register dst, AMD64Address src) { 3843 assert supports(CPUFeature.AVX2); 3844 VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src); 3845 } 3846 3847 public final void vzeroupper() { 3848 emitVEX(L128, P_, M_0F, W0, 0, 0, true); 3849 emitByte(0x77); 3850 } 3851 3852 // Insn: KORTESTD k1, k2 3853 3854 // This instruction produces ZF or CF flags 3855 public final void kortestd(Register src1, Register src2) { 3856 assert supports(CPUFeature.AVX512BW); 3857 assert inRC(MASK, src1) && inRC(MASK, src2); 3858 // Code: VEX.L0.66.0F.W1 98 /r 3859 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, W1, true); 3860 emitByte(0x98); 3861 emitModRM(src1, src2); 3862 } 3863 3864 // Insn: KORTESTQ k1, k2 3865 3866 // This instruction produces ZF or CF flags 3867 public final void kortestq(Register src1, Register src2) { 3868 assert supports(CPUFeature.AVX512BW); 3869 assert inRC(MASK, src1) && inRC(MASK, src2); 3870 // Code: VEX.L0.0F.W1 98 /r 3871 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, W1, true); 3872 emitByte(0x98); 3873 emitModRM(src1, src2); 3874 } 3875 3876 public final void kmovd(Register dst, Register src) { 3877 assert supports(CPUFeature.AVX512BW); 3878 assert inRC(MASK, dst) || inRC(CPU, dst); 3879 assert inRC(MASK, src) || inRC(CPU, src); 3880 assert !(inRC(CPU, dst) && inRC(CPU, src)); 3881 3882 if (inRC(MASK, dst)) { 3883 if (inRC(MASK, src)) { 3884 // kmovd(KRegister dst, KRegister src): 3885 // Insn: KMOVD k1, k2/m32 3886 // Code: VEX.L0.66.0F.W1 90 /r 3887 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, W1, true); 3888 emitByte(0x90); 3889 emitModRM(dst, src); 3890 } else { 3891 // kmovd(KRegister dst, Register src) 3892 // Insn: KMOVD k1, r32 3893 // Code: VEX.L0.F2.0F.W0 92 /r 3894 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, W0, true); 3895 emitByte(0x92); 3896 
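// ModRM: reg field = mask register (dst), r/m field = general-purpose source (src).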
emitModRM(dst, src); 3897 } 3898 } else { 3899 if (inRC(MASK, src)) { 3900 // kmovd(Register dst, KRegister src) 3901 // Insn: KMOVD r32, k1 3902 // Code: VEX.L0.F2.0F.W0 93 /r 3903 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, W0, true); 3904 emitByte(0x93); 3905 emitModRM(dst, src); 3906 } else { 3907 throw GraalError.shouldNotReachHere(); 3908 } 3909 } 3910 } 3911 3912 public final void kmovq(Register dst, Register src) { 3913 assert supports(CPUFeature.AVX512BW); 3914 assert inRC(MASK, dst) || inRC(CPU, dst); 3915 assert inRC(MASK, src) || inRC(CPU, src); 3916 assert !(inRC(CPU, dst) && inRC(CPU, src)); 3917 3918 if (inRC(MASK, dst)) { 3919 if (inRC(MASK, src)) { 3920 // kmovq(KRegister dst, KRegister src): 3921 // Insn: KMOVQ k1, k2/m64 3922 // Code: VEX.L0.0F.W1 90 /r 3923 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, W1, true); 3924 emitByte(0x90); 3925 emitModRM(dst, src); 3926 } else { 3927 // kmovq(KRegister dst, Register src) 3928 // Insn: KMOVQ k1, r64 3929 // Code: VEX.L0.F2.0F.W1 92 /r 3930 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, W1, true); 3931 emitByte(0x92); 3932 emitModRM(dst, src); 3933 } 3934 } else { 3935 if (inRC(MASK, src)) { 3936 // kmovq(Register dst, KRegister src) 3937 // Insn: KMOVQ r64, k1 3938 // Code: VEX.L0.F2.0F.W1 93 /r 3939 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, W1, true); 3940 emitByte(0x93); 3941 emitModRM(dst, src); 3942 } else { 3943 throw GraalError.shouldNotReachHere(); 3944 } 3945 } 3946 } 3947 3948 // Insn: KTESTD k1, k2 3949 3950 public final void ktestd(Register src1, Register src2) { 3951 assert supports(CPUFeature.AVX512BW); 3952 assert inRC(MASK, src1) && inRC(MASK, src2); 3953 // Code: VEX.L0.66.0F.W1 99 /r 3954 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, W1, true); 3955 emitByte(0x99); 3956 emitModRM(src1, src2); 3957 } 3958 3959 public final void evmovdqu64(Register dst, AMD64Address src) { 3960 assert supports(CPUFeature.AVX512F); 3961 assert inRC(XMM, dst); 3962 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0); 3963 emitByte(0x6F); 3964 emitOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3965 } 3966 3967 // Insn: VPMOVZXBW zmm1, m256 3968 3969 public final void evpmovzxbw(Register dst, AMD64Address src) { 3970 assert supports(CPUFeature.AVX512BW); 3971 assert inRC(XMM, dst); 3972 // Code: EVEX.512.66.0F38.WIG 30 /r 3973 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0); 3974 emitByte(0x30); 3975 emitOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3976 } 3977 3978 public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) { 3979 assert supports(CPUFeature.AVX512BW); 3980 assert inRC(MASK, kdst) && inRC(XMM, nds); 3981 evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0); 3982 emitByte(0x74); 3983 emitOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3984 } 3985 3986 // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512 3987 // ----- 3988 // Insn: VMOVDQU16 zmm1, m512 3989 3990 public final void evmovdqu16(Register dst, AMD64Address src) { 3991 assert supports(CPUFeature.AVX512BW); 3992 assert inRC(XMM, dst); 3993 // Code: EVEX.512.F2.0F.W1 6F /r 3994 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0); 3995 emitByte(0x6F); 3996 emitOperandHelper(dst, src, 0, 
EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3997 } 3998 3999 // Insn: VMOVDQU16 zmm1, k1:z, m512 4000 4001 public final void evmovdqu16(Register dst, Register mask, AMD64Address src) { 4002 assert supports(CPUFeature.AVX512BW); 4003 assert inRC(XMM, dst) && inRC(MASK, mask); 4004 // Code: EVEX.512.F2.0F.W1 6F /r 4005 evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0); 4006 emitByte(0x6F); 4007 emitOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 4008 } 4009 4010 // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1 4011 // ----- 4012 // Insn: VMOVDQU16 m512, zmm1 4013 4014 public final void evmovdqu16(AMD64Address dst, Register src) { 4015 assert supports(CPUFeature.AVX512BW); 4016 assert inRC(XMM, src); 4017 // Code: EVEX.512.F2.0F.W1 7F /r 4018 evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0); 4019 emitByte(0x7F); 4020 emitOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 4021 } 4022 4023 // Insn: VMOVDQU16 m512, k1, zmm1 4024 4025 public final void evmovdqu16(AMD64Address dst, Register mask, Register src) { 4026 assert supports(CPUFeature.AVX512BW); 4027 assert inRC(MASK, mask) && inRC(XMM, src); 4028 // Code: EVEX.512.F2.0F.W1 7F /r 4029 evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0); 4030 emitByte(0x7F); 4031 emitOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 4032 } 4033 4034 // Insn: VPBROADCASTW zmm1 {k1}{z}, reg 4035 // ----- 4036 // Insn: VPBROADCASTW zmm1, reg 4037 4038 public final void evpbroadcastw(Register dst, Register src) { 4039 assert supports(CPUFeature.AVX512BW); 4040 assert inRC(XMM, dst) && inRC(CPU, src); 4041 // Code: EVEX.512.66.0F38.W0 7B /r 4042 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0); 4043 emitByte(0x7B); 4044 emitModRM(dst, src); 4045 } 4046 4047 // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8 4048 // ----- 4049 // Insn: VPCMPUW k1, zmm2, zmm3, imm8 4050 4051 public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) { 4052 assert supports(CPUFeature.AVX512BW); 4053 assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src); 4054 // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib 4055 evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0); 4056 emitByte(0x3E); 4057 emitModRM(kdst, src); 4058 emitByte(vcc); 4059 } 4060 4061 // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8 4062 // ----- 4063 // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8 4064 4065 public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) { 4066 assert supports(CPUFeature.AVX512BW); 4067 assert inRC(MASK, kdst) && inRC(MASK, mask); 4068 assert inRC(XMM, nds) && inRC(XMM, src); 4069 // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib 4070 evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0); 4071 emitByte(0x3E); 4072 emitModRM(kdst, src); 4073 emitByte(vcc); 4074 } 4075 4076 // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2 4077 // ----- 4078 // Insn: VPMOVWB m256, zmm2 4079 4080 public final void evpmovwb(AMD64Address dst, Register src) { 4081 assert supports(CPUFeature.AVX512BW); 4082 assert inRC(XMM, src); 4083 // Code: EVEX.512.F3.0F38.W0 30 /r 4084 evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0); 4085 emitByte(0x30); 4086 emitOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); 4087 } 4088 4089 // Insn: VPMOVWB m256, k1, zmm2 4090 
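// With a memory destination, masking merges: only the word elements selected by the mask are truncated and stored; bytes for masked-off elements are left unchanged.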
4091 public final void evpmovwb(AMD64Address dst, Register mask, Register src) { 4092 assert supports(CPUFeature.AVX512BW); 4093 assert inRC(MASK, mask) && inRC(XMM, src); 4094 // Code: EVEX.512.F3.0F38.W0 30 /r 4095 evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0); 4096 emitByte(0x30); 4097 emitOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); 4098 } 4099 4100 // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256 4101 // ----- 4102 // Insn: VPMOVZXBW zmm1, k1, m256 4103 4104 public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) { 4105 assert supports(CPUFeature.AVX512BW); 4106 assert inRC(MASK, mask) && inRC(XMM, dst); 4107 // Code: EVEX.512.66.0F38.WIG 30 /r 4108 evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0); 4109 emitByte(0x30); 4110 emitOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); 4111 } 4112 4113 }