/*
 * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.MASK;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIntelNops;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.WORD;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import java.util.EnumSet;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends AMD64BaseAssembler {

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }
    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }
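    // Usage sketch (illustrative, not part of the original file): a conditional
    // branch emitter inverts a test via negate(), e.g. ConditionFlag.Less.negate()
    // yields GreaterEqual. Negation always flips the lowest bit of the encoded
    // condition code (here 0xc -> 0xd), matching the x86 cc encoding.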
    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        ByteOrLargerAssertion(CPU, CPU, BYTE, WORD, DWORD, QWORD),
        WordOrLargerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        DwordOrLargerAssertion(CPU, CPU, DWORD, QWORD),
        WordOrDwordAssertion(CPU, CPU, WORD, QWORD),
        QwordAssertion(CPU, CPU, QWORD),
        FloatAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatAssertion(XMM, CPU, DWORD, QWORD),
        FloatToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }
    }

    protected static final int P_0F = 0x0F;
    protected static final int P_0F38 = 0x380F;
    protected static final int P_0F3A = 0x3A0F;

    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.getSizePrefix() != 0) {
                asm.emitByte(size.getSizePrefix());
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        public OperandSize[] getAllowedSizes() {
            return assertion.allowedSizes;
        }

        protected final boolean isSSEInstruction() {
            if (feature == null) {
                return false;
            }
            switch (feature) {
                case SSE:
                case SSE2:
                case SSE3:
                case SSSE3:
                case SSE4A:
                case SSE4_1:
                case SSE4_2:
                    return true;
                default:
                    return false;
            }
        }

        public final OpAssertion getAssertion() {
            return assertion;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }
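    // Encoding sketch (illustrative, not part of the original file): for a QWORD
    // operation, emitOpcode() builds the REX prefix as 0x40 | rxb and sets the W
    // bit (0x08), so rxb == 0b001 yields the REX byte 0x49. Two-byte opcode
    // escapes are stored byte-swapped (P_0F38 == 0x380F) because emitShort()
    // writes little-endian, producing the byte sequence 0x0F 0x38 in the code stream.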
    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            this(opcode, immIsByte, prefix, op, assertion, null);
        }

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, 0, prefix, op, assertion, feature);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.getBytes();
            }
        }
    }
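    // Sketch: immIsByte forces a one-byte immediate regardless of operand size
    // (emitImmediate() asserts imm == (byte) imm); otherwise the immediate width
    // follows the operand size, e.g. 4 bytes for DWORD. immediateSize() tells
    // emitOperandHelper() how many immediate bytes still follow the operand.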
    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL   = new AMD64RMOp("IMUL",   P_0F, 0xAF, OpAssertion.ByteOrLargerAssertion);
        public static final AMD64RMOp BSF    = new AMD64RMOp("BSF",    P_0F, 0xBC);
        public static final AMD64RMOp BSR    = new AMD64RMOp("BSR",    P_0F, 0xBD);
        // POPCNT, TZCNT, and LZCNT support word operation. However, the legacy size prefix should
        // be emitted before the mandatory prefix 0xF3. Since we are not emitting bit count for
        // 16-bit operands, here we simply use DwordOrLargerAssertion.
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, OpAssertion.DwordOrLargerAssertion, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT  = new AMD64RMOp("TZCNT",  0xF3, P_0F, 0xBC, OpAssertion.DwordOrLargerAssertion, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT  = new AMD64RMOp("LZCNT",  0xF3, P_0F, 0xBD, OpAssertion.DwordOrLargerAssertion, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVZX  = new AMD64RMOp("MOVZX",  P_0F, 0xB7, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVSX  = new AMD64RMOp("MOVSX",  P_0F, 0xBF, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordAssertion);
        public static final AMD64RMOp MOVB   = new AMD64RMOp("MOVB",   0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV    = new AMD64RMOp("MOV",    0x8B);
        public static final AMD64RMOp CMP    = new AMD64RMOp("CMP",    0x3B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64RMOp MOVD   = new AMD64RMOp("MOVD",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ   = new AMD64RMOp("MOVQ",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS  = new AMD64RMOp("MOVSS",  P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD  = new AMD64RMOp("MOVSD",  P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
        // TEST is documented as an MR operation, but it's symmetric, and using it as an RM operation is more convenient.
        public static final AMD64RMOp TESTB  = new AMD64RMOp("TEST",   0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST   = new AMD64RMOp("TEST",   0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x10:
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x51:
                        if ((size == SS) || (size == SD)) {
                            nds = dst;
                        }
                        break;
                    case 0x2A:
                    case 0x54:
                    case 0x55:
                    case 0x56:
                    case 0x57:
                    case 0x58:
                    case 0x59:
                    case 0x5A:
                    case 0x5C:
                    case 0x5D:
                    case 0x5E:
                    case 0x5F:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }
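    // Usage sketch (illustrative; 'asm' is an AMD64Assembler instance):
    //   AMD64RMOp.MOV.emit(asm, OperandSize.QWORD, AMD64.rax, AMD64.rbx);
    // assembles the register form "mov rax, rbx", while the AMD64Address
    // overload assembles the load form "mov rax, [address]".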
    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV  = new AMD64MROp("MOV",  0x89);

        // MOVD and MOVQ are the same opcode, just with different operand size prefix.
        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatAssertion must be used.
        public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x11:
                        if ((size == SS) || (size == SD)) {
                            nds = src;
                        }
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(src, nds, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
                asm.emitModRM(src, dst);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, src, null);
            if (isSSEInstruction()) {
                asm.simdPrefix(src, Register.None, dst, size, prefix1, prefix2, size == QWORD);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
            }
            asm.emitOperandHelper(src, dst, 0);
        }
    }
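    // Usage sketch: the MR form writes to the r/m operand, so
    //   AMD64MROp.MOV.emit(asm, OperandSize.QWORD, address, AMD64.rax);
    // assembles the store "mov [address], rax" (opcode 0x89), the reverse of
    // the RM load above (opcode 0x8B).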
    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT  = new AMD64MOp("NOT",  0xF7, 2);
        public static final AMD64MOp NEG  = new AMD64MOp("NEG",  0xF7, 3);
        public static final AMD64MOp MUL  = new AMD64MOp("MUL",  0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV  = new AMD64MOp("DIV",  0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC  = new AMD64MOp("INC",  0xFF, 0);
        public static final AMD64MOp DEC  = new AMD64MOp("DEC",  0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP  = new AMD64MOp("POP",  0x8F, 0, OpAssertion.WordOrDwordAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }
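    // Sketch: 'ext' is the opcode extension encoded in the reg field of the
    // ModRM byte (the "/digit" of the manuals); e.g. NEG is 0xF7 /3, so
    //   AMD64MOp.NEG.emit(asm, OperandSize.DWORD, AMD64.rax);
    // assembles "neg eax".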
    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true,  0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV  = new AMD64MIOp("MOV",  false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm, boolean annotateImm) {
            assert verify(asm, size, dst, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            emit(asm, size, dst, imm, false);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm, boolean annotateImm) {
            assert verify(asm, size, null, null);
            int insnPos = asm.position();
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            int immPos = asm.position();
            emitImmediate(asm, size, imm);
            int nextInsnPos = asm.position();
            if (annotateImm && asm.codePatchingAnnotationConsumer != null) {
                asm.codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos));
            }
        }
    }
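    // Usage sketch:
    //   AMD64MIOp.MOV.emit(asm, OperandSize.DWORD, AMD64.rax, 42);
    // assembles "mov eax, 42" (0xC7 /0 with a 32-bit immediate). Passing
    // annotateImm == true additionally reports the immediate's position and
    // size via an OperandDataAnnotation so the value can be patched later.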
    /**
     * Opcodes with operand order of RMI.
     *
     * We only have one form of round, as the operation is always treated with a single variant
     * input, making its extension to 3-address forms redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL    = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true,  0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.WordOrLargerAssertion, null);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, immIsByte, prefix, op, assertion, feature);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
                asm.emitModRM(dst, src);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);
            if (isSSEInstruction()) {
                Register nds = Register.None;
                switch (op) {
                    case 0x0A:
                    case 0x0B:
                        nds = dst;
                        break;
                    default:
                        break;
                }
                asm.simdPrefix(dst, nds, src, size, prefix1, prefix2, false);
                asm.emitByte(op);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
            }
            asm.emitOperandHelper(dst, src, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }
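    // Usage sketch: the three-operand immediate multiply
    //   AMD64RMIOp.IMUL.emit(asm, OperandSize.DWORD, AMD64.rax, AMD64.rbx, 10);
    // assembles "imul eax, ebx, 10"; IMUL_SX is the 0x6B variant taking a
    // sign-extended byte immediate instead of a full-width one.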
    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS  = new SSEOp("CVTSI2SS",  0xF3, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTSI2SD  = new SSEOp("CVTSI2SD",  0xF2, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp UCOMIS    = new SSEOp("UCOMIS",    P_0F, 0x2E, OpAssertion.PackedFloatAssertion);
        public static final SSEOp SQRT      = new SSEOp("SQRT",      P_0F, 0x51);
        public static final SSEOp AND       = new SSEOp("AND",       P_0F, 0x54, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ANDN      = new SSEOp("ANDN",      P_0F, 0x55, OpAssertion.PackedFloatAssertion);
        public static final SSEOp OR        = new SSEOp("OR",        P_0F, 0x56, OpAssertion.PackedFloatAssertion);
        public static final SSEOp XOR       = new SSEOp("XOR",       P_0F, 0x57, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ADD       = new SSEOp("ADD",       P_0F, 0x58);
        public static final SSEOp MUL       = new SSEOp("MUL",       P_0F, 0x59);
        public static final SSEOp CVTSS2SD  = new SSEOp("CVTSS2SD",  P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS  = new SSEOp("CVTSD2SS",  P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB       = new SSEOp("SUB",       P_0F, 0x5C);
        public static final SSEOp MIN       = new SSEOp("MIN",       P_0F, 0x5D);
        public static final SSEOp DIV       = new SSEOp("DIV",       P_0F, 0x5E);
        public static final SSEOp MAX       = new SSEOp("MAX",       P_0F, 0x5F);
        // @formatter:on

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }

    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR",  1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }
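    // Selection sketch: callers pick the shortest encoding for the immediate,
    // e.g. for "add eax, 5" something like
    //   ADD.getMIOpcode(OperandSize.DWORD, isByte(5)).emit(asm, OperandSize.DWORD, AMD64.rax, 5);
    // chooses the sign-extended 0x83 /0 form instead of the full 0x81 /0 form.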
    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
        }
    }
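    // Sketch: each shift comes in three encodings, e.g. for SHL (extension /4):
    //   SHL.m1Op - 0xD1 /4, shift by one
    //   SHL.mcOp - 0xD3 /4, shift by the CL register
    //   SHL.miOp - 0xC1 /4, shift by an immediate byte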
    private enum VEXOpAssertion {
        AVX1(CPUFeature.AVX, CPUFeature.AVX),
        AVX1_2(CPUFeature.AVX, CPUFeature.AVX2),
        AVX2(CPUFeature.AVX2, CPUFeature.AVX2),
        AVX1_128ONLY(CPUFeature.AVX, null),
        AVX1_256ONLY(null, CPUFeature.AVX),
        AVX2_256ONLY(null, CPUFeature.AVX2),
        XMM_CPU(CPUFeature.AVX, null, XMM, null, CPU, null),
        XMM_XMM_CPU(CPUFeature.AVX, null, XMM, XMM, CPU, null),
        CPU_XMM(CPUFeature.AVX, null, CPU, null, XMM, null),
        AVX1_2_CPU_XMM(CPUFeature.AVX, CPUFeature.AVX2, CPU, null, XMM, null),
        BMI1(CPUFeature.BMI1, null, CPU, CPU, CPU, null),
        BMI2(CPUFeature.BMI2, null, CPU, CPU, CPU, null),
        FMA(CPUFeature.FMA, null, XMM, XMM, XMM, null);

        private final CPUFeature l128feature;
        private final CPUFeature l256feature;

        private final RegisterCategory rCategory;
        private final RegisterCategory vCategory;
        private final RegisterCategory mCategory;
        private final RegisterCategory imm8Category;

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature) {
            this(l128feature, l256feature, XMM, XMM, XMM, XMM);
        }

        VEXOpAssertion(CPUFeature l128feature, CPUFeature l256feature, RegisterCategory rCategory, RegisterCategory vCategory, RegisterCategory mCategory, RegisterCategory imm8Category) {
            this.l128feature = l128feature;
            this.l256feature = l256feature;
            this.rCategory = rCategory;
            this.vCategory = vCategory;
            this.mCategory = mCategory;
            this.imm8Category = imm8Category;
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m) {
            return check(arch, getLFlag(size), r, v, m, null);
        }

        public boolean check(AMD64 arch, AVXSize size, Register r, Register v, Register m, Register imm8) {
            return check(arch, getLFlag(size), r, v, m, imm8);
        }

        public boolean check(AMD64 arch, int l, Register r, Register v, Register m, Register imm8) {
            switch (l) {
                case L128:
                    assert l128feature != null && arch.getFeatures().contains(l128feature) : "emitting illegal 128 bit instruction";
                    break;
                case L256:
                    assert l256feature != null && arch.getFeatures().contains(l256feature) : "emitting illegal 256 bit instruction";
                    break;
            }
            if (r != null) {
                assert r.getRegisterCategory().equals(rCategory);
            }
            if (v != null) {
                assert v.getRegisterCategory().equals(vCategory);
            }
            if (m != null) {
                assert m.getRegisterCategory().equals(mCategory);
            }
            if (imm8 != null) {
                assert imm8.getRegisterCategory().equals(imm8Category);
            }
            return true;
        }

        public boolean supports(EnumSet<CPUFeature> features, AVXSize avxSize) {
            switch (avxSize) {
                case XMM:
                    return l128feature != null && features.contains(l128feature);
                case YMM:
                    return l256feature != null && features.contains(l256feature);
                default:
                    throw GraalError.shouldNotReachHere();
            }
        }
    }
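    // Sketch: the two features gate the vector length independently, e.g.
    // AVX1_2 allows 128-bit forms on plain AVX but requires AVX2 for 256-bit
    // forms, while AVX1_256ONLY (l128feature == null) rejects the 128-bit form
    // entirely.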
    /**
     * Base class for VEX-encoded instructions.
     */
    public static class VexOp {
        protected final int pp;
        protected final int mmmmm;
        protected final int w;
        protected final int op;

        private final String opcode;
        protected final VEXOpAssertion assertion;

        protected VexOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            this.pp = pp;
            this.mmmmm = mmmmm;
            this.w = w;
            this.op = op;
            this.opcode = opcode;
            this.assertion = assertion;
        }

        public final boolean isSupported(AMD64Assembler vasm, AVXSize size) {
            return assertion.supports(((AMD64) vasm.target.arch).getFeatures(), size);
        }

        @Override
        public String toString() {
            return opcode;
        }
    }

    /**
     * VEX-encoded instructions with an operand order of RM, but the M operand must be a register.
     */
    public static class VexRROp extends VexOp {
        // @formatter:off
        public static final VexRROp VMASKMOVDQU = new VexRROp("VMASKMOVDQU", P_66, M_0F, WIG, 0xF7, VEXOpAssertion.AVX1_128ONLY);
        // @formatter:on

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            // VBROADCASTF128 (0x1A) and VPBROADCASTI128 (0x5A) only take memory sources.
            assert op != 0x1A && op != 0x5A;
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
        }
    }
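    // Sketch: pp, mmmmm and w mirror the fields of the VEX prefix: pp selects
    // the implied SIMD prefix (P_66 for a 0x66 prefix, P_F3 for 0xF3, P_F2 for
    // 0xF2, P_ for none), mmmmm selects the opcode map (M_0F, M_0F38, M_0F3A),
    // and w is the VEX.W bit (W0, W1, or WIG when the bit is ignored).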
    /**
     * VEX-encoded instructions with an operand order of RM.
     */
    public static class VexRMOp extends VexRROp {
        // @formatter:off
        public static final VexRMOp VCVTTSS2SI      = new VexRMOp("VCVTTSS2SI",      P_F3, M_0F,   W0,  0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSS2SQ      = new VexRMOp("VCVTTSS2SQ",      P_F3, M_0F,   W1,  0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SI      = new VexRMOp("VCVTTSD2SI",      P_F2, M_0F,   W0,  0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTTSD2SQ      = new VexRMOp("VCVTTSD2SQ",      P_F2, M_0F,   W1,  0x2C, VEXOpAssertion.CPU_XMM);
        public static final VexRMOp VCVTPS2PD       = new VexRMOp("VCVTPS2PD",       P_,   M_0F,   WIG, 0x5A);
        public static final VexRMOp VCVTPD2PS       = new VexRMOp("VCVTPD2PS",       P_66, M_0F,   WIG, 0x5A);
        public static final VexRMOp VCVTDQ2PS       = new VexRMOp("VCVTDQ2PS",       P_,   M_0F,   WIG, 0x5B);
        public static final VexRMOp VCVTTPS2DQ      = new VexRMOp("VCVTTPS2DQ",      P_F3, M_0F,   WIG, 0x5B);
        public static final VexRMOp VCVTTPD2DQ      = new VexRMOp("VCVTTPD2DQ",      P_66, M_0F,   WIG, 0xE6);
        public static final VexRMOp VCVTDQ2PD       = new VexRMOp("VCVTDQ2PD",       P_F3, M_0F,   WIG, 0xE6);
        public static final VexRMOp VBROADCASTSS    = new VexRMOp("VBROADCASTSS",    P_66, M_0F38, W0,  0x18);
        public static final VexRMOp VBROADCASTSD    = new VexRMOp("VBROADCASTSD",    P_66, M_0F38, W0,  0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VBROADCASTF128  = new VexRMOp("VBROADCASTF128",  P_66, M_0F38, W0,  0x1A, VEXOpAssertion.AVX1_256ONLY);
        public static final VexRMOp VPBROADCASTI128 = new VexRMOp("VPBROADCASTI128", P_66, M_0F38, W0,  0x5A, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMOp VPBROADCASTB    = new VexRMOp("VPBROADCASTB",    P_66, M_0F38, W0,  0x78, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTW    = new VexRMOp("VPBROADCASTW",    P_66, M_0F38, W0,  0x79, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTD    = new VexRMOp("VPBROADCASTD",    P_66, M_0F38, W0,  0x58, VEXOpAssertion.AVX2);
        public static final VexRMOp VPBROADCASTQ    = new VexRMOp("VPBROADCASTQ",    P_66, M_0F38, W0,  0x59, VEXOpAssertion.AVX2);
        public static final VexRMOp VPMOVMSKB       = new VexRMOp("VPMOVMSKB",       P_66, M_0F,   WIG, 0xD7, VEXOpAssertion.AVX1_2_CPU_XMM);
        public static final VexRMOp VPMOVSXBW       = new VexRMOp("VPMOVSXBW",       P_66, M_0F38, WIG, 0x20);
        public static final VexRMOp VPMOVSXBD       = new VexRMOp("VPMOVSXBD",       P_66, M_0F38, WIG, 0x21);
        public static final VexRMOp VPMOVSXBQ       = new VexRMOp("VPMOVSXBQ",       P_66, M_0F38, WIG, 0x22);
        public static final VexRMOp VPMOVSXWD       = new VexRMOp("VPMOVSXWD",       P_66, M_0F38, WIG, 0x23);
        public static final VexRMOp VPMOVSXWQ       = new VexRMOp("VPMOVSXWQ",       P_66, M_0F38, WIG, 0x24);
        public static final VexRMOp VPMOVSXDQ       = new VexRMOp("VPMOVSXDQ",       P_66, M_0F38, WIG, 0x25);
        public static final VexRMOp VPMOVZXBW       = new VexRMOp("VPMOVZXBW",       P_66, M_0F38, WIG, 0x30);
        public static final VexRMOp VPMOVZXBD       = new VexRMOp("VPMOVZXBD",       P_66, M_0F38, WIG, 0x31);
        public static final VexRMOp VPMOVZXBQ       = new VexRMOp("VPMOVZXBQ",       P_66, M_0F38, WIG, 0x32);
        public static final VexRMOp VPMOVZXWD       = new VexRMOp("VPMOVZXWD",       P_66, M_0F38, WIG, 0x33);
        public static final VexRMOp VPMOVZXWQ       = new VexRMOp("VPMOVZXWQ",       P_66, M_0F38, WIG, 0x34);
        public static final VexRMOp VPMOVZXDQ       = new VexRMOp("VPMOVZXDQ",       P_66, M_0F38, WIG, 0x35);
        public static final VexRMOp VPTEST          = new VexRMOp("VPTEST",          P_66, M_0F38, WIG, 0x17);
        public static final VexRMOp VSQRTPD         = new VexRMOp("VSQRTPD",         P_66, M_0F,   WIG, 0x51);
        public static final VexRMOp VSQRTPS         = new VexRMOp("VSQRTPS",         P_,   M_0F,   WIG, 0x51);
        public static final VexRMOp VSQRTSD         = new VexRMOp("VSQRTSD",         P_F2, M_0F,   WIG, 0x51);
        public static final VexRMOp VSQRTSS         = new VexRMOp("VSQRTSS",         P_F3, M_0F,   WIG, 0x51);
        public static final VexRMOp VUCOMISS        = new VexRMOp("VUCOMISS",        P_,   M_0F,   WIG, 0x2E);
        public static final VexRMOp VUCOMISD        = new VexRMOp("VUCOMISD",        P_66, M_0F,   WIG, 0x2E);
        // @formatter:on

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }

    /**
     * VEX-encoded move instructions.
     * <p>
     * These instructions have two opcodes: op is the forward move instruction with an operand
     * order of RM, and opReverse is the reverse move instruction with an operand order of MR.
     */
    public static final class VexMoveOp extends VexRMOp {
        // @formatter:off
        public static final VexMoveOp VMOVDQA = new VexMoveOp("VMOVDQA", P_66, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVDQU = new VexMoveOp("VMOVDQU", P_F3, M_0F, WIG, 0x6F, 0x7F);
        public static final VexMoveOp VMOVAPS = new VexMoveOp("VMOVAPS", P_,   M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVAPD = new VexMoveOp("VMOVAPD", P_66, M_0F, WIG, 0x28, 0x29);
        public static final VexMoveOp VMOVUPS = new VexMoveOp("VMOVUPS", P_,   M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVUPD = new VexMoveOp("VMOVUPD", P_66, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSS  = new VexMoveOp("VMOVSS",  P_F3, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVSD  = new VexMoveOp("VMOVSD",  P_F2, M_0F, WIG, 0x10, 0x11);
        public static final VexMoveOp VMOVD   = new VexMoveOp("VMOVD",   P_66, M_0F, W0,  0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        public static final VexMoveOp VMOVQ   = new VexMoveOp("VMOVQ",   P_66, M_0F, W1,  0x6E, 0x7E, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private final int opReverse;

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) {
            this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1);
        }

        private VexMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
            this.opReverse = opReverse;
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitOperandHelper(src, dst, 0);
        }

        public void emitReverse(AMD64Assembler asm, AVXSize size, Register dst, Register src) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(opReverse);
            asm.emitModRM(src, dst);
        }
    }
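    // Usage sketch (AMD64.xmm1 and 'address' are illustrative):
    //   VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, AMD64.xmm1, address); // load, forward op 0x6F
    //   VexMoveOp.VMOVDQU.emit(asm, AVXSize.YMM, address, AMD64.xmm1); // store, opReverse 0x7F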
    public interface VexRRIOp {
        void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8);
    }

    /**
     * VEX-encoded instructions with an operand order of RMI.
     */
    public static final class VexRMIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexRMIOp VPERMQ   = new VexRMIOp("VPERMQ",   P_66, M_0F3A, W1,  0x00, VEXOpAssertion.AVX2_256ONLY);
        public static final VexRMIOp VPSHUFLW = new VexRMIOp("VPSHUFLW", P_F2, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFHW = new VexRMIOp("VPSHUFHW", P_F3, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
        public static final VexRMIOp VPSHUFD  = new VexRMIOp("VPSHUFD",  P_66, M_0F,   WIG, 0x70, VEXOpAssertion.AVX1_2);
        // @formatter:on

        private VexRMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, src);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, null, null);
            asm.vexPrefix(dst, Register.None, src, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 1);
            asm.emitByte(imm8);
        }
    }

    /**
     * VEX-encoded instructions with an operand order of MRI.
     */
    public static final class VexMRIOp extends VexOp implements VexRRIOp {
        // @formatter:off
        public static final VexMRIOp VEXTRACTF128 = new VexMRIOp("VEXTRACTF128", P_66, M_0F3A, W0, 0x19, VEXOpAssertion.AVX1_256ONLY);
        public static final VexMRIOp VEXTRACTI128 = new VexMRIOp("VEXTRACTI128", P_66, M_0F3A, W0, 0x39, VEXOpAssertion.AVX2_256ONLY);
        public static final VexMRIOp VPEXTRB      = new VexMRIOp("VPEXTRB",      P_66, M_0F3A, W0, 0x14, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRW      = new VexMRIOp("VPEXTRW",      P_66, M_0F3A, W0, 0x15, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRD      = new VexMRIOp("VPEXTRD",      P_66, M_0F3A, W0, 0x16, VEXOpAssertion.XMM_CPU);
        public static final VexMRIOp VPEXTRQ      = new VexMRIOp("VPEXTRQ",      P_66, M_0F3A, W1, 0x16, VEXOpAssertion.XMM_CPU);
        // @formatter:on

        private VexMRIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        @Override
        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, dst);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(src, dst);
            asm.emitByte(imm8);
        }

        public void emit(AMD64Assembler asm, AVXSize size, AMD64Address dst, Register src, int imm8) {
            assert assertion.check((AMD64) asm.target.arch, size, src, null, null);
            asm.vexPrefix(src, Register.None, dst, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(src, dst, 1);
            asm.emitByte(imm8);
        }
    }
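    // Usage sketch: the trailing immediate selects a lane, e.g.
    //   VexMRIOp.VEXTRACTF128.emit(asm, AVXSize.YMM, AMD64.xmm0, AMD64.xmm1, 1);
    // extracts the upper 128-bit half of ymm1 into xmm0.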
    /**
     * VEX-encoded instructions with an operand order of RVMR.
     */
    public static class VexRVMROp extends VexOp {
        // @formatter:off
        public static final VexRVMROp VPBLENDVB  = new VexRVMROp("VPBLENDVB",  P_66, M_0F3A, W0, 0x4C, VEXOpAssertion.AVX1_2);
        public static final VexRVMROp VPBLENDVPS = new VexRVMROp("VPBLENDVPS", P_66, M_0F3A, W0, 0x4A, VEXOpAssertion.AVX1);
        public static final VexRVMROp VPBLENDVPD = new VexRVMROp("VPBLENDVPD", P_66, M_0F3A, W0, 0x4B, VEXOpAssertion.AVX1);
        // @formatter:on

        protected VexRVMROp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
            asm.emitByte(mask.encoding() << 4);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, mask, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
            asm.emitByte(mask.encoding() << 4);
        }
    }
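    // Sketch: the fourth (mask) register of these blends is encoded in the high
    // nibble of a trailing immediate byte, hence "mask.encoding() << 4" above.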
    /**
     * VEX-encoded instructions with an operand order of RVM.
     */
    public static class VexRVMOp extends VexOp {
        // @formatter:off
        public static final VexRVMOp VANDPS    = new VexRVMOp("VANDPS",    P_,   M_0F,   WIG, 0x54);
        public static final VexRVMOp VANDPD    = new VexRVMOp("VANDPD",    P_66, M_0F,   WIG, 0x54);
        public static final VexRVMOp VANDNPS   = new VexRVMOp("VANDNPS",   P_,   M_0F,   WIG, 0x55);
        public static final VexRVMOp VANDNPD   = new VexRVMOp("VANDNPD",   P_66, M_0F,   WIG, 0x55);
        public static final VexRVMOp VORPS     = new VexRVMOp("VORPS",     P_,   M_0F,   WIG, 0x56);
        public static final VexRVMOp VORPD     = new VexRVMOp("VORPD",     P_66, M_0F,   WIG, 0x56);
        public static final VexRVMOp VXORPS    = new VexRVMOp("VXORPS",    P_,   M_0F,   WIG, 0x57);
        public static final VexRVMOp VXORPD    = new VexRVMOp("VXORPD",    P_66, M_0F,   WIG, 0x57);
        public static final VexRVMOp VADDPS    = new VexRVMOp("VADDPS",    P_,   M_0F,   WIG, 0x58);
        public static final VexRVMOp VADDPD    = new VexRVMOp("VADDPD",    P_66, M_0F,   WIG, 0x58);
        public static final VexRVMOp VADDSS    = new VexRVMOp("VADDSS",    P_F3, M_0F,   WIG, 0x58);
        public static final VexRVMOp VADDSD    = new VexRVMOp("VADDSD",    P_F2, M_0F,   WIG, 0x58);
        public static final VexRVMOp VMULPS    = new VexRVMOp("VMULPS",    P_,   M_0F,   WIG, 0x59);
        public static final VexRVMOp VMULPD    = new VexRVMOp("VMULPD",    P_66, M_0F,   WIG, 0x59);
        public static final VexRVMOp VMULSS    = new VexRVMOp("VMULSS",    P_F3, M_0F,   WIG, 0x59);
        public static final VexRVMOp VMULSD    = new VexRVMOp("VMULSD",    P_F2, M_0F,   WIG, 0x59);
        public static final VexRVMOp VSUBPS    = new VexRVMOp("VSUBPS",    P_,   M_0F,   WIG, 0x5C);
        public static final VexRVMOp VSUBPD    = new VexRVMOp("VSUBPD",    P_66, M_0F,   WIG, 0x5C);
        public static final VexRVMOp VSUBSS    = new VexRVMOp("VSUBSS",    P_F3, M_0F,   WIG, 0x5C);
        public static final VexRVMOp VSUBSD    = new VexRVMOp("VSUBSD",    P_F2, M_0F,   WIG, 0x5C);
        public static final VexRVMOp VMINPS    = new VexRVMOp("VMINPS",    P_,   M_0F,   WIG, 0x5D);
        public static final VexRVMOp VMINPD    = new VexRVMOp("VMINPD",    P_66, M_0F,   WIG, 0x5D);
        public static final VexRVMOp VMINSS    = new VexRVMOp("VMINSS",    P_F3, M_0F,   WIG, 0x5D);
        public static final VexRVMOp VMINSD    = new VexRVMOp("VMINSD",    P_F2, M_0F,   WIG, 0x5D);
        public static final VexRVMOp VDIVPS    = new VexRVMOp("VDIVPS",    P_,   M_0F,   WIG, 0x5E);
        public static final VexRVMOp VDIVPD    = new VexRVMOp("VDIVPD",    P_66, M_0F,   WIG, 0x5E);
        public static final VexRVMOp VDIVSS    = new VexRVMOp("VDIVSS",    P_F3, M_0F,   WIG, 0x5E);
        public static final VexRVMOp VDIVSD    = new VexRVMOp("VDIVSD",    P_F2, M_0F,   WIG, 0x5E);
        public static final VexRVMOp VMAXPS    = new VexRVMOp("VMAXPS",    P_,   M_0F,   WIG, 0x5F);
        public static final VexRVMOp VMAXPD    = new VexRVMOp("VMAXPD",    P_66, M_0F,   WIG, 0x5F);
        public static final VexRVMOp VMAXSS    = new VexRVMOp("VMAXSS",    P_F3, M_0F,   WIG, 0x5F);
        public static final VexRVMOp VMAXSD    = new VexRVMOp("VMAXSD",    P_F2, M_0F,   WIG, 0x5F);
        public static final VexRVMOp VADDSUBPS = new VexRVMOp("VADDSUBPS", P_F2, M_0F,   WIG, 0xD0);
        public static final VexRVMOp VADDSUBPD = new VexRVMOp("VADDSUBPD", P_66, M_0F,   WIG, 0xD0);
        public static final VexRVMOp VPAND     = new VexRVMOp("VPAND",     P_66, M_0F,   WIG, 0xDB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPOR      = new VexRVMOp("VPOR",      P_66, M_0F,   WIG, 0xEB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPXOR     = new VexRVMOp("VPXOR",     P_66, M_0F,   WIG, 0xEF, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDB    = new VexRVMOp("VPADDB",    P_66, M_0F,   WIG, 0xFC, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDW    = new VexRVMOp("VPADDW",    P_66, M_0F,   WIG, 0xFD, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDD    = new VexRVMOp("VPADDD",    P_66, M_0F,   WIG, 0xFE, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPADDQ    = new VexRVMOp("VPADDQ",    P_66, M_0F,   WIG, 0xD4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHUW  = new VexRVMOp("VPMULHUW",  P_66, M_0F,   WIG, 0xE4, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULHW   = new VexRVMOp("VPMULHW",   P_66, M_0F,   WIG, 0xE5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLW   = new VexRVMOp("VPMULLW",   P_66, M_0F,   WIG, 0xD5, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPMULLD   = new VexRVMOp("VPMULLD",   P_66, M_0F38, WIG, 0x40, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBB    = new VexRVMOp("VPSUBB",    P_66, M_0F,   WIG, 0xF8, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBW    = new VexRVMOp("VPSUBW",    P_66, M_0F,   WIG, 0xF9, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBD    = new VexRVMOp("VPSUBD",    P_66, M_0F,   WIG, 0xFA, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSUBQ    = new VexRVMOp("VPSUBQ",    P_66, M_0F,   WIG, 0xFB, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPSHUFB   = new VexRVMOp("VPSHUFB",   P_66, M_0F38, WIG, 0x00, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VCVTSD2SS = new VexRVMOp("VCVTSD2SS", P_F2, M_0F,   WIG, 0x5A);
        public static final VexRVMOp VCVTSS2SD = new VexRVMOp("VCVTSS2SD", P_F3, M_0F,   WIG, 0x5A);
        public static final VexRVMOp VCVTSI2SD = new VexRVMOp("VCVTSI2SD", P_F2, M_0F,   W0,  0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SD = new VexRVMOp("VCVTSQ2SD", P_F2, M_0F,   W1,  0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSI2SS = new VexRVMOp("VCVTSI2SS", P_F3, M_0F,   W0,  0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VCVTSQ2SS = new VexRVMOp("VCVTSQ2SS", P_F3, M_0F,   W1,  0x2A, VEXOpAssertion.XMM_XMM_CPU);
        public static final VexRVMOp VPCMPEQB  = new VexRVMOp("VPCMPEQB",  P_66, M_0F,   WIG, 0x74, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQW  = new VexRVMOp("VPCMPEQW",  P_66, M_0F,   WIG, 0x75, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQD  = new VexRVMOp("VPCMPEQD",  P_66, M_0F,   WIG, 0x76, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPEQQ  = new VexRVMOp("VPCMPEQQ",  P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTB  = new VexRVMOp("VPCMPGTB",  P_66, M_0F,   WIG, 0x64, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTW  = new VexRVMOp("VPCMPGTW",  P_66, M_0F,   WIG, 0x65, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTD  = new VexRVMOp("VPCMPGTD",  P_66, M_0F,   WIG, 0x66, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VPCMPGTQ  = new VexRVMOp("VPCMPGTQ",  P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2);
        public static final VexRVMOp VFMADD231SS = new VexRVMOp("VFMADD231SS", P_66, M_0F38, W0, 0xB9, VEXOpAssertion.FMA);
        public static final VexRVMOp VFMADD231SD = new VexRVMOp("VFMADD231SD", P_66, M_0F38, W1, 0xB9, VEXOpAssertion.FMA);
        // @formatter:on

        private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) {
            this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1);
        }

        protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) {
            super(opcode, pp, mmmmm, w, op, assertion);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitModRM(dst, src2);
        }

        public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) {
            assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null);
            asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src2, 0);
        }
    }
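    // Usage sketch: RVM ops are non-destructive three-operand forms, e.g.
    //   VexRVMOp.VADDSD.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1, AMD64.xmm2);
    // assembles "vaddsd xmm0, xmm1, xmm2" (xmm0 = xmm1 + xmm2).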
VEXOpAssertion.XMM_XMM_CPU); 1305 public static final VexRVMOp VPCMPEQB = new VexRVMOp("VPCMPEQB", P_66, M_0F, WIG, 0x74, VEXOpAssertion.AVX1_2); 1306 public static final VexRVMOp VPCMPEQW = new VexRVMOp("VPCMPEQW", P_66, M_0F, WIG, 0x75, VEXOpAssertion.AVX1_2); 1307 public static final VexRVMOp VPCMPEQD = new VexRVMOp("VPCMPEQD", P_66, M_0F, WIG, 0x76, VEXOpAssertion.AVX1_2); 1308 public static final VexRVMOp VPCMPEQQ = new VexRVMOp("VPCMPEQQ", P_66, M_0F38, WIG, 0x29, VEXOpAssertion.AVX1_2); 1309 public static final VexRVMOp VPCMPGTB = new VexRVMOp("VPCMPGTB", P_66, M_0F, WIG, 0x64, VEXOpAssertion.AVX1_2); 1310 public static final VexRVMOp VPCMPGTW = new VexRVMOp("VPCMPGTW", P_66, M_0F, WIG, 0x65, VEXOpAssertion.AVX1_2); 1311 public static final VexRVMOp VPCMPGTD = new VexRVMOp("VPCMPGTD", P_66, M_0F, WIG, 0x66, VEXOpAssertion.AVX1_2); 1312 public static final VexRVMOp VPCMPGTQ = new VexRVMOp("VPCMPGTQ", P_66, M_0F38, WIG, 0x37, VEXOpAssertion.AVX1_2); 1313 public static final VexRVMOp VFMADD231SS = new VexRVMOp("VFMADD231SS", P_66, M_0F38, W0, 0xB9, VEXOpAssertion.FMA); 1314 public static final VexRVMOp VFMADD231SD = new VexRVMOp("VFMADD231SD", P_66, M_0F38, W1, 0xB9, VEXOpAssertion.FMA); 1315 // @formatter:on 1316 1317 private VexRVMOp(String opcode, int pp, int mmmmm, int w, int op) { 1318 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1319 } 1320 1321 protected VexRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1322 super(opcode, pp, mmmmm, w, op, assertion); 1323 } 1324 1325 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1326 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1327 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1328 asm.emitByte(op); 1329 asm.emitModRM(dst, src2); 1330 } 1331 1332 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1333 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1334 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1335 asm.emitByte(op); 1336 asm.emitOperandHelper(dst, src2, 0); 1337 } 1338 } 1339 1340 public static final class VexGeneralPurposeRVMOp extends VexRVMOp { 1341 // @formatter:off 1342 public static final VexGeneralPurposeRVMOp ANDN = new VexGeneralPurposeRVMOp("ANDN", P_, M_0F38, WIG, 0xF2, VEXOpAssertion.BMI1); 1343 public static final VexGeneralPurposeRVMOp MULX = new VexGeneralPurposeRVMOp("MULX", P_F2, M_0F38, WIG, 0xF6, VEXOpAssertion.BMI2); 1344 public static final VexGeneralPurposeRVMOp PDEP = new VexGeneralPurposeRVMOp("PDEP", P_F2, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1345 public static final VexGeneralPurposeRVMOp PEXT = new VexGeneralPurposeRVMOp("PEXT", P_F3, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1346 // @formatter:on 1347 1348 private VexGeneralPurposeRVMOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1349 super(opcode, pp, mmmmm, w, op, assertion); 1350 } 1351 1352 @Override 1353 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1354 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, src2, null); 1355 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1356 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? 
W0 : W1, false); 1357 asm.emitByte(op); 1358 asm.emitModRM(dst, src2); 1359 } 1360 1361 @Override 1362 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2) { 1363 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src1, null, null); 1364 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1365 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1366 asm.emitByte(op); 1367 asm.emitOperandHelper(dst, src2, 0); 1368 } 1369 } 1370 1371 public static final class VexGeneralPurposeRMVOp extends VexOp { 1372 // @formatter:off 1373 public static final VexGeneralPurposeRMVOp BEXTR = new VexGeneralPurposeRMVOp("BEXTR", P_, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI1); 1374 public static final VexGeneralPurposeRMVOp BZHI = new VexGeneralPurposeRMVOp("BZHI", P_, M_0F38, WIG, 0xF5, VEXOpAssertion.BMI2); 1375 public static final VexGeneralPurposeRMVOp SARX = new VexGeneralPurposeRMVOp("SARX", P_F3, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1376 public static final VexGeneralPurposeRMVOp SHRX = new VexGeneralPurposeRMVOp("SHRX", P_F2, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1377 public static final VexGeneralPurposeRMVOp SHLX = new VexGeneralPurposeRMVOp("SHLX", P_66, M_0F38, WIG, 0xF7, VEXOpAssertion.BMI2); 1378 // @formatter:on 1379 1380 private VexGeneralPurposeRMVOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1381 super(opcode, pp, mmmmm, w, op, assertion); 1382 } 1383 1384 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2) { 1385 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, src1, null); 1386 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1387 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1388 asm.emitByte(op); 1389 asm.emitModRM(dst, src1); 1390 } 1391 1392 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src1, Register src2) { 1393 assert assertion.check((AMD64) asm.target.arch, LZ, dst, src2, null, null); 1394 assert size == AVXSize.DWORD || size == AVXSize.QWORD; 1395 asm.vexPrefix(dst, src2, src1, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1396 asm.emitByte(op); 1397 asm.emitOperandHelper(dst, src1, 0); 1398 } 1399 } 1400 1401 public static final class VexGeneralPurposeRMOp extends VexRMOp { 1402 // @formatter:off 1403 public static final VexGeneralPurposeRMOp BLSI = new VexGeneralPurposeRMOp("BLSI", P_, M_0F38, WIG, 0xF3, 3, VEXOpAssertion.BMI1); 1404 public static final VexGeneralPurposeRMOp BLSMSK = new VexGeneralPurposeRMOp("BLSMSK", P_, M_0F38, WIG, 0xF3, 2, VEXOpAssertion.BMI1); 1405 public static final VexGeneralPurposeRMOp BLSR = new VexGeneralPurposeRMOp("BLSR", P_, M_0F38, WIG, 0xF3, 1, VEXOpAssertion.BMI1); 1406 // @formatter:on 1407 private final int ext; 1408 1409 private VexGeneralPurposeRMOp(String opcode, int pp, int mmmmm, int w, int op, int ext, VEXOpAssertion assertion) { 1410 super(opcode, pp, mmmmm, w, op, assertion); 1411 this.ext = ext; 1412 } 1413 1414 @Override 1415 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src) { 1416 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); 1417 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? 
W0 : W1, false); 1418 asm.emitByte(op); 1419 asm.emitModRM(ext, src); 1420 } 1421 1422 @Override 1423 public void emit(AMD64Assembler asm, AVXSize size, Register dst, AMD64Address src) { 1424 assert assertion.check((AMD64) asm.target.arch, size, dst, null, null); 1425 asm.vexPrefix(AMD64.cpuRegisters[ext], dst, src, size, pp, mmmmm, size == AVXSize.DWORD ? W0 : W1, false); 1426 asm.emitByte(op); 1427 asm.emitOperandHelper(ext, src, 0); 1428 } 1429 } 1430 1431 /** 1432 * VEX-encoded shift instructions with an operand order of either RVM or VMI. 1433 */ 1434 public static final class VexShiftOp extends VexRVMOp implements VexRRIOp { 1435 // @formatter:off 1436 public static final VexShiftOp VPSRLW = new VexShiftOp("VPSRLW", P_66, M_0F, WIG, 0xD1, 0x71, 2); 1437 public static final VexShiftOp VPSRLD = new VexShiftOp("VPSRLD", P_66, M_0F, WIG, 0xD2, 0x72, 2); 1438 public static final VexShiftOp VPSRLQ = new VexShiftOp("VPSRLQ", P_66, M_0F, WIG, 0xD3, 0x73, 2); 1439 public static final VexShiftOp VPSRAW = new VexShiftOp("VPSRAW", P_66, M_0F, WIG, 0xE1, 0x71, 4); 1440 public static final VexShiftOp VPSRAD = new VexShiftOp("VPSRAD", P_66, M_0F, WIG, 0xE2, 0x72, 4); 1441 public static final VexShiftOp VPSLLW = new VexShiftOp("VPSLLW", P_66, M_0F, WIG, 0xF1, 0x71, 6); 1442 public static final VexShiftOp VPSLLD = new VexShiftOp("VPSLLD", P_66, M_0F, WIG, 0xF2, 0x72, 6); 1443 public static final VexShiftOp VPSLLQ = new VexShiftOp("VPSLLQ", P_66, M_0F, WIG, 0xF3, 0x73, 6); 1444 // @formatter:on 1445 1446 private final int immOp; 1447 private final int r; 1448 1449 private VexShiftOp(String opcode, int pp, int mmmmm, int w, int op, int immOp, int r) { 1450 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1_2); 1451 this.immOp = immOp; 1452 this.r = r; 1453 } 1454 1455 @Override 1456 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { 1457 assert assertion.check((AMD64) asm.target.arch, size, null, dst, src); 1458 asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, false); 1459 asm.emitByte(immOp); 1460 asm.emitModRM(r, src); 1461 asm.emitByte(imm8); 1462 } 1463 } 1464 1465 public static final class VexMaskMoveOp extends VexOp { 1466 // @formatter:off 1467 public static final VexMaskMoveOp VMASKMOVPS = new VexMaskMoveOp("VMASKMOVPS", P_66, M_0F38, W0, 0x2C, 0x2E); 1468 public static final VexMaskMoveOp VMASKMOVPD = new VexMaskMoveOp("VMASKMOVPD", P_66, M_0F38, W0, 0x2D, 0x2F); 1469 public static final VexMaskMoveOp VPMASKMOVD = new VexMaskMoveOp("VPMASKMOVD", P_66, M_0F38, W0, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1470 public static final VexMaskMoveOp VPMASKMOVQ = new VexMaskMoveOp("VPMASKMOVQ", P_66, M_0F38, W1, 0x8C, 0x8E, VEXOpAssertion.AVX2); 1471 // @formatter:on 1472 1473 private final int opReverse; 1474 1475 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse) { 1476 this(opcode, pp, mmmmm, w, op, opReverse, VEXOpAssertion.AVX1); 1477 } 1478 1479 private VexMaskMoveOp(String opcode, int pp, int mmmmm, int w, int op, int opReverse, VEXOpAssertion assertion) { 1480 super(opcode, pp, mmmmm, w, op, assertion); 1481 this.opReverse = opReverse; 1482 } 1483 1484 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register mask, AMD64Address src) { 1485 assert assertion.check((AMD64) asm.target.arch, size, dst, mask, null); 1486 asm.vexPrefix(dst, mask, src, size, pp, mmmmm, w, false); 1487 asm.emitByte(op); 1488 asm.emitOperandHelper(dst, src, 0); 1489 } 1490 1491 public void emit(AMD64Assembler asm, AVXSize size, 
AMD64Address dst, Register mask, Register src) { 1492 assert assertion.check((AMD64) asm.target.arch, size, src, mask, null); 1493 asm.vexPrefix(src, mask, dst, size, pp, mmmmm, w, false); 1494 asm.emitByte(opReverse); 1495 asm.emitOperandHelper(src, dst, 0); 1496 } 1497 } 1498 1499 /** 1500 * VEX-encoded instructions with an operand order of RVMI. 1501 */ 1502 public static final class VexRVMIOp extends VexOp { 1503 // @formatter:off 1504 public static final VexRVMIOp VSHUFPS = new VexRVMIOp("VSHUFPS", P_, M_0F, WIG, 0xC6); 1505 public static final VexRVMIOp VSHUFPD = new VexRVMIOp("VSHUFPD", P_66, M_0F, WIG, 0xC6); 1506 public static final VexRVMIOp VINSERTF128 = new VexRVMIOp("VINSERTF128", P_66, M_0F3A, W0, 0x18, VEXOpAssertion.AVX1_256ONLY); 1507 public static final VexRVMIOp VINSERTI128 = new VexRVMIOp("VINSERTI128", P_66, M_0F3A, W0, 0x38, VEXOpAssertion.AVX2_256ONLY); 1508 // @formatter:on 1509 1510 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op) { 1511 this(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1512 } 1513 1514 private VexRVMIOp(String opcode, int pp, int mmmmm, int w, int op, VEXOpAssertion assertion) { 1515 super(opcode, pp, mmmmm, w, op, assertion); 1516 } 1517 1518 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, int imm8) { 1519 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1520 assert (imm8 & 0xFF) == imm8; 1521 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1522 asm.emitByte(op); 1523 asm.emitModRM(dst, src2); 1524 asm.emitByte(imm8); 1525 } 1526 1527 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, int imm8) { 1528 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1529 assert (imm8 & 0xFF) == imm8; 1530 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1531 asm.emitByte(op); 1532 asm.emitOperandHelper(dst, src2, 1); 1533 asm.emitByte(imm8); 1534 } 1535 } 1536 1537 /** 1538 * VEX-encoded comparison operation with an operand order of RVMI. The immediate operand is a 1539 * comparison operator. 
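 * For example, an ordered, non-signaling equality test that yields false for NaN inputs can be
 * emitted as {@code VCMPSS.emit(asm, AVXSize.XMM, dst, x, y, Predicate.EQ_OQ)}; this is an
 * illustrative usage sketch (the register names are placeholders), with imm8 0x00 selecting
 * the EQ_OQ predicate.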
1540 */ 1541 public static final class VexFloatCompareOp extends VexOp { 1542 // @formatter:off
1543 public static final VexFloatCompareOp VCMPPS = new VexFloatCompareOp("VCMPPS", P_, M_0F, WIG, 0xC2); 1544 public static final VexFloatCompareOp VCMPPD = new VexFloatCompareOp("VCMPPD", P_66, M_0F, WIG, 0xC2);
1545 public static final VexFloatCompareOp VCMPSS = new VexFloatCompareOp("VCMPSS", P_F3, M_0F, WIG, 0xC2); 1546 public static final VexFloatCompareOp VCMPSD = new VexFloatCompareOp("VCMPSD", P_F2, M_0F, WIG, 0xC2);
1547 // @formatter:on 1548 1549 public enum Predicate { 1550 EQ_OQ(0x00), 1551 LT_OS(0x01), 1552 LE_OS(0x02), 1553 UNORD_Q(0x03), 1554 NEQ_UQ(0x04), 1555 NLT_US(0x05), 1556 NLE_US(0x06), 1557 ORD_Q(0x07),
1558 EQ_UQ(0x08), 1559 NGE_US(0x09), 1560 NGT_US(0x0a), 1561 FALSE_OQ(0x0b), 1562 NEQ_OQ(0x0c), 1563 GE_OS(0x0d), 1564 GT_OS(0x0e), 1565 TRUE_UQ(0x0f),
1566 EQ_OS(0x10), 1567 LT_OQ(0x11), 1568 LE_OQ(0x12), 1569 UNORD_S(0x13), 1570 NEQ_US(0x14), 1571 NLT_UQ(0x15), 1572 NLE_UQ(0x16), 1573 ORD_S(0x17),
1574 EQ_US(0x18), 1575 NGE_UQ(0x19), 1576 NGT_UQ(0x1a), 1577 FALSE_OS(0x1b), 1578 NEQ_OS(0x1c), 1579 GE_OQ(0x1d), 1580 GT_OQ(0x1e), 1581 TRUE_US(0x1f);
1582 1583 private final int imm8; 1584 1585 Predicate(int imm8) { 1586 this.imm8 = imm8; 1587 }
1588 1589 public static Predicate getPredicate(Condition condition, boolean unorderedIsTrue) { 1590 if (unorderedIsTrue) { 1591 switch (condition) {
1592 case EQ: 1593 return EQ_UQ; 1594 case NE: 1595 return NEQ_UQ; 1596 case LT: 1597 return NGE_UQ; 1598 case LE: 1599 return NGT_UQ; 1600 case GT: 1601 return NLE_UQ; 1602 case GE: 1603 return NLT_UQ;
1604 default: 1605 throw GraalError.shouldNotReachHere(); 1606 } 1607 } else { 1608 switch (condition) {
1609 case EQ: 1610 return EQ_OQ; 1611 case NE: 1612 return NEQ_OQ; 1613 case LT: 1614 return LT_OQ; 1615 case LE: 1616 return LE_OQ; 1617 case GT: 1618 return GT_OQ; 1619 case GE: 1620 return GE_OQ;
1621 default: 1622 throw GraalError.shouldNotReachHere(); 1623 } 1624 } 1625 } 1626 }
1627 1628 private VexFloatCompareOp(String opcode, int pp, int mmmmm, int w, int op) { 1629 super(opcode, pp, mmmmm, w, op, VEXOpAssertion.AVX1); 1630 }
1631 1632 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, Register src2, Predicate p) { 1633 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, src2); 1634 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1635 asm.emitByte(op); 1636 asm.emitModRM(dst, src2); 1637 asm.emitByte(p.imm8); 1638 }
1639 1640 public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src1, AMD64Address src2, Predicate p) { 1641 assert assertion.check((AMD64) asm.target.arch, size, dst, src1, null); 1642 asm.vexPrefix(dst, src1, src2, size, pp, mmmmm, w, false); 1643 asm.emitByte(op); 1644 asm.emitOperandHelper(dst, src2, 1); 1645 asm.emitByte(p.imm8); 1646 } 1647 }
1648 1649 public final void addl(AMD64Address dst, int imm32) { 1650 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1651 }
1652 1653 public final void addl(Register dst, int imm32) { 1654 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1655 }
1656 1657 public final void addl(Register dst, Register src) { 1658 ADD.rmOp.emit(this, DWORD, dst, src); 1659 }
1660 1661 public final void addpd(Register dst, Register src) { 1662 SSEOp.ADD.emit(this, PD, dst, src); 1663 }
1664 1665 public final void addpd(Register dst, AMD64Address src) { 1666 SSEOp.ADD.emit(this, PD, dst, src); 1667 } 1668 1669 public final void
addsd(Register dst, Register src) { 1670 SSEOp.ADD.emit(this, SD, dst, src); 1671 }
1672 1673 public final void addsd(Register dst, AMD64Address src) { 1674 SSEOp.ADD.emit(this, SD, dst, src); 1675 }
1676 1677 private void addrNop4() { 1678 // 4 bytes: NOP DWORD PTR [EAX+0] 1679 emitByte(0x0F); 1680 emitByte(0x1F); 1681 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc); 1682 emitByte(0); // 8-bit offset (1 byte) 1683 }
1684 1685 private void addrNop5() { 1686 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset 1687 emitByte(0x0F); 1688 emitByte(0x1F); 1689 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); 1690 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1691 emitByte(0); // 8-bit offset (1 byte) 1692 }
1693 1694 private void addrNop7() { 1695 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset 1696 emitByte(0x0F); 1697 emitByte(0x1F); 1698 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); 1699 emitInt(0); // 32-bit offset (4 bytes) 1700 }
1701 1702 private void addrNop8() { 1703 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset 1704 emitByte(0x0F); 1705 emitByte(0x1F); 1706 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); 1707 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1708 emitInt(0); // 32-bit offset (4 bytes) 1709 }
1710 1711 public final void andl(Register dst, int imm32) { 1712 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1713 }
1714 1715 public final void andl(Register dst, Register src) { 1716 AND.rmOp.emit(this, DWORD, dst, src); 1717 }
1718 1719 public final void andpd(Register dst, Register src) { 1720 SSEOp.AND.emit(this, PD, dst, src); 1721 }
1722 1723 public final void andpd(Register dst, AMD64Address src) { 1724 SSEOp.AND.emit(this, PD, dst, src); 1725 }
1726 1727 public final void bsfq(Register dst, Register src) { 1728 prefixq(dst, src); 1729 emitByte(0x0F); 1730 emitByte(0xBC); 1731 emitModRM(dst, src); 1732 }
1733 1734 public final void bsrl(Register dst, Register src) { 1735 prefix(dst, src); 1736 emitByte(0x0F); 1737 emitByte(0xBD); 1738 emitModRM(dst, src); 1739 }
1740 1741 public final void bswapl(Register reg) { 1742 prefix(reg); 1743 emitByte(0x0F); 1744 emitModRM(1, reg); 1745 }
1746 1747 public final void cdql() { 1748 emitByte(0x99); 1749 }
1750 1751 public final void cmovl(ConditionFlag cc, Register dst, Register src) { 1752 prefix(dst, src); 1753 emitByte(0x0F); 1754 emitByte(0x40 | cc.getValue()); 1755 emitModRM(dst, src); 1756 }
1757 1758 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) { 1759 prefix(src, dst); 1760 emitByte(0x0F); 1761 emitByte(0x40 | cc.getValue()); 1762 emitOperandHelper(dst, src, 0); 1763 }
1764 1765 public final void cmpb(Register dst, Register src) { 1766 CMP.byteRmOp.emit(this, BYTE, dst, src); 1767 }
1768 1769 public final void cmpw(Register dst, Register src) { 1770 CMP.rmOp.emit(this, WORD, dst, src); 1771 }
1772 1773 public final void cmpl(Register dst, int imm32) { 1774 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1775 }
1776 1777 public final void cmpl(Register dst, Register src) { 1778 CMP.rmOp.emit(this, DWORD, dst, src); 1779 }
1780 1781 public final void cmpl(Register dst, AMD64Address src) { 1782 CMP.rmOp.emit(this, DWORD, dst, src); 1783 }
1784 1785 public final void cmpl(AMD64Address dst, int imm32) { 1786 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1787 }
1788 1789 /** 1790 * The 8-bit cmpxchg compares the value at adr with the contents of rax; if the values are
1791 * equal, reg is stored into adr, otherwise the value at adr is loaded into rax. The ZF flag
1792 * is set if the compared values were equal, and cleared otherwise. 1793 */
1794 public final void cmpxchgb(Register reg, AMD64Address adr) { // cmpxchg 1795 prefixb(adr, reg); 1796 emitByte(0x0F); 1797 emitByte(0xB0); 1798 emitOperandHelper(reg, adr, 0); 1799 }
1800 1801 /** 1802 * The 16-bit cmpxchg compares the value at adr with the contents of rax; if the values
1803 * are equal, reg is stored into adr, otherwise the value at adr is loaded into rax. The ZF
1804 * flag is set if the compared values were equal, and cleared otherwise. 1805 */
1806 public final void cmpxchgw(Register reg, AMD64Address adr) { // cmpxchg 1807 emitByte(0x66); // Switch to 16-bit mode. 1808 prefix(adr, reg); 1809 emitByte(0x0F); 1810 emitByte(0xB1); 1811 emitOperandHelper(reg, adr, 0); 1812 }
1813 1814 /** 1815 * The 32-bit cmpxchg compares the value at adr with the contents of rax; if the values
1816 * are equal, reg is stored into adr, otherwise the value at adr is loaded into rax. The ZF
1817 * flag is set if the compared values were equal, and cleared otherwise. 1818 */
1819 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg 1820 prefix(adr, reg); 1821 emitByte(0x0F); 1822 emitByte(0xB1); 1823 emitOperandHelper(reg, adr, 0); 1824 }
1825 1826 public final void cvtsi2sdl(Register dst, Register src) { 1827 SSEOp.CVTSI2SD.emit(this, DWORD, dst, src); 1828 }
1829 1830 public final void cvttsd2sil(Register dst, Register src) { 1831 SSEOp.CVTTSD2SI.emit(this, DWORD, dst, src); 1832 }
1833 1834 public final void decl(AMD64Address dst) { 1835 prefix(dst); 1836 emitByte(0xFF); 1837 emitOperandHelper(1, dst, 0); 1838 }
1839 1840 public final void divsd(Register dst, Register src) { 1841 SSEOp.DIV.emit(this, SD, dst, src); 1842 }
1843 1844 public final void hlt() { 1845 emitByte(0xF4); 1846 }
1847 1848 public final void imull(Register dst, Register src, int value) { 1849 if (isByte(value)) { 1850 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value); 1851 } else { 1852 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value); 1853 } 1854 }
1855 1856 public final void incl(AMD64Address dst) { 1857 prefix(dst); 1858 emitByte(0xFF); 1859 emitOperandHelper(0, dst, 0); 1860 }
1861 1862 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { 1863 int shortSize = 2; 1864 int longSize = 6; 1865 long disp = jumpTarget - position();
1866 if (!forceDisp32 && isByte(disp - shortSize)) { 1867 // 0111 tttn #8-bit disp 1868 emitByte(0x70 | cc.getValue()); 1869 emitByte((int) ((disp - shortSize) & 0xFF));
1870 } else { 1871 // 0000 1111 1000 tttn #32-bit disp 1872 assert isInt(disp - longSize) : "must be 32bit offset (call4)"; 1873 emitByte(0x0F); 1874 emitByte(0x80 | cc.getValue()); 1875 emitInt((int) (disp - longSize)); 1876 } 1877 }
1878 1879 public final void jcc(ConditionFlag cc, Label l) { 1880 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; 1881 if (l.isBound()) { 1882 jcc(cc, l.position(), false); 1883 } else {
1884 // Note: we could eliminate conditional jumps to this jump if the condition 1885 // is the same; however, that seems to be a rather unlikely case.
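// Illustrative sketch (not from the original source): with cc = ConditionFlag.Equal
// (tttn = 0x4), a label bound 16 bytes ahead emits the short form 0x74 0x0E, while an
// unbound label emits the long form 0x0F 0x84 followed by a 32-bit displacement that
// is patched once the label is bound.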
1886 // Note: use jccb() if the label to be bound is very close, to get 1887 // an 8-bit displacement
1888 l.addPatchAt(position(), this); 1889 emitByte(0x0F); 1890 emitByte(0x80 | cc.getValue()); 1891 emitInt(0); 1892 } 1893 1894 }
1895 1896 public final void jccb(ConditionFlag cc, Label l) { 1897 if (l.isBound()) { 1898 int shortSize = 2; 1899 int entry = l.position();
1900 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp"; 1901 long disp = entry - position();
1902 // 0111 tttn #8-bit disp 1903 emitByte(0x70 | cc.getValue()); 1904 emitByte((int) ((disp - shortSize) & 0xFF));
1905 } else { 1906 l.addPatchAt(position(), this); 1907 emitByte(0x70 | cc.getValue()); 1908 emitByte(0); 1909 } 1910 }
1911 1912 public final void jmp(int jumpTarget, boolean forceDisp32) { 1913 int shortSize = 2; 1914 int longSize = 5; 1915 long disp = jumpTarget - position();
1916 if (!forceDisp32 && isByte(disp - shortSize)) { 1917 emitByte(0xEB); 1918 emitByte((int) ((disp - shortSize) & 0xFF)); 1919 } else { 1920 emitByte(0xE9); 1921 emitInt((int) (disp - longSize)); 1922 } 1923 }
1924 1925 @Override 1926 public final void jmp(Label l) { 1927 if (l.isBound()) { 1928 jmp(l.position(), false); 1929 } else {
1930 // By default, forward jumps are always 32-bit displacements, since 1931 // we can't yet know where the label will be bound. If you're sure that
1932 // the forward jump will not run beyond 256 bytes, use jmpb to 1933 // force an 8-bit displacement. 1934
1935 l.addPatchAt(position(), this); 1936 emitByte(0xE9); 1937 emitInt(0); 1938 } 1939 }
1940 1941 public final void jmp(Register entry) { 1942 prefix(entry); 1943 emitByte(0xFF); 1944 emitModRM(4, entry); 1945 }
1946 1947 public final void jmp(AMD64Address adr) { 1948 prefix(adr); 1949 emitByte(0xFF); 1950 emitOperandHelper(AMD64.rsp, adr, 0); 1951 }
1952 1953 public final void jmpb(Label l) { 1954 if (l.isBound()) { 1955 int shortSize = 2; 1956 // Displacement is relative to the byte just after the jmpb instruction
1957 int displacement = l.position() - position() - shortSize; 1958 GraalError.guarantee(isByte(displacement), "Displacement too large to be encoded as a byte: %d", displacement);
1959 emitByte(0xEB); 1960 emitByte(displacement & 0xFF); 1961 } else { 1962 l.addPatchAt(position(), this); 1963 emitByte(0xEB); 1964 emitByte(0); 1965 } 1966 }
1967 1968 public final void lead(Register dst, AMD64Address src) { 1969 prefix(src, dst); 1970 emitByte(0x8D); 1971 emitOperandHelper(dst, src, 0); 1972 }
1973 1974 public final void leaq(Register dst, AMD64Address src) { 1975 prefixq(src, dst); 1976 emitByte(0x8D); 1977 emitOperandHelper(dst, src, 0); 1978 }
1979 1980 public final void leave() { 1981 emitByte(0xC9); 1982 }
1983 1984 public final void lock() { 1985 emitByte(0xF0); 1986 }
1987 1988 public final void movapd(Register dst, Register src) { 1989 assert inRC(XMM, dst) && inRC(XMM, src); 1990 simdPrefix(dst, Register.None, src, PD, P_0F, false); 1991 emitByte(0x28); 1992 emitModRM(dst, src); 1993 }
1994 1995 public final void movaps(Register dst, Register src) { 1996 assert inRC(XMM, dst) && inRC(XMM, src); 1997 simdPrefix(dst, Register.None, src, PS, P_0F, false); 1998 emitByte(0x28); 1999 emitModRM(dst, src); 2000 }
2001 2002 public final void movb(AMD64Address dst, int imm8) { 2003 prefix(dst); 2004 emitByte(0xC6); 2005 emitOperandHelper(0, dst, 1); 2006 emitByte(imm8); 2007 }
2008 2009 public final void movb(AMD64Address dst, Register src) { 2010 assert inRC(CPU, src) : "must have byte register"; 2011 prefixb(dst,
src); 2012 emitByte(0x88); 2013 emitOperandHelper(src, dst, 0); 2014 } 2015 2016 public final void movl(Register dst, int imm32) { 2017 movl(dst, imm32, false); 2018 } 2019 2020 public final void movl(Register dst, int imm32, boolean annotateImm) { 2021 int insnPos = position(); 2022 prefix(dst); 2023 emitByte(0xB8 + encode(dst)); 2024 int immPos = position(); 2025 emitInt(imm32); 2026 int nextInsnPos = position(); 2027 if (annotateImm && codePatchingAnnotationConsumer != null) { 2028 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 2029 } 2030 } 2031 2032 public final void movl(Register dst, Register src) { 2033 prefix(dst, src); 2034 emitByte(0x8B); 2035 emitModRM(dst, src); 2036 } 2037 2038 public final void movl(Register dst, AMD64Address src) { 2039 prefix(src, dst); 2040 emitByte(0x8B); 2041 emitOperandHelper(dst, src, 0); 2042 } 2043 2044 /** 2045 * @param wide use 4 byte encoding for displacements that would normally fit in a byte 2046 */ 2047 public final void movl(Register dst, AMD64Address src, boolean wide) { 2048 prefix(src, dst); 2049 emitByte(0x8B); 2050 emitOperandHelper(dst, src, wide, 0); 2051 } 2052 2053 public final void movl(AMD64Address dst, int imm32) { 2054 prefix(dst); 2055 emitByte(0xC7); 2056 emitOperandHelper(0, dst, 4); 2057 emitInt(imm32); 2058 } 2059 2060 public final void movl(AMD64Address dst, Register src) { 2061 prefix(dst, src); 2062 emitByte(0x89); 2063 emitOperandHelper(src, dst, 0); 2064 } 2065 2066 /** 2067 * New CPUs require use of movsd and movss to avoid partial register stall when loading from 2068 * memory. But for old Opteron use movlpd instead of movsd. The selection is done in 2069 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and 2070 * {@link AMD64MacroAssembler#movflt(Register, Register)}. 2071 */ 2072 public final void movlpd(Register dst, AMD64Address src) { 2073 assert inRC(XMM, dst); 2074 simdPrefix(dst, dst, src, PD, P_0F, false); 2075 emitByte(0x12); 2076 emitOperandHelper(dst, src, 0); 2077 } 2078 2079 public final void movlhps(Register dst, Register src) { 2080 assert inRC(XMM, dst) && inRC(XMM, src); 2081 simdPrefix(dst, src, src, PS, P_0F, false); 2082 emitByte(0x16); 2083 emitModRM(dst, src); 2084 } 2085 2086 public final void movq(Register dst, AMD64Address src) { 2087 movq(dst, src, false); 2088 } 2089 2090 public final void movq(Register dst, AMD64Address src, boolean force4BytesDisplacement) { 2091 if (inRC(XMM, dst)) { 2092 // Insn: MOVQ xmm, r/m64 2093 // Code: F3 0F 7E /r 2094 // An alternative instruction would be 66 REX.W 0F 6E /r. We prefer the REX.W free 2095 // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction 2096 // when applicable. 2097 simdPrefix(dst, Register.None, src, SS, P_0F, false); 2098 emitByte(0x7E); 2099 emitOperandHelper(dst, src, force4BytesDisplacement, 0); 2100 } else { 2101 // gpr version of movq 2102 prefixq(src, dst); 2103 emitByte(0x8B); 2104 emitOperandHelper(dst, src, force4BytesDisplacement, 0); 2105 } 2106 } 2107 2108 public final void movq(Register dst, Register src) { 2109 assert inRC(CPU, dst) && inRC(CPU, src); 2110 prefixq(dst, src); 2111 emitByte(0x8B); 2112 emitModRM(dst, src); 2113 } 2114 2115 public final void movq(AMD64Address dst, Register src) { 2116 if (inRC(XMM, src)) { 2117 // Insn: MOVQ r/m64, xmm 2118 // Code: 66 0F D6 /r 2119 // An alternative instruction would be 66 REX.W 0F 7E /r. 
We prefer the REX.W free 2120 // format, because it would allow us to emit 2-bytes-prefixed vex-encoding instruction 2121 // when applicable. 2122 simdPrefix(src, Register.None, dst, PD, P_0F, false); 2123 emitByte(0xD6); 2124 emitOperandHelper(src, dst, 0); 2125 } else { 2126 // gpr version of movq 2127 prefixq(dst, src); 2128 emitByte(0x89); 2129 emitOperandHelper(src, dst, 0); 2130 } 2131 } 2132 2133 public final void movsbl(Register dst, AMD64Address src) { 2134 prefix(src, dst); 2135 emitByte(0x0F); 2136 emitByte(0xBE); 2137 emitOperandHelper(dst, src, 0); 2138 } 2139 2140 public final void movsbl(Register dst, Register src) { 2141 prefix(dst, false, src, true); 2142 emitByte(0x0F); 2143 emitByte(0xBE); 2144 emitModRM(dst, src); 2145 } 2146 2147 public final void movsbq(Register dst, AMD64Address src) { 2148 prefixq(src, dst); 2149 emitByte(0x0F); 2150 emitByte(0xBE); 2151 emitOperandHelper(dst, src, 0); 2152 } 2153 2154 public final void movsbq(Register dst, Register src) { 2155 prefixq(dst, src); 2156 emitByte(0x0F); 2157 emitByte(0xBE); 2158 emitModRM(dst, src); 2159 } 2160 2161 public final void movsd(Register dst, Register src) { 2162 AMD64RMOp.MOVSD.emit(this, SD, dst, src); 2163 } 2164 2165 public final void movsd(Register dst, AMD64Address src) { 2166 AMD64RMOp.MOVSD.emit(this, SD, dst, src); 2167 } 2168 2169 public final void movsd(AMD64Address dst, Register src) { 2170 AMD64MROp.MOVSD.emit(this, SD, dst, src); 2171 } 2172 2173 public final void movss(Register dst, Register src) { 2174 AMD64RMOp.MOVSS.emit(this, SS, dst, src); 2175 } 2176 2177 public final void movss(Register dst, AMD64Address src) { 2178 AMD64RMOp.MOVSS.emit(this, SS, dst, src); 2179 } 2180 2181 public final void movss(AMD64Address dst, Register src) { 2182 AMD64MROp.MOVSS.emit(this, SS, dst, src); 2183 } 2184 2185 public final void mulpd(Register dst, Register src) { 2186 SSEOp.MUL.emit(this, PD, dst, src); 2187 } 2188 2189 public final void mulpd(Register dst, AMD64Address src) { 2190 SSEOp.MUL.emit(this, PD, dst, src); 2191 } 2192 2193 public final void mulsd(Register dst, Register src) { 2194 SSEOp.MUL.emit(this, SD, dst, src); 2195 } 2196 2197 public final void mulsd(Register dst, AMD64Address src) { 2198 SSEOp.MUL.emit(this, SD, dst, src); 2199 } 2200 2201 public final void mulss(Register dst, Register src) { 2202 SSEOp.MUL.emit(this, SS, dst, src); 2203 } 2204 2205 public final void movswl(Register dst, AMD64Address src) { 2206 AMD64RMOp.MOVSX.emit(this, DWORD, dst, src); 2207 } 2208 2209 public final void movswq(Register dst, AMD64Address src) { 2210 AMD64RMOp.MOVSX.emit(this, QWORD, dst, src); 2211 } 2212 2213 public final void movw(AMD64Address dst, int imm16) { 2214 emitByte(0x66); // switch to 16-bit mode 2215 prefix(dst); 2216 emitByte(0xC7); 2217 emitOperandHelper(0, dst, 2); 2218 emitShort(imm16); 2219 } 2220 2221 public final void movw(AMD64Address dst, Register src) { 2222 emitByte(0x66); 2223 prefix(dst, src); 2224 emitByte(0x89); 2225 emitOperandHelper(src, dst, 0); 2226 } 2227 2228 public final void movw(Register dst, AMD64Address src) { 2229 emitByte(0x66); 2230 prefix(src, dst); 2231 emitByte(0x8B); 2232 emitOperandHelper(dst, src, 0); 2233 } 2234 2235 public final void movzbl(Register dst, AMD64Address src) { 2236 prefix(src, dst); 2237 emitByte(0x0F); 2238 emitByte(0xB6); 2239 emitOperandHelper(dst, src, 0); 2240 } 2241 2242 public final void movzbl(Register dst, Register src) { 2243 AMD64RMOp.MOVZXB.emit(this, DWORD, dst, src); 2244 } 2245 2246 public final void movzbq(Register dst, 
Register src) { 2247 AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src); 2248 }
2249 2250 public final void movzbq(Register dst, AMD64Address src) { 2251 AMD64RMOp.MOVZXB.emit(this, QWORD, dst, src); 2252 }
2253 2254 public final void movzwl(Register dst, AMD64Address src) { 2255 AMD64RMOp.MOVZX.emit(this, DWORD, dst, src); 2256 }
2257 2258 public final void movzwq(Register dst, AMD64Address src) { 2259 AMD64RMOp.MOVZX.emit(this, QWORD, dst, src); 2260 }
2261 2262 public final void negl(Register dst) { 2263 NEG.emit(this, DWORD, dst); 2264 }
2265 2266 public final void notl(Register dst) { 2267 NOT.emit(this, DWORD, dst); 2268 }
2269 2270 public final void notq(Register dst) { 2271 NOT.emit(this, QWORD, dst); 2272 }
2273 2274 @Override 2275 public final void ensureUniquePC() { 2276 nop(); 2277 }
2278 2279 public final void nop() { 2280 nop(1); 2281 }
2282 2283 public void nop(int count) { 2284 int i = count; 2285 if (UseNormalNop) { 2286 assert i > 0 : " ";
2287 // The fancy nops aren't currently recognized by debuggers, making it a 2288 // pain to disassemble code while debugging. If asserts are on, speed is
2289 // clearly not an issue, so simply use the single-byte traditional nop 2290 // for alignment. 2291
2292 for (; i > 0; i--) { 2293 emitByte(0x90); 2294 } 2295 return; 2296 }
2297 2298 if (UseAddressNop) { 2299 if (UseIntelNops) { 2300 intelNops(i); 2301 } else { 2302 amdNops(i); 2303 } 2304 return; 2305 }
2306 2307 // Using nops with size prefixes "0x66 0x90". 2308 // From AMD Optimization Guide:
2309 // 1: 0x90 2310 // 2: 0x66 0x90 2311 // 3: 0x66 0x66 0x90 2312 // 4: 0x66 0x66 0x66 0x90
2313 // 5: 0x66 0x66 0x90 0x66 0x90 2314 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2315 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2316 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2317 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2318 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2319 //
2320 while (i > 12) { 2321 i -= 4; 2322 emitByte(0x66); // size prefix 2323 emitByte(0x66); 2324 emitByte(0x66); 2325 emitByte(0x90); // nop 2326 }
2327 // 1 - 12 nops 2328 if (i > 8) { 2329 if (i > 9) { 2330 i -= 1; 2331 emitByte(0x66); 2332 } 2333 i -= 3; 2334 emitByte(0x66); 2335 emitByte(0x66); 2336 emitByte(0x90); 2337 }
2338 // 1 - 8 nops 2339 if (i > 4) { 2340 if (i > 6) { 2341 i -= 1; 2342 emitByte(0x66); 2343 } 2344 i -= 3; 2345 emitByte(0x66); 2346 emitByte(0x66); 2347 emitByte(0x90); 2348 }
2349 switch (i) { 2350 case 4: 2351 emitByte(0x66); 2352 emitByte(0x66); 2353 emitByte(0x66); 2354 emitByte(0x90); 2355 break;
2356 case 3: 2357 emitByte(0x66); 2358 emitByte(0x66); 2359 emitByte(0x90); 2360 break;
2361 case 2: 2362 emitByte(0x66); 2363 emitByte(0x90); 2364 break; 2365 case 1: 2366 emitByte(0x90); 2367 break; 2368 default: 2369 assert i == 0; 2370 } 2371 }
2372 2373 private void amdNops(int count) { 2374 int i = count; 2375 // 2376 // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
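// (0x0F 0x1F /0 is the architectural multi-byte NOP, i.e. NOP r/m32; each table entry
// below lists the exact byte sequence emitted for the given padding size.)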
2377 // 1: 0x90 2378 // 2: 0x66 0x90 2379 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - patching-safe padding is needed)
2380 // 4: 0x0F 0x1F 0x40 0x00 2381 // 5: 0x0F 0x1F 0x44 0x00 0x00 2382 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2383 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2384 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2385 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2386 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2387 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2388
2389 // The remaining encoding is AMD-specific: it uses consecutive address nops 2390
2391 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2392 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2393 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2394 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2395 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2396 // Size prefixes (0x66) are added for larger sizes 2397
2398 while (i >= 22) { 2399 i -= 11; 2400 emitByte(0x66); // size prefix 2401 emitByte(0x66); // size prefix 2402 emitByte(0x66); // size prefix 2403 addrNop8(); 2404 }
2405 // Generate the first nop for sizes 12 to 21 2406 switch (i) {
2407 case 21: 2408 i -= 11; 2409 emitByte(0x66); // size prefix 2410 emitByte(0x66); // size prefix 2411 emitByte(0x66); // size prefix 2412 addrNop8(); 2413 break;
2414 case 20: 2415 case 19: 2416 i -= 10; 2417 emitByte(0x66); // size prefix 2418 emitByte(0x66); // size prefix 2419 addrNop8(); 2420 break;
2421 case 18: 2422 case 17: 2423 i -= 9; 2424 emitByte(0x66); // size prefix 2425 addrNop8(); 2426 break;
2427 case 16: 2428 case 15: 2429 i -= 8; 2430 addrNop8(); 2431 break;
2432 case 14: 2433 case 13: 2434 i -= 7; 2435 addrNop7(); 2436 break;
2437 case 12: 2438 i -= 6; 2439 emitByte(0x66); // size prefix 2440 addrNop5(); 2441 break;
2442 default: 2443 assert i < 12; 2444 } 2445
2446 // Generate the second nop for sizes 1 to 11 2447 switch (i) {
2448 case 11: 2449 emitByte(0x66); // size prefix 2450 emitByte(0x66); // size prefix 2451 emitByte(0x66); // size prefix 2452 addrNop8(); 2453 break;
2454 case 10: 2455 emitByte(0x66); // size prefix 2456 emitByte(0x66); // size prefix 2457 addrNop8(); 2458 break;
2459 case 9: 2460 emitByte(0x66); // size prefix 2461 addrNop8(); 2462 break;
2463 case 8: 2464 addrNop8(); 2465 break; 2466 case 7: 2467 addrNop7(); 2468 break;
2469 case 6: 2470 emitByte(0x66); // size prefix 2471 addrNop5(); 2472 break; 2473 case 5: 2474 addrNop5(); 2475 break; 2476 case 4: 2477 addrNop4(); 2478 break;
2479 case 3: 2480 // Don't use "0x0F 0x1F 0x00" - patching-safe padding is needed 2481 emitByte(0x66); // size prefix 2482 emitByte(0x66); // size prefix 2483 emitByte(0x90); // nop 2484 break;
2485 case 2: 2486 emitByte(0x66); // size prefix 2487 emitByte(0x90); // nop 2488 break; 2489 case 1: 2490 emitByte(0x90); // nop 2491 break;
2492 default: 2493 assert i == 0; 2494 } 2495 }
2496 2497 @SuppressWarnings("fallthrough") 2498 private void intelNops(int count) { 2499 // 2500 // Using multi-byte nops "0x0F 0x1F [address]" for Intel
2501 // 1: 0x90 2502 // 2: 0x66 0x90 2503 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - patching-safe padding is needed)
2504 // 4: 0x0F 0x1F 0x40 0x00 2505 // 5: 0x0F 0x1F 0x44 0x00 0x00 2506 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2507 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2508 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2509 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2510 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2511 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2512
2513 // The remaining encoding is Intel-specific: it does not use consecutive address nops 2514
2515 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2516 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2517 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2518 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2519
2520 int i = count; 2521 while (i >= 15) { 2522 // For Intel, don't generate consecutive address nops (mix with regular nops)
2523 i -= 15; 2524 emitByte(0x66); // size prefix 2525 emitByte(0x66); // size prefix 2526 emitByte(0x66); // size prefix 2527 addrNop8();
2528 emitByte(0x66); // size prefix 2529 emitByte(0x66); // size prefix 2530 emitByte(0x66); // size prefix 2531 emitByte(0x90); 2532 // nop 2533 }
2534 switch (i) { 2535 case 14: 2536 emitByte(0x66); // size prefix 2537 // fall through
2538 case 13: 2539 emitByte(0x66); // size prefix 2540 // fall through
2541 case 12: 2542 addrNop8(); 2543 emitByte(0x66); // size prefix 2544 emitByte(0x66); // size prefix 2545 emitByte(0x66); // size prefix 2546 emitByte(0x90); 2547 // nop 2548 break;
2549 case 11: 2550 emitByte(0x66); // size prefix 2551 // fall through 2552 case 10: 2553 emitByte(0x66); // size prefix 2554 // fall through
2555 case 9: 2556 emitByte(0x66); // size prefix 2557 // fall through 2558 case 8: 2559 addrNop8(); 2560 break;
2561 case 7: 2562 addrNop7(); 2563 break; 2564 case 6: 2565 emitByte(0x66); // size prefix 2566 // fall through 2567 case 5: 2568 addrNop5(); 2569 break;
2570 case 4: 2571 addrNop4(); 2572 break;
2573 case 3: 2574 // Don't use "0x0F 0x1F 0x00" - patching-safe padding is needed 2575 emitByte(0x66); // size prefix 2576 // fall through
2577 case 2: 2578 emitByte(0x66); // size prefix 2579 // fall through 2580 case 1: 2581 emitByte(0x90); 2582 // nop 2583 break;
2584 default: 2585 assert i == 0; 2586 } 2587 }
2588 2589 public final void orl(Register dst, Register src) { 2590 OR.rmOp.emit(this, DWORD, dst, src); 2591 }
2592 2593 public final void orl(Register dst, int imm32) { 2594 OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2595 }
2596 2597 // Insn: VPACKUSWB xmm1, xmm2, xmm3/m128 2598 // ----- 2599 // Insn: VPACKUSWB xmm1, xmm1, xmm2 2600
2601 public final void packuswb(Register dst, Register src) { 2602 assert inRC(XMM, dst) && inRC(XMM, src); 2603 // Code: VEX.NDS.128.66.0F.WIG 67 /r 2604 simdPrefix(dst, dst, src, PD, P_0F, false); 2605 emitByte(0x67); 2606 emitModRM(dst, src); 2607 }
2608 2609 public final void pop(Register dst) { 2610 prefix(dst); 2611 emitByte(0x58 + encode(dst)); 2612 }
2613 2614 public void popfq() { 2615 emitByte(0x9D); 2616 }
2617 2618 public final void ptest(Register dst, Register src) { 2619 assert supports(CPUFeature.SSE4_1); 2620 assert inRC(XMM, dst) && inRC(XMM, src); 2621 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2622 emitByte(0x17); 2623 emitModRM(dst, src); 2624 }
2625 2626 public final void pcmpeqb(Register dst, Register src) { 2627 assert supports(CPUFeature.SSE2); 2628 assert inRC(XMM, dst) && inRC(XMM, src); 2629 simdPrefix(dst, dst, src, PD, P_0F, false); 2630 emitByte(0x74); 2631 emitModRM(dst, src); 2632 }
2633 2634 public final void pcmpeqw(Register dst, Register src) { 2635 assert supports(CPUFeature.SSE2); 2636 assert inRC(XMM, dst) && inRC(XMM, src); 2637 simdPrefix(dst, dst, src,
PD, P_0F, false); 2638 emitByte(0x75); 2639 emitModRM(dst, src); 2640 } 2641 2642 public final void pcmpeqd(Register dst, Register src) { 2643 assert supports(CPUFeature.SSE2); 2644 assert dst.getRegisterCategory().equals(XMM) && src.getRegisterCategory().equals(XMM); 2645 simdPrefix(dst, dst, src, PD, P_0F, false); 2646 emitByte(0x76); 2647 emitModRM(dst, src); 2648 } 2649 2650 public final void pcmpestri(Register dst, AMD64Address src, int imm8) { 2651 assert supports(CPUFeature.SSE4_2); 2652 assert inRC(XMM, dst); 2653 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2654 emitByte(0x61); 2655 emitOperandHelper(dst, src, 0); 2656 emitByte(imm8); 2657 } 2658 2659 public final void pcmpestri(Register dst, Register src, int imm8) { 2660 assert supports(CPUFeature.SSE4_2); 2661 assert inRC(XMM, dst) && inRC(XMM, src); 2662 simdPrefix(dst, Register.None, src, PD, P_0F3A, false); 2663 emitByte(0x61); 2664 emitModRM(dst, src); 2665 emitByte(imm8); 2666 } 2667 2668 public final void pmovmskb(Register dst, Register src) { 2669 assert supports(CPUFeature.SSE2); 2670 assert inRC(CPU, dst) && inRC(XMM, src); 2671 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2672 emitByte(0xD7); 2673 emitModRM(dst, src); 2674 } 2675 2676 private void pmovSZx(Register dst, AMD64Address src, int op) { 2677 assert supports(CPUFeature.SSE4_1); 2678 assert inRC(XMM, dst); 2679 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2680 emitByte(op); 2681 emitOperandHelper(dst, src, 0); 2682 } 2683 2684 public final void pmovsxbw(Register dst, AMD64Address src) { 2685 pmovSZx(dst, src, 0x20); 2686 } 2687 2688 public final void pmovsxbd(Register dst, AMD64Address src) { 2689 pmovSZx(dst, src, 0x21); 2690 } 2691 2692 public final void pmovsxbq(Register dst, AMD64Address src) { 2693 pmovSZx(dst, src, 0x22); 2694 } 2695 2696 public final void pmovsxwd(Register dst, AMD64Address src) { 2697 pmovSZx(dst, src, 0x23); 2698 } 2699 2700 public final void pmovsxwq(Register dst, AMD64Address src) { 2701 pmovSZx(dst, src, 0x24); 2702 } 2703 2704 public final void pmovsxdq(Register dst, AMD64Address src) { 2705 pmovSZx(dst, src, 0x25); 2706 } 2707 2708 // Insn: VPMOVZXBW xmm1, xmm2/m64 2709 public final void pmovzxbw(Register dst, AMD64Address src) { 2710 pmovSZx(dst, src, 0x30); 2711 } 2712 2713 public final void pmovzxbd(Register dst, AMD64Address src) { 2714 pmovSZx(dst, src, 0x31); 2715 } 2716 2717 public final void pmovzxbq(Register dst, AMD64Address src) { 2718 pmovSZx(dst, src, 0x32); 2719 } 2720 2721 public final void pmovzxwd(Register dst, AMD64Address src) { 2722 pmovSZx(dst, src, 0x33); 2723 } 2724 2725 public final void pmovzxwq(Register dst, AMD64Address src) { 2726 pmovSZx(dst, src, 0x34); 2727 } 2728 2729 public final void pmovzxdq(Register dst, AMD64Address src) { 2730 pmovSZx(dst, src, 0x35); 2731 } 2732 2733 public final void pmovzxbw(Register dst, Register src) { 2734 assert supports(CPUFeature.SSE4_1); 2735 assert inRC(XMM, dst) && inRC(XMM, src); 2736 simdPrefix(dst, Register.None, src, PD, P_0F38, false); 2737 emitByte(0x30); 2738 emitModRM(dst, src); 2739 } 2740 2741 public final void push(Register src) { 2742 prefix(src); 2743 emitByte(0x50 + encode(src)); 2744 } 2745 2746 public void pushfq() { 2747 emitByte(0x9c); 2748 } 2749 2750 public final void paddd(Register dst, Register src) { 2751 assert inRC(XMM, dst) && inRC(XMM, src); 2752 simdPrefix(dst, dst, src, PD, P_0F, false); 2753 emitByte(0xFE); 2754 emitModRM(dst, src); 2755 } 2756 2757 public final void paddq(Register dst, Register src) { 
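// Insn: PADDQ xmm1, xmm2
// Code: 66 0F D4 /r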
2758 assert inRC(XMM, dst) && inRC(XMM, src); 2759 simdPrefix(dst, dst, src, PD, P_0F, false); 2760 emitByte(0xD4); 2761 emitModRM(dst, src); 2762 } 2763 2764 public final void pextrw(Register dst, Register src, int imm8) { 2765 assert inRC(CPU, dst) && inRC(XMM, src); 2766 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2767 emitByte(0xC5); 2768 emitModRM(dst, src); 2769 emitByte(imm8); 2770 } 2771 2772 public final void pinsrw(Register dst, Register src, int imm8) { 2773 assert inRC(XMM, dst) && inRC(CPU, src); 2774 simdPrefix(dst, dst, src, PD, P_0F, false); 2775 emitByte(0xC4); 2776 emitModRM(dst, src); 2777 emitByte(imm8); 2778 } 2779 2780 public final void por(Register dst, Register src) { 2781 assert inRC(XMM, dst) && inRC(XMM, src); 2782 simdPrefix(dst, dst, src, PD, P_0F, false); 2783 emitByte(0xEB); 2784 emitModRM(dst, src); 2785 } 2786 2787 public final void pand(Register dst, Register src) { 2788 assert inRC(XMM, dst) && inRC(XMM, src); 2789 simdPrefix(dst, dst, src, PD, P_0F, false); 2790 emitByte(0xDB); 2791 emitModRM(dst, src); 2792 } 2793 2794 public final void pxor(Register dst, Register src) { 2795 assert inRC(XMM, dst) && inRC(XMM, src); 2796 simdPrefix(dst, dst, src, PD, P_0F, false); 2797 emitByte(0xEF); 2798 emitModRM(dst, src); 2799 } 2800 2801 public final void pslld(Register dst, int imm8) { 2802 assert isUByte(imm8) : "invalid value"; 2803 assert inRC(XMM, dst); 2804 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 2805 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2806 emitByte(0x72); 2807 emitModRM(6, dst); 2808 emitByte(imm8 & 0xFF); 2809 } 2810 2811 public final void psllq(Register dst, Register shift) { 2812 assert inRC(XMM, dst) && inRC(XMM, shift); 2813 simdPrefix(dst, dst, shift, PD, P_0F, false); 2814 emitByte(0xF3); 2815 emitModRM(dst, shift); 2816 } 2817 2818 public final void psllq(Register dst, int imm8) { 2819 assert isUByte(imm8) : "invalid value"; 2820 assert inRC(XMM, dst); 2821 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 2822 simdPrefix(AMD64.xmm6, dst, dst, PD, P_0F, false); 2823 emitByte(0x73); 2824 emitModRM(6, dst); 2825 emitByte(imm8); 2826 } 2827 2828 public final void psrad(Register dst, int imm8) { 2829 assert isUByte(imm8) : "invalid value"; 2830 assert inRC(XMM, dst); 2831 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 2832 simdPrefix(AMD64.xmm4, dst, dst, PD, P_0F, false); 2833 emitByte(0x72); 2834 emitModRM(4, dst); 2835 emitByte(imm8); 2836 } 2837 2838 public final void psrld(Register dst, int imm8) { 2839 assert isUByte(imm8) : "invalid value"; 2840 assert inRC(XMM, dst); 2841 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 2842 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2843 emitByte(0x72); 2844 emitModRM(2, dst); 2845 emitByte(imm8); 2846 } 2847 2848 public final void psrlq(Register dst, int imm8) { 2849 assert isUByte(imm8) : "invalid value"; 2850 assert inRC(XMM, dst); 2851 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 2852 simdPrefix(AMD64.xmm2, dst, dst, PD, P_0F, false); 2853 emitByte(0x73); 2854 emitModRM(2, dst); 2855 emitByte(imm8); 2856 } 2857 2858 public final void psrldq(Register dst, int imm8) { 2859 assert isUByte(imm8) : "invalid value"; 2860 assert inRC(XMM, dst); 2861 simdPrefix(AMD64.xmm3, dst, dst, PD, P_0F, false); 2862 emitByte(0x73); 2863 emitModRM(3, dst); 2864 emitByte(imm8); 2865 } 2866 2867 public final void pshufb(Register dst, Register src) { 2868 assert supports(CPUFeature.SSSE3); 2869 assert inRC(XMM, dst) && inRC(XMM, src); 2870 simdPrefix(dst, dst, src, PD, P_0F38, false); 2871 emitByte(0x00); 2872 
emitModRM(dst, src); 2873 } 2874 2875 public final void pshuflw(Register dst, Register src, int imm8) { 2876 assert supports(CPUFeature.SSE2); 2877 assert isUByte(imm8) : "invalid value"; 2878 assert inRC(XMM, dst) && inRC(XMM, src); 2879 simdPrefix(dst, Register.None, src, SD, P_0F, false); 2880 emitByte(0x70); 2881 emitModRM(dst, src); 2882 emitByte(imm8); 2883 } 2884 2885 public final void pshufd(Register dst, Register src, int imm8) { 2886 assert isUByte(imm8) : "invalid value"; 2887 assert inRC(XMM, dst) && inRC(XMM, src); 2888 simdPrefix(dst, Register.None, src, PD, P_0F, false); 2889 emitByte(0x70); 2890 emitModRM(dst, src); 2891 emitByte(imm8); 2892 } 2893 2894 public final void psubd(Register dst, Register src) { 2895 assert inRC(XMM, dst) && inRC(XMM, src); 2896 simdPrefix(dst, dst, src, PD, P_0F, false); 2897 emitByte(0xFA); 2898 emitModRM(dst, src); 2899 } 2900 2901 public final void punpcklbw(Register dst, Register src) { 2902 assert supports(CPUFeature.SSE2); 2903 assert inRC(XMM, dst) && inRC(XMM, src); 2904 simdPrefix(dst, dst, src, PD, P_0F, false); 2905 emitByte(0x60); 2906 emitModRM(dst, src); 2907 } 2908 2909 public final void rcpps(Register dst, Register src) { 2910 assert inRC(XMM, dst) && inRC(XMM, src); 2911 simdPrefix(dst, Register.None, src, PS, P_0F, false); 2912 emitByte(0x53); 2913 emitModRM(dst, src); 2914 } 2915 2916 public final void ret(int imm16) { 2917 if (imm16 == 0) { 2918 emitByte(0xC3); 2919 } else { 2920 emitByte(0xC2); 2921 emitShort(imm16); 2922 } 2923 } 2924 2925 public final void sarl(Register dst, int imm8) { 2926 prefix(dst); 2927 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2928 if (imm8 == 1) { 2929 emitByte(0xD1); 2930 emitModRM(7, dst); 2931 } else { 2932 emitByte(0xC1); 2933 emitModRM(7, dst); 2934 emitByte(imm8); 2935 } 2936 } 2937 2938 public final void shll(Register dst, int imm8) { 2939 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2940 prefix(dst); 2941 if (imm8 == 1) { 2942 emitByte(0xD1); 2943 emitModRM(4, dst); 2944 } else { 2945 emitByte(0xC1); 2946 emitModRM(4, dst); 2947 emitByte(imm8); 2948 } 2949 } 2950 2951 public final void shll(Register dst) { 2952 // Multiply dst by 2, CL times. 2953 prefix(dst); 2954 emitByte(0xD3); 2955 emitModRM(4, dst); 2956 } 2957 2958 // Insn: SHLX r32a, r/m32, r32b 2959 2960 public final void shlxl(Register dst, Register src1, Register src2) { 2961 VexGeneralPurposeRMVOp.SHLX.emit(this, AVXSize.DWORD, dst, src1, src2); 2962 } 2963 2964 public final void shrl(Register dst, int imm8) { 2965 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2966 prefix(dst); 2967 emitByte(0xC1); 2968 emitModRM(5, dst); 2969 emitByte(imm8); 2970 } 2971 2972 public final void shrl(Register dst) { 2973 // Unsigned divide dst by 2, CL times. 
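// For example, shrl(AMD64.rax) emits 0xD3 0xE8, i.e. SHR EAX, CL.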
2974 prefix(dst); 2975 emitByte(0xD3); 2976 emitModRM(5, dst); 2977 }
2978 2979 public final void subl(AMD64Address dst, int imm32) { 2980 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2981 }
2982 2983 public final void subl(Register dst, int imm32) { 2984 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 2985 }
2986 2987 public final void subl(Register dst, Register src) { 2988 SUB.rmOp.emit(this, DWORD, dst, src); 2989 }
2990 2991 public final void subpd(Register dst, Register src) { 2992 SSEOp.SUB.emit(this, PD, dst, src); 2993 }
2994 2995 public final void subsd(Register dst, Register src) { 2996 SSEOp.SUB.emit(this, SD, dst, src); 2997 }
2998 2999 public final void subsd(Register dst, AMD64Address src) { 3000 SSEOp.SUB.emit(this, SD, dst, src); 3001 }
3002 3003 public final void testl(Register dst, int imm32) { 3004 // not using emitArith because test 3005 // doesn't support sign-extension of 3006 // 8-bit operands
3007 if (dst.encoding == 0) { 3008 emitByte(0xA9); 3009 } else { 3010 prefix(dst); 3011 emitByte(0xF7); 3012 emitModRM(0, dst); 3013 } 3014 emitInt(imm32); 3015 }
3016 3017 public final void testl(Register dst, Register src) { 3018 prefix(dst, src); 3019 emitByte(0x85); 3020 emitModRM(dst, src); 3021 }
3022 3023 public final void testl(Register dst, AMD64Address src) { 3024 prefix(src, dst); 3025 emitByte(0x85); 3026 emitOperandHelper(dst, src, 0); 3027 }
3028 3029 public final void unpckhpd(Register dst, Register src) { 3030 assert inRC(XMM, dst) && inRC(XMM, src); 3031 simdPrefix(dst, dst, src, PD, P_0F, false); 3032 emitByte(0x15); 3033 emitModRM(dst, src); 3034 }
3035 3036 public final void unpcklpd(Register dst, Register src) { 3037 assert inRC(XMM, dst) && inRC(XMM, src); 3038 simdPrefix(dst, dst, src, PD, P_0F, false); 3039 emitByte(0x14); 3040 emitModRM(dst, src); 3041 }
3042 3043 public final void xorl(Register dst, Register src) { 3044 XOR.rmOp.emit(this, DWORD, dst, src); 3045 }
3046 3047 public final void xorq(Register dst, Register src) { 3048 XOR.rmOp.emit(this, QWORD, dst, src); 3049 }
3050 3051 public final void xorpd(Register dst, Register src) { 3052 SSEOp.XOR.emit(this, PD, dst, src); 3053 }
3054 3055 public final void xorps(Register dst, Register src) { 3056 SSEOp.XOR.emit(this, PS, dst, src); 3057 }
3058 3059 protected final void decl(Register dst) { 3060 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3061 prefix(dst); 3062 emitByte(0xFF); 3063 emitModRM(1, dst); 3064 }
3065 3066 protected final void incl(Register dst) { 3067 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3068 prefix(dst); 3069 emitByte(0xFF); 3070 emitModRM(0, dst); 3071 }
3072 3073 public final void addq(Register dst, int imm32) { 3074 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3075 }
3076 3077 public final void addq(AMD64Address dst, int imm32) { 3078 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3079 }
3080 3081 public final void addq(Register dst, Register src) { 3082 ADD.rmOp.emit(this, QWORD, dst, src); 3083 }
3084 3085 public final void addq(AMD64Address dst, Register src) { 3086 ADD.mrOp.emit(this, QWORD, dst, src); 3087 }
3088 3089 public final void andq(Register dst, int imm32) { 3090 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3091 }
3092 3093 public final void bsrq(Register dst, Register src) { 3094 prefixq(dst, src); 3095 emitByte(0x0F); 3096 emitByte(0xBD); 3097 emitModRM(dst, src); 3098 }
3099 3100 public final void
bswapq(Register reg) { 3101 prefixq(reg); 3102 emitByte(0x0F); 3103 emitByte(0xC8 + encode(reg)); 3104 }
3105 3106 public final void cdqq() { 3107 rexw(); 3108 emitByte(0x99); 3109 }
3110 3111 public final void repStosb() { 3112 emitByte(0xf3); 3113 rexw(); 3114 emitByte(0xaa); 3115 }
3116 3117 public final void repStosq() { 3118 emitByte(0xf3); 3119 rexw(); 3120 emitByte(0xab); 3121 }
3122 3123 public final void cmovq(ConditionFlag cc, Register dst, Register src) { 3124 prefixq(dst, src); 3125 emitByte(0x0F); 3126 emitByte(0x40 | cc.getValue()); 3127 emitModRM(dst, src); 3128 }
3129 3130 public final void setb(ConditionFlag cc, Register dst) { 3131 prefix(dst, true); 3132 emitByte(0x0F); 3133 emitByte(0x90 | cc.getValue()); 3134 emitModRM(0, dst); 3135 }
3136 3137 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) { 3138 prefixq(src, dst); 3139 emitByte(0x0F); 3140 emitByte(0x40 | cc.getValue()); 3141 emitOperandHelper(dst, src, 0); 3142 }
3143 3144 public final void cmpq(Register dst, int imm32) { 3145 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3146 }
3147 3148 public final void cmpq(Register dst, Register src) { 3149 CMP.rmOp.emit(this, QWORD, dst, src); 3150 }
3151 3152 public final void cmpq(Register dst, AMD64Address src) { 3153 CMP.rmOp.emit(this, QWORD, dst, src); 3154 }
3155 3156 public final void cmpxchgq(Register reg, AMD64Address adr) { 3157 prefixq(adr, reg); 3158 emitByte(0x0F); 3159 emitByte(0xB1); 3160 emitOperandHelper(reg, adr, 0); 3161 }
3162 3163 public final void cvtdq2pd(Register dst, Register src) { 3164 assert inRC(XMM, dst) && inRC(XMM, src); 3165 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3166 emitByte(0xE6); 3167 emitModRM(dst, src); 3168 }
3169 3170 public final void cvtsi2sdq(Register dst, Register src) { 3171 SSEOp.CVTSI2SD.emit(this, QWORD, dst, src); 3172 }
3173 3174 public final void cvttsd2siq(Register dst, Register src) { 3175 SSEOp.CVTTSD2SI.emit(this, QWORD, dst, src); 3176 }
3177 3178 public final void cvttpd2dq(Register dst, Register src) { 3179 assert inRC(XMM, dst) && inRC(XMM, src); 3180 simdPrefix(dst, Register.None, src, PD, P_0F, false); 3181 emitByte(0xE6); 3182 emitModRM(dst, src); 3183 }
3184 3185 public final void decq(Register dst) { 3186 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3187 prefixq(dst); 3188 emitByte(0xFF); 3189 emitModRM(1, dst); 3190 }
3191 3192 public final void decq(AMD64Address dst) { 3193 DEC.emit(this, QWORD, dst); 3194 }
3195 3196 public final void imulq(Register dst, Register src) { 3197 prefixq(dst, src); 3198 emitByte(0x0F); 3199 emitByte(0xAF); 3200 emitModRM(dst, src); 3201 }
3202 3203 public final void incq(Register dst) { 3204 // Don't use this directly; use AMD64MacroAssembler.incrementq() instead.
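// For example, incq(AMD64.rbx) emits 0x48 0xFF 0xC3 (REX.W FF /0).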
3205 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3206 prefixq(dst); 3207 emitByte(0xFF); 3208 emitModRM(0, dst); 3209 } 3210 3211 public final void incq(AMD64Address dst) { 3212 INC.emit(this, QWORD, dst); 3213 } 3214 3215 public final void movq(Register dst, long imm64) { 3216 movq(dst, imm64, false); 3217 } 3218 3219 public final void movq(Register dst, long imm64, boolean annotateImm) { 3220 int insnPos = position(); 3221 prefixq(dst); 3222 emitByte(0xB8 + encode(dst)); 3223 int immPos = position(); 3224 emitLong(imm64); 3225 int nextInsnPos = position(); 3226 if (annotateImm && codePatchingAnnotationConsumer != null) { 3227 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(insnPos, immPos, nextInsnPos - immPos, nextInsnPos)); 3228 } 3229 } 3230 3231 public final void movslq(Register dst, int imm32) { 3232 prefixq(dst); 3233 emitByte(0xC7); 3234 emitModRM(0, dst); 3235 emitInt(imm32); 3236 } 3237 3238 public final void movdq(Register dst, AMD64Address src) { 3239 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 3240 } 3241 3242 public final void movdq(AMD64Address dst, Register src) { 3243 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 3244 } 3245 3246 public final void movdq(Register dst, Register src) { 3247 if (inRC(XMM, dst) && inRC(CPU, src)) { 3248 AMD64RMOp.MOVQ.emit(this, QWORD, dst, src); 3249 } else if (inRC(XMM, src) && inRC(CPU, dst)) { 3250 AMD64MROp.MOVQ.emit(this, QWORD, dst, src); 3251 } else { 3252 throw new InternalError("should not reach here"); 3253 } 3254 } 3255 3256 public final void movdl(Register dst, Register src) { 3257 if (inRC(XMM, dst) && inRC(CPU, src)) { 3258 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 3259 } else if (inRC(XMM, src) && inRC(CPU, dst)) { 3260 AMD64MROp.MOVD.emit(this, DWORD, dst, src); 3261 } else { 3262 throw new InternalError("should not reach here"); 3263 } 3264 } 3265 3266 public final void movdl(Register dst, AMD64Address src) { 3267 AMD64RMOp.MOVD.emit(this, DWORD, dst, src); 3268 } 3269 3270 public final void movddup(Register dst, Register src) { 3271 assert supports(CPUFeature.SSE3); 3272 assert inRC(XMM, dst) && inRC(XMM, src); 3273 simdPrefix(dst, Register.None, src, SD, P_0F, false); 3274 emitByte(0x12); 3275 emitModRM(dst, src); 3276 } 3277 3278 public final void movdqu(Register dst, AMD64Address src) { 3279 assert inRC(XMM, dst); 3280 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3281 emitByte(0x6F); 3282 emitOperandHelper(dst, src, 0); 3283 } 3284 3285 public final void movdqu(Register dst, Register src) { 3286 assert inRC(XMM, dst) && inRC(XMM, src); 3287 simdPrefix(dst, Register.None, src, SS, P_0F, false); 3288 emitByte(0x6F); 3289 emitModRM(dst, src); 3290 } 3291 3292 // Insn: VMOVDQU xmm2/m128, xmm1 3293 3294 public final void movdqu(AMD64Address dst, Register src) { 3295 assert inRC(XMM, src); 3296 // Code: VEX.128.F3.0F.WIG 7F /r 3297 simdPrefix(src, Register.None, dst, SS, P_0F, false); 3298 emitByte(0x7F); 3299 emitOperandHelper(src, dst, 0); 3300 } 3301 3302 public final void movslq(AMD64Address dst, int imm32) { 3303 prefixq(dst); 3304 emitByte(0xC7); 3305 emitOperandHelper(0, dst, 4); 3306 emitInt(imm32); 3307 } 3308 3309 public final void movslq(Register dst, AMD64Address src) { 3310 prefixq(src, dst); 3311 emitByte(0x63); 3312 emitOperandHelper(dst, src, 0); 3313 } 3314 3315 public final void movslq(Register dst, Register src) { 3316 prefixq(dst, src); 3317 emitByte(0x63); 3318 emitModRM(dst, src); 3319 } 3320 3321 public final void negq(Register dst) { 3322 prefixq(dst);
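// NEG r/m64 is encoded as REX.W + F7 /3 (two's-complement negation).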
3323 emitByte(0xF7); 3324 emitModRM(3, dst); 3325 } 3326 3327 public final void orq(Register dst, Register src) { 3328 OR.rmOp.emit(this, QWORD, dst, src); 3329 } 3330 3331 public final void shlq(Register dst, int imm8) { 3332 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3333 prefixq(dst); 3334 if (imm8 == 1) { 3335 emitByte(0xD1); 3336 emitModRM(4, dst); 3337 } else { 3338 emitByte(0xC1); 3339 emitModRM(4, dst); 3340 emitByte(imm8); 3341 } 3342 } 3343 3344 public final void shlq(Register dst) { 3345 // Multiply dst by 2, CL times. 3346 prefixq(dst); 3347 emitByte(0xD3); 3348 emitModRM(4, dst); 3349 } 3350 3351 public final void shrq(Register dst, int imm8) { 3352 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3353 prefixq(dst); 3354 if (imm8 == 1) { 3355 emitByte(0xD1); 3356 emitModRM(5, dst); 3357 } else { 3358 emitByte(0xC1); 3359 emitModRM(5, dst); 3360 emitByte(imm8); 3361 } 3362 } 3363 3364 public final void shrq(Register dst) { 3365 prefixq(dst); 3366 emitByte(0xD3); 3367 // Unsigned divide dst by 2, CL times. 3368 emitModRM(5, dst); 3369 } 3370 3371 public final void sarq(Register dst, int imm8) { 3372 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 3373 prefixq(dst); 3374 if (imm8 == 1) { 3375 emitByte(0xD1); 3376 emitModRM(7, dst); 3377 } else { 3378 emitByte(0xC1); 3379 emitModRM(7, dst); 3380 emitByte(imm8); 3381 } 3382 } 3383 3384 public final void sbbq(Register dst, Register src) { 3385 SBB.rmOp.emit(this, QWORD, dst, src); 3386 } 3387 3388 public final void subq(Register dst, int imm32) { 3389 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3390 } 3391 3392 public final void subq(AMD64Address dst, int imm32) { 3393 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 3394 } 3395 3396 public final void subqWide(Register dst, int imm32) { 3397 // don't use the sign-extending version, forcing a 32-bit immediate 3398 SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32); 3399 } 3400 3401 public final void subq(Register dst, Register src) { 3402 SUB.rmOp.emit(this, QWORD, dst, src); 3403 } 3404 3405 public final void testq(Register dst, Register src) { 3406 prefixq(dst, src); 3407 emitByte(0x85); 3408 emitModRM(dst, src); 3409 } 3410 3411 public final void btrq(Register src, int imm8) { 3412 prefixq(src); 3413 emitByte(0x0F); 3414 emitByte(0xBA); 3415 emitModRM(6, src); 3416 emitByte(imm8); 3417 } 3418 3419 public final void xaddb(AMD64Address dst, Register src) { 3420 prefixb(dst, src); 3421 emitByte(0x0F); 3422 emitByte(0xC0); 3423 emitOperandHelper(src, dst, 0); 3424 } 3425 3426 public final void xaddw(AMD64Address dst, Register src) { 3427 emitByte(0x66); // Switch to 16-bit mode. 
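// The 0x66 operand-size override makes the XADD encoding below (0F C1) operate on 16-bit operands; otherwise it matches xaddl.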
prefix(dst, src); 3429 emitByte(0x0F); 3430 emitByte(0xC1); 3431 emitOperandHelper(src, dst, 0); 3432 } 3433 3434 public final void xaddl(AMD64Address dst, Register src) { 3435 prefix(dst, src); 3436 emitByte(0x0F); 3437 emitByte(0xC1); 3438 emitOperandHelper(src, dst, 0); 3439 } 3440 3441 public final void xaddq(AMD64Address dst, Register src) { 3442 prefixq(dst, src); 3443 emitByte(0x0F); 3444 emitByte(0xC1); 3445 emitOperandHelper(src, dst, 0); 3446 } 3447 3448 public final void xchgb(Register dst, AMD64Address src) { 3449 prefixb(src, dst); 3450 emitByte(0x86); 3451 emitOperandHelper(dst, src, 0); 3452 } 3453 3454 public final void xchgw(Register dst, AMD64Address src) { 3455 emitByte(0x66); 3456 prefix(src, dst); 3457 emitByte(0x87); 3458 emitOperandHelper(dst, src, 0); 3459 } 3460 3461 public final void xchgl(Register dst, AMD64Address src) { 3462 prefix(src, dst); 3463 emitByte(0x87); 3464 emitOperandHelper(dst, src, 0); 3465 } 3466 3467 public final void xchgq(Register dst, AMD64Address src) { 3468 prefixq(src, dst); 3469 emitByte(0x87); 3470 emitOperandHelper(dst, src, 0); 3471 } 3472 3473 public final void membar(int barriers) { 3474 if (target.isMP) { 3475 // We only have to handle StoreLoad 3476 if ((barriers & STORE_LOAD) != 0) { 3477 // All usable chips support "locked" instructions, which suffice 3478 // as barriers and are much faster than the alternative of 3479 // using the cpuid instruction. Here we use a locked add of 0 to 3480 // [rsp], which is conveniently otherwise a no-op except for 3481 // clobbering the flags. 3482 // Any change to this code may need to revisit other places in 3483 // the code where this idiom is used, in particular the 3484 // orderAccess code. 3485 lock(); 3486 addl(new AMD64Address(AMD64.rsp, 0), 0); // Assert the lock# signal here 3487 } 3488 } 3489 } 3490 3491 @Override 3492 protected final void patchJumpTarget(int branch, int branchTarget) { 3493 int op = getByte(branch); 3494 assert op == 0xE8 // call 3495 || op == 0x00 // jump table entry 3496 || op == 0xE9 // jmp 3497 || op == 0xEB // short jmp 3498 || (op & 0xF0) == 0x70 // short jcc 3499 || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc 3500 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op; 3501 3502 if (op == 0x00) { 3503 int offsetToJumpTableBase = getShort(branch + 1); 3504 int jumpTableBase = branch - offsetToJumpTableBase; 3505 int imm32 = branchTarget - jumpTableBase; 3506 emitInt(imm32, branch); 3507 } else if (op == 0xEB || (op & 0xF0) == 0x70) { 3508 3509 // short-offset branches (jmp and jcc) 3510 final int imm8 = branchTarget - (branch + 2); 3511 /* 3512 * Since a wrongly patched short branch can potentially lead to code that works but 3513 * behaves really badly, we always fail with an exception here instead of an assert. 3514 */ 3515 GraalError.guarantee(isByte(imm8), "Displacement too large to be encoded as a byte: %d", imm8); 3516 emitByte(imm8, branch + 1); 3517 3518 } else { 3519 3520 int off = 1; 3521 if (op == 0x0F) { 3522 off = 2; 3523 } 3524 3525 int imm32 = branchTarget - (branch + 4 + off); 3526 emitInt(imm32, branch + off); 3527 } 3528 } 3529 3530 public void nullCheck(AMD64Address address) { 3531 testl(AMD64.rax, address); 3532 } 3533 3534 @Override 3535 public void align(int modulus) { 3536 if (position() % modulus != 0) { 3537 nop(modulus - (position() % modulus)); 3538 } 3539 } 3540 3541 /** 3542 * Emits a direct call instruction.
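The emitted instruction is CALL rel32, i.e. opcode 0xE8 followed by a 32-bit displacement.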
Note that the actual call target is not specified, because 3543 * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is 3544 * responsible for adding the call address to the appropriate patching tables. 3545 */ 3546 public final void call() { 3547 annotatePatchingImmediate(1, 4); 3548 emitByte(0xE8); 3549 emitInt(0); 3550 } 3551 3552 public final void call(Register src) { 3553 prefix(src); 3554 emitByte(0xFF); 3555 emitModRM(2, src); 3556 } 3557 3558 public final void int3() { 3559 emitByte(0xCC); 3560 } 3561 3562 public final void pause() { 3563 emitByte(0xF3); 3564 emitByte(0x90); 3565 } 3566 3567 private void emitx87(int b1, int b2, int i) { 3568 assert 0 <= i && i < 8 : "illegal stack offset"; 3569 emitByte(b1); 3570 emitByte(b2 + i); 3571 } 3572 3573 public final void fldd(AMD64Address src) { 3574 emitByte(0xDD); 3575 emitOperandHelper(0, src, 0); 3576 } 3577 3578 public final void flds(AMD64Address src) { 3579 emitByte(0xD9); 3580 emitOperandHelper(0, src, 0); 3581 } 3582 3583 public final void fldln2() { 3584 emitByte(0xD9); 3585 emitByte(0xED); 3586 } 3587 3588 public final void fldlg2() { 3589 emitByte(0xD9); 3590 emitByte(0xEC); 3591 } 3592 3593 public final void fyl2x() { 3594 emitByte(0xD9); 3595 emitByte(0xF1); 3596 } 3597 3598 public final void fstps(AMD64Address src) { 3599 emitByte(0xD9); 3600 emitOperandHelper(3, src, 0); 3601 } 3602 3603 public final void fstpd(AMD64Address src) { 3604 emitByte(0xDD); 3605 emitOperandHelper(3, src, 0); 3606 } 3607 3608 private void emitFPUArith(int b1, int b2, int i) { 3609 assert 0 <= i && i < 8 : "illegal FPU register: " + i; 3610 emitByte(b1); 3611 emitByte(b2 + i); 3612 } 3613 3614 public void ffree(int i) { 3615 emitFPUArith(0xDD, 0xC0, i); 3616 } 3617 3618 public void fincstp() { 3619 emitByte(0xD9); 3620 emitByte(0xF7); 3621 } 3622 3623 public void fxch(int i) { 3624 emitFPUArith(0xD9, 0xC8, i); 3625 } 3626 3627 public void fnstswAX() { 3628 emitByte(0xDF); 3629 emitByte(0xE0); 3630 } 3631 3632 public void fwait() { 3633 emitByte(0x9B); 3634 } 3635 3636 public void fprem() { 3637 emitByte(0xD9); 3638 emitByte(0xF8); 3639 } 3640 3641 public final void fsin() { 3642 emitByte(0xD9); 3643 emitByte(0xFE); 3644 } 3645 3646 public final void fcos() { 3647 emitByte(0xD9); 3648 emitByte(0xFF); 3649 } 3650 3651 public final void fptan() { 3652 emitByte(0xD9); 3653 emitByte(0xF2); 3654 } 3655 3656 public final void fstp(int i) { 3657 emitx87(0xDD, 0xD8, i); 3658 } 3659 3660 @Override 3661 public AMD64Address makeAddress(Register base, int displacement) { 3662 return new AMD64Address(base, displacement); 3663 } 3664 3665 @Override 3666 public AMD64Address getPlaceholder(int instructionStartPosition) { 3667 return new AMD64Address(AMD64.rip, Register.None, Scale.Times1, 0, instructionStartPosition); 3668 } 3669 3670 private void prefetchPrefix(AMD64Address src) { 3671 prefix(src); 3672 emitByte(0x0F); 3673 } 3674 3675 public void prefetchnta(AMD64Address src) { 3676 prefetchPrefix(src); 3677 emitByte(0x18); 3678 emitOperandHelper(0, src, 0); 3679 } 3680 3681 void prefetchr(AMD64Address src) { 3682 assert supports(CPUFeature.AMD_3DNOW_PREFETCH); 3683 prefetchPrefix(src); 3684 emitByte(0x0D); 3685 emitOperandHelper(0, src, 0); 3686 } 3687 3688 public void prefetcht0(AMD64Address src) { 3689 assert supports(CPUFeature.SSE); 3690 prefetchPrefix(src); 3691 emitByte(0x18); 3692 emitOperandHelper(1, src, 0); 3693 } 3694 3695 public void prefetcht1(AMD64Address src) { 3696 assert supports(CPUFeature.SSE); 3697
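// PREFETCHT1 is encoded as 0F 18 /2 (prefetch into the L2 cache and higher levels).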
prefetchPrefix(src); 3698 emitByte(0x18); 3699 emitOperandHelper(2, src, 0); 3700 } 3701 3702 public void prefetcht2(AMD64Address src) { 3703 assert supports(CPUFeature.SSE); 3704 prefix(src); 3705 emitByte(0x0f); 3706 emitByte(0x18); 3707 emitOperandHelper(3, src, 0); 3708 } 3709 3710 public void prefetchw(AMD64Address src) { 3711 assert supports(CPUFeature.AMD_3DNOW_PREFETCH); 3712 prefix(src); 3713 emitByte(0x0f); 3714 emitByte(0x0D); 3715 emitOperandHelper(1, src, 0); 3716 } 3717 3718 public void rdtsc() { 3719 emitByte(0x0F); 3720 emitByte(0x31); 3721 } 3722 3723 /** 3724 * Emits an instruction which is defined to be illegal (the ud2 opcode). This is used if we 3725 * deliberately want to crash the program (debugging etc.). 3726 */ 3727 public void illegal() { 3728 emitByte(0x0f); 3729 emitByte(0x0b); 3730 } 3731 3732 public void lfence() { 3733 emitByte(0x0f); 3734 emitByte(0xae); 3735 emitByte(0xe8); 3736 } 3737 3738 public final void vptest(Register dst, Register src) { 3739 VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src); 3740 } 3741 3742 public final void vpxor(Register dst, Register nds, Register src) { 3743 VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src); 3744 } 3745 3746 public final void vpxor(Register dst, Register nds, AMD64Address src) { 3747 VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src); 3748 } 3749 3750 public final void vmovdqu(Register dst, AMD64Address src) { 3751 VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src); 3752 } 3753 3754 public final void vmovdqu(AMD64Address dst, Register src) { 3755 assert inRC(XMM, src); 3756 VexMoveOp.VMOVDQU.emit(this, AVXSize.YMM, dst, src); 3757 } 3758 3759 public final void vpmovzxbw(Register dst, AMD64Address src) { 3760 assert supports(CPUFeature.AVX2); 3761 VexRMOp.VPMOVZXBW.emit(this, AVXSize.YMM, dst, src); 3762 } 3763 3764 public final void vzeroupper() { 3765 emitVEX(L128, P_, M_0F, W0, 0, 0, true); 3766 emitByte(0x77); 3767 } 3768 3769 // Insn: KORTESTD k1, k2 3770 3771 // Sets ZF if the OR of the two masks is all zeros, and CF if it is all ones 3772 public final void kortestd(Register src1, Register src2) { 3773 assert supports(CPUFeature.AVX512BW); 3774 assert inRC(MASK, src1) && inRC(MASK, src2); 3775 // Code: VEX.L0.66.0F.W1 98 /r 3776 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true); 3777 emitByte(0x98); 3778 emitModRM(src1, src2); 3779 } 3780 3781 // Insn: KORTESTQ k1, k2 3782 3783 // Sets ZF if the OR of the two masks is all zeros, and CF if it is all ones 3784 public final void kortestq(Register src1, Register src2) { 3785 assert supports(CPUFeature.AVX512BW); 3786 assert inRC(MASK, src1) && inRC(MASK, src2); 3787 // Code: VEX.L0.0F.W1 98 /r 3788 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_, M_0F, W1, true); 3789 emitByte(0x98); 3790 emitModRM(src1, src2); 3791 } 3792 3793 public final void kmovd(Register dst, Register src) { 3794 assert supports(CPUFeature.AVX512BW); 3795 assert inRC(MASK, dst) || inRC(CPU, dst); 3796 assert inRC(MASK, src) || inRC(CPU, src); 3797 assert !(inRC(CPU, dst) && inRC(CPU, src)); 3798 3799 if (inRC(MASK, dst)) { 3800 if (inRC(MASK, src)) { 3801 // kmovd(KRegister dst, KRegister src): 3802 // Insn: KMOVD k1, k2/m32 3803 // Code: VEX.L0.66.0F.W1 90 /r 3804 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_66, M_0F, W1, true); 3805 emitByte(0x90); 3806 emitModRM(dst, src); 3807 } else { 3808 // kmovd(KRegister dst, Register src) 3809 // Insn: KMOVD k1, r32 3810 // Code: VEX.L0.F2.0F.W0 92 /r 3811 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true); 3812 emitByte(0x92); 3813 emitModRM(dst, src); 3814 } 3815
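// Note the asymmetry above: mask-to-mask KMOVD uses the 66-prefixed 0x90 opcode, while moving from a CPU register uses the F2-prefixed 0x92 form.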
} else { 3816 if (inRC(MASK, src)) { 3817 // kmovd(Register dst, KRegister src) 3818 // Insn: KMOVD r32, k1 3819 // Code: VEX.L0.F2.0F.W0 93 /r 3820 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W0, true); 3821 emitByte(0x93); 3822 emitModRM(dst, src); 3823 } else { 3824 throw GraalError.shouldNotReachHere(); 3825 } 3826 } 3827 } 3828 3829 public final void kmovq(Register dst, Register src) { 3830 assert supports(CPUFeature.AVX512BW); 3831 assert inRC(MASK, dst) || inRC(CPU, dst); 3832 assert inRC(MASK, src) || inRC(CPU, src); 3833 assert !(inRC(CPU, dst) && inRC(CPU, src)); 3834 3835 if (inRC(MASK, dst)) { 3836 if (inRC(MASK, src)) { 3837 // kmovq(KRegister dst, KRegister src): 3838 // Insn: KMOVQ k1, k2/m64 3839 // Code: VEX.L0.0F.W1 90 /r 3840 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_, M_0F, W1, true); 3841 emitByte(0x90); 3842 emitModRM(dst, src); 3843 } else { 3844 // kmovq(KRegister dst, Register src) 3845 // Insn: KMOVQ k1, r64 3846 // Code: VEX.L0.F2.0F.W1 92 /r 3847 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true); 3848 emitByte(0x92); 3849 emitModRM(dst, src); 3850 } 3851 } else { 3852 if (inRC(MASK, src)) { 3853 // kmovq(Register dst, KRegister src) 3854 // Insn: KMOVQ r64, k1 3855 // Code: VEX.L0.F2.0F.W1 93 /r 3856 vexPrefix(dst, Register.None, src, AVXSize.XMM, P_F2, M_0F, W1, true); 3857 emitByte(0x93); 3858 emitModRM(dst, src); 3859 } else { 3860 throw GraalError.shouldNotReachHere(); 3861 } 3862 } 3863 } 3864 3865 // Insn: KTESTD k1, k2 3866 3867 public final void ktestd(Register src1, Register src2) { 3868 assert supports(CPUFeature.AVX512BW); 3869 assert inRC(MASK, src1) && inRC(MASK, src2); 3870 // Code: VEX.L0.66.0F.W1 99 /r 3871 vexPrefix(src1, Register.None, src2, AVXSize.XMM, P_66, M_0F, W1, true); 3872 emitByte(0x99); 3873 emitModRM(src1, src2); 3874 } 3875 3876 public final void evmovdqu64(Register dst, AMD64Address src) { 3877 assert supports(CPUFeature.AVX512F); 3878 assert inRC(XMM, dst); 3879 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F3, M_0F, W1, Z0, B0); 3880 emitByte(0x6F); 3881 emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3882 } 3883 3884 // Insn: VPMOVZXBW zmm1, m256 3885 3886 public final void evpmovzxbw(Register dst, AMD64Address src) { 3887 assert supports(CPUFeature.AVX512BW); 3888 assert inRC(XMM, dst); 3889 // Code: EVEX.512.66.0F38.WIG 30 /r 3890 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0); 3891 emitByte(0x30); 3892 emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3893 } 3894 3895 public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src) { 3896 assert supports(CPUFeature.AVX512BW); 3897 assert inRC(MASK, kdst) && inRC(XMM, nds); 3898 evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F, WIG, Z0, B0); 3899 emitByte(0x74); 3900 emitEVEXOperandHelper(kdst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3901 } 3902 3903 // Insn: VMOVDQU16 zmm1 {k1}{z}, zmm2/m512 3904 // ----- 3905 // Insn: VMOVDQU16 zmm1, m512 3906 3907 public final void evmovdqu16(Register dst, AMD64Address src) { 3908 assert supports(CPUFeature.AVX512BW); 3909 assert inRC(XMM, dst); 3910 // Code: EVEX.512.F2.0F.W1 6F /r 3911 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0); 3912 emitByte(0x6F); 3913 emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3914 } 3915 3916 // Insn: 
VMOVDQU16 zmm1, k1:z, m512 3917 3918 public final void evmovdqu16(Register dst, Register mask, AMD64Address src) { 3919 assert supports(CPUFeature.AVX512BW); 3920 assert inRC(XMM, dst) && inRC(MASK, mask); 3921 // Code: EVEX.512.F2.0F.W1 6F /r 3922 evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_F2, M_0F, W1, Z1, B0); 3923 emitByte(0x6F); 3924 emitEVEXOperandHelper(dst, src, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3925 } 3926 3927 // Insn: VMOVDQU16 zmm2/m512 {k1}{z}, zmm1 3928 // ----- 3929 // Insn: VMOVDQU16 m512, zmm1 3930 3931 public final void evmovdqu16(AMD64Address dst, Register src) { 3932 assert supports(CPUFeature.AVX512BW); 3933 assert inRC(XMM, src); 3934 // Code: EVEX.512.F2.0F.W1 7F /r 3935 evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0); 3936 emitByte(0x7F); 3937 emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3938 } 3939 3940 // Insn: VMOVDQU16 m512, k1, zmm1 3941 3942 public final void evmovdqu16(AMD64Address dst, Register mask, Register src) { 3943 assert supports(CPUFeature.AVX512BW); 3944 assert inRC(MASK, mask) && inRC(XMM, src); 3945 // Code: EVEX.512.F2.0F.W1 7F /r 3946 evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F2, M_0F, W1, Z0, B0); 3947 emitByte(0x7F); 3948 emitEVEXOperandHelper(src, dst, 0, EVEXTuple.FVM.getDisp8ScalingFactor(AVXSize.ZMM)); 3949 } 3950 3951 // Insn: VPBROADCASTW zmm1 {k1}{z}, reg 3952 // ----- 3953 // Insn: VPBROADCASTW zmm1, reg 3954 3955 public final void evpbroadcastw(Register dst, Register src) { 3956 assert supports(CPUFeature.AVX512BW); 3957 assert inRC(XMM, dst) && inRC(CPU, src); 3958 // Code: EVEX.512.66.0F38.W0 7B /r 3959 evexPrefix(dst, Register.None, Register.None, src, AVXSize.ZMM, P_66, M_0F38, W0, Z0, B0); 3960 emitByte(0x7B); 3961 emitModRM(dst, src); 3962 } 3963 3964 // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8 3965 // ----- 3966 // Insn: VPCMPUW k1, zmm2, zmm3, imm8 3967 3968 public final void evpcmpuw(Register kdst, Register nds, Register src, int vcc) { 3969 assert supports(CPUFeature.AVX512BW); 3970 assert inRC(MASK, kdst) && inRC(XMM, nds) && inRC(XMM, src); 3971 // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib 3972 evexPrefix(kdst, Register.None, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0); 3973 emitByte(0x3E); 3974 emitModRM(kdst, src); 3975 emitByte(vcc); 3976 } 3977 3978 // Insn: VPCMPUW k1 {k2}, zmm2, zmm3/m512, imm8 3979 // ----- 3980 // Insn: VPCMPUW k1, k2, zmm2, zmm3, imm8 3981 3982 public final void evpcmpuw(Register kdst, Register mask, Register nds, Register src, int vcc) { 3983 assert supports(CPUFeature.AVX512BW); 3984 assert inRC(MASK, kdst) && inRC(MASK, mask); 3985 assert inRC(XMM, nds) && inRC(XMM, src); 3986 // Code: EVEX.NDS.512.66.0F3A.W1 3E /r ib 3987 evexPrefix(kdst, mask, nds, src, AVXSize.ZMM, P_66, M_0F3A, W1, Z0, B0); 3988 emitByte(0x3E); 3989 emitModRM(kdst, src); 3990 emitByte(vcc); 3991 } 3992 3993 // Insn: VPMOVWB ymm1/m256 {k1}{z}, zmm2 3994 // ----- 3995 // Insn: VPMOVWB m256, zmm2 3996 3997 public final void evpmovwb(AMD64Address dst, Register src) { 3998 assert supports(CPUFeature.AVX512BW); 3999 assert inRC(XMM, src); 4000 // Code: EVEX.512.F3.0F38.W0 30 /r 4001 evexPrefix(src, Register.None, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0); 4002 emitByte(0x30); 4003 emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); 4004 } 4005 4006 // Insn: VPMOVWB m256, k1, zmm2 4007 4008 public final void evpmovwb(AMD64Address dst, Register 
mask, Register src) { 4009 assert supports(CPUFeature.AVX512BW); 4010 assert inRC(MASK, mask) && inRC(XMM, src); 4011 // Code: EVEX.512.F3.0F38.W0 30 /r 4012 evexPrefix(src, mask, Register.None, dst, AVXSize.ZMM, P_F3, M_0F38, W0, Z0, B0); 4013 emitByte(0x30); 4014 emitEVEXOperandHelper(src, dst, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); 4015 } 4016 4017 // Insn: VPMOVZXBW zmm1 {k1}{z}, ymm2/m256 4018 // ----- 4019 // Insn: VPMOVZXBW zmm1, k1, m256 4020 4021 public final void evpmovzxbw(Register dst, Register mask, AMD64Address src) { 4022 assert supports(CPUFeature.AVX512BW); 4023 assert inRC(MASK, mask) && inRC(XMM, dst); 4024 // Code: EVEX.512.66.0F38.WIG 30 /r 4025 evexPrefix(dst, mask, Register.None, src, AVXSize.ZMM, P_66, M_0F38, WIG, Z0, B0); 4026 emitByte(0x30); 4027 emitEVEXOperandHelper(dst, src, 0, EVEXTuple.HVM.getDisp8ScalingFactor(AVXSize.ZMM)); 4028 } 4029 4030 }
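// Usage sketch (illustrative only, not part of the original file): given a
// jdk.vm.ci.code.TargetDescription for an AMD64 target, the emitters above can be
// driven directly. For example:
//
//   AMD64Assembler asm = new AMD64Assembler(target);
//   asm.addq(AMD64.rax, 8);                               // ADD rax, 8 (sign-extended imm8 form)
//   asm.cmpq(AMD64.rax, AMD64.rbx);                       // CMP rax, rbx
//   asm.membar(jdk.vm.ci.code.MemoryBarriers.STORE_LOAD); // lock addl [rsp], 0 on MP targets
//
// The encoded bytes accumulate in the code buffer of the assembler base class.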